Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0ce15d6f authored by Christian König, committed by Alex Deucher
Browse files

drm/amdgpu: allocate VM PDs/PTs on demand



Let's start to allocate VM PDs/PTs on demand instead of pre-allocating
them during mapping.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 780637cb
Loading
Loading
Loading
Loading
+1 −9
Original line number Diff line number Diff line
@@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
	if (p_bo_va_entry)
		*p_bo_va_entry = bo_va_entry;

	/* Allocate new page tables if needed and validate
	 * them.
	 */
	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
	if (ret) {
		pr_err("Failed to allocate pts, err=%d\n", ret);
		goto err_alloc_pts;
	}

	/* Allocate validate page tables if needed */
	ret = vm_validate_pt_pd_bos(vm);
	if (ret) {
		pr_err("validate_pt_pd_bos() failed\n");
+0 −9
Original line number Diff line number Diff line
@@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		return -ENOMEM;
	}

	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
				size);
	if (r) {
		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
		amdgpu_vm_bo_rmv(adev, *bo_va);
		ttm_eu_backoff_reservation(&ticket, &list);
		return r;
	}

	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
			     AMDGPU_PTE_EXECUTABLE);
+0 −10
Original line number Diff line number Diff line
@@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

	switch (args->operation) {
	case AMDGPU_VA_OP_MAP:
		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
					args->map_size);
		if (r)
			goto error_backoff;

		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
				     args->offset_in_bo, args->map_size,
@@ -645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
						args->map_size);
		break;
	case AMDGPU_VA_OP_REPLACE:
		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
					args->map_size);
		if (r)
			goto error_backoff;

		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
					     args->offset_in_bo, args->map_size,
+38 −98
Original line number Diff line number Diff line
@@ -520,47 +520,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
	}
}

/**
 * amdgpu_vm_pt_first_leaf - position a cursor on the first leaf PD/PT
 *
 * @adev: amdgpu_device pointer
 * @vm: amdgpu_vm structure
 * @start: start addr of the walk
 * @cursor: state to initialize
 *
 * Initialize @cursor for a walk beginning at @start, then keep calling
 * amdgpu_vm_pt_descendant() until it reports that no further step is
 * possible.
 */
static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
				    struct amdgpu_vm *vm, uint64_t start,
				    struct amdgpu_vm_pt_cursor *cursor)
{
	amdgpu_vm_pt_start(adev, vm, start, cursor);

	/* The call itself does all the work; stop once it returns false. */
	for (;;) {
		if (!amdgpu_vm_pt_descendant(adev, cursor))
			break;
	}
}

/**
 * amdgpu_vm_pt_next_leaf - advance a cursor to the next leaf PD/PT
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Step the cursor with amdgpu_vm_pt_next() and, unless the resulting pfn
 * is all ones, descend again until amdgpu_vm_pt_descendant() returns false.
 */
static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
				   struct amdgpu_vm_pt_cursor *cursor)
{
	amdgpu_vm_pt_next(adev, cursor);

	/*
	 * NOTE(review): a pfn of ~0ll is presumably the end-of-walk marker
	 * left by amdgpu_vm_pt_next() - confirm against that helper.
	 */
	if (cursor->pfn == ~0ll)
		return;

	while (amdgpu_vm_pt_descendant(adev, cursor))
		continue;
}

/**
 * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
 *
 * @adev: amdgpu_device pointer
 * @vm: amdgpu_vm structure
 * @start: start addr of the walk
 * @end: last pfn to visit (inclusive, the loop runs while cursor.pfn <= end)
 * @cursor: struct amdgpu_vm_pt_cursor lvalue used as the iteration state
 *
 * Every argument is parenthesized in the expansion so that arbitrary
 * expressions (e.g. a conditional for @end) keep their intended grouping.
 * Note that @end is re-evaluated on each iteration.
 */
#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\
	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\
	     (cursor).pfn <= (end); amdgpu_vm_pt_next_leaf((adev), &(cursor)))

/**
 * amdgpu_vm_pt_first_dfs - start a deep first search
 *
@@ -932,51 +891,30 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm,
			uint64_t saddr, uint64_t size)
			       struct amdgpu_vm_pt_cursor *cursor)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_pt *entry = cursor->entry;
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *pt;
	uint64_t eaddr;
	int r;

	/* validate the parameters */
	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
		return -EINVAL;

	eaddr = saddr + size - 1;

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	if (eaddr >= adev->vm_manager.max_pfn) {
		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
			eaddr, adev->vm_manager.max_pfn);
		return -EINVAL;
	}

	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
		struct amdgpu_vm_pt *entry = cursor.entry;
		struct amdgpu_bo_param bp;

		if (cursor.level < AMDGPU_VM_PTB) {
	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
		unsigned num_entries;

			num_entries = amdgpu_vm_num_entries(adev, cursor.level);
		num_entries = amdgpu_vm_num_entries(adev, cursor->level);
		entry->entries = kvmalloc_array(num_entries,
						sizeof(*entry->entries),
							GFP_KERNEL |
							__GFP_ZERO);
						GFP_KERNEL | __GFP_ZERO);
		if (!entry->entries)
			return -ENOMEM;
	}


	if (entry->base.bo)
			continue;
		return 0;

		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);

	r = amdgpu_bo_create(adev, &bp, &pt);
	if (r)
@@ -991,14 +929,12 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
	/* Keep a reference to the root directory to avoid
	 * freeing them up in the wrong order.
	 */
		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);

	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
	amdgpu_vm_bo_base_init(&entry->base, vm, pt);

	r = amdgpu_vm_clear_bo(adev, vm, pt);
	if (r)
		goto error_free_pt;
	}

	return 0;

@@ -1644,6 +1580,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
	struct amdgpu_vm_pt_cursor cursor;
	uint64_t frag_start = start, frag_end;
	unsigned int frag;
	int r;

	/* figure out the initial fragment */
	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
@@ -1651,12 +1588,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
	/* walk over the address space and update the PTs */
	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
	while (cursor.pfn < end) {
		struct amdgpu_bo *pt = cursor.entry->base.bo;
		unsigned shift, parent_shift, mask;
		uint64_t incr, entry_end, pe_start;
		struct amdgpu_bo *pt;

		if (!pt)
			return -ENOENT;
		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
		if (r)
			return r;

		pt = cursor.entry->base.bo;

		/* The root level can't be a huge page */
		if (cursor.level == adev->vm_manager.root_level) {
+0 −3
Original line number Diff line number Diff line
@@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			      int (*callback)(void *p, struct amdgpu_bo *bo),
			      void *param);
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
			struct amdgpu_vm *vm,
			uint64_t saddr, uint64_t size);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
int amdgpu_vm_update_directories(struct amdgpu_device *adev,
				 struct amdgpu_vm *vm);