
Commit 915d3eec authored by Philip Yang, committed by Alex Deucher

drm/amdgpu: replace get_user_pages with HMM mirror helpers



Use the HMM helper function hmm_vma_fault() to get the physical pages
backing a userptr and to start tracking CPU page table updates on those
pages. Then use hmm_vma_range_done() to check whether those pages were
updated, before amdgpu_cs_submit for gfx or before user queues are
resumed for kfd.
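
For illustration only (a hedged sketch, not a hunk from this commit),
the flow the new wrappers implement looks roughly like this, where
amdgpu_ttm_tt_get_user_pages() calls hmm_vma_fault() and
amdgpu_ttm_tt_get_user_pages_done() calls hmm_vma_range_done():

	r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
	if (r)
		return r;	/* could not fault in the userptr pages */

	/* ... validate the BO and build the submission ... */

	if (!amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm))
		return -EAGAIN;	/* pages changed underneath us, restart */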

If the userptr pages were updated, amdgpu_cs_ioctl restarts from
scratch for gfx; for kfd, the restore worker is rescheduled to retry.
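
On the kfd side the retry looks roughly like the sketch below (hedged:
the restore worker itself is outside the hunks shown here, and the
AMDGPU_USERPTR_RESTORE_DELAY_MS name is assumed from the existing
worker code):

	/* Sketch: if the pages were invalidated again while restoring,
	 * run the restore worker again later instead of failing.
	 */
	if (ret == -EAGAIN)
		schedule_delayed_work(&process_info->restore_userptr_work,
			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));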

HMM simplifies the concurrent CPU page table update check, so remove
the guptasklock, mmu_invalidations and last_set_pages fields from the
amdgpu_ttm_tt struct.

HMM does not pin pages (it does not raise the page refcount), so remove
the related operations such as release_pages(), put_page() and
mark_page_dirty().
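
A hedged before/after sketch of the resulting lifetime rules (the old
calls are the ones this commit deletes; FOLL_WRITE stands in for the
real gup flags):

	/* Before (pinned): every exit path had to drop the refs again. */
	get_user_pages(start, num_pages, FOLL_WRITE, pages, NULL);
	/* ... use the pages ... */
	release_pages(pages, num_pages);

	/* After (unpinned): HMM snapshots the CPU page table without
	 * taking references, so there is nothing to release; validity
	 * is checked with amdgpu_ttm_tt_get_user_pages_done() instead.
	 */
	amdgpu_ttm_tt_get_user_pages(ttm, pages);
	/* ... use the pages ... */
	amdgpu_ttm_tt_get_user_pages_done(ttm);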

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 8dd69e69
+0 −1
@@ -61,7 +61,6 @@ struct kgd_mem {

	atomic_t invalid;
	struct amdkfd_process_info *process_info;
	struct page **user_pages;

	struct amdgpu_sync sync;

+25 −70
@@ -499,28 +499,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
		goto out;
	}

	/* If no restore worker is running concurrently, user_pages
	 * should not be allocated
	 */
	WARN(mem->user_pages, "Leaking user_pages array");

	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
					   sizeof(struct page *),
					   GFP_KERNEL | __GFP_ZERO);
	if (!mem->user_pages) {
		pr_err("%s: Failed to allocate pages array\n", __func__);
		ret = -ENOMEM;
		goto unregister_out;
	}

	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
	if (ret) {
		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
		goto free_out;
		goto unregister_out;
	}

	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		pr_err("%s: Failed to reserve BO\n", __func__);
@@ -533,11 +517,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
	amdgpu_bo_unreserve(bo);

release_out:
	if (ret)
		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
free_out:
	kvfree(mem->user_pages);
	mem->user_pages = NULL;
	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
unregister_out:
	if (ret)
		amdgpu_mn_unregister(bo);
@@ -596,7 +576,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
	ctx->kfd_bo.priority = 0;
	ctx->kfd_bo.tv.bo = &bo->tbo;
	ctx->kfd_bo.tv.num_shared = 1;
	ctx->kfd_bo.user_pages = NULL;
	list_add(&ctx->kfd_bo.tv.head, &ctx->list);

	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -660,7 +639,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
	ctx->kfd_bo.priority = 0;
	ctx->kfd_bo.tv.bo = &bo->tbo;
	ctx->kfd_bo.tv.num_shared = 1;
	ctx->kfd_bo.user_pages = NULL;
	list_add(&ctx->kfd_bo.tv.head, &ctx->list);

	i = 0;
@@ -1275,15 +1253,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
	list_del(&bo_list_entry->head);
	mutex_unlock(&process_info->lock);

	/* Free user pages if necessary */
	if (mem->user_pages) {
		pr_debug("%s: Freeing user_pages array\n", __func__);
		if (mem->user_pages[0])
			release_pages(mem->user_pages,
					mem->bo->tbo.ttm->num_pages);
		kvfree(mem->user_pages);
	}

	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
	if (unlikely(ret))
		return ret;
@@ -1757,25 +1726,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,

		bo = mem->bo;

		if (!mem->user_pages) {
			mem->user_pages =
				kvmalloc_array(bo->tbo.ttm->num_pages,
						 sizeof(struct page *),
						 GFP_KERNEL | __GFP_ZERO);
			if (!mem->user_pages) {
				pr_err("%s: Failed to allocate pages array\n",
				       __func__);
				return -ENOMEM;
			}
		} else if (mem->user_pages[0]) {
			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
		}

		/* Get updated user pages */
		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
						   mem->user_pages);
						   bo->tbo.ttm->pages);
		if (ret) {
			mem->user_pages[0] = NULL;
			bo->tbo.ttm->pages[0] = NULL;
			pr_info("%s: Failed to get user pages: %d\n",
				__func__, ret);
			/* Pretend it succeeded. It will fail later
@@ -1784,12 +1739,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
			 * stalled user mode queues.
			 */
		}

		/* Mark the BO as valid unless it was invalidated
		 * again concurrently
		 */
		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
			return -EAGAIN;
	}

	return 0;
@@ -1819,7 +1768,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
				     GFP_KERNEL);
	if (!pd_bo_list_entries) {
		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
		return -ENOMEM;
		ret = -ENOMEM;
		goto out_no_mem;
	}

	INIT_LIST_HEAD(&resv_list);
@@ -1843,7 +1793,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
	WARN(!list_empty(&duplicates), "Duplicates should be empty");
	if (ret)
		goto out;
		goto out_free;

	amdgpu_sync_create(&sync);

@@ -1859,10 +1809,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)

		bo = mem->bo;

		/* Copy pages array and validate the BO if we got user pages */
		if (mem->user_pages[0]) {
			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     mem->user_pages);
		/* Validate the BO if we got user pages */
		if (bo->tbo.ttm->pages[0]) {
			amdgpu_bo_placement_from_domain(bo, mem->domain);
			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (ret) {
@@ -1871,16 +1819,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
			}
		}

		/* Validate succeeded, now the BO owns the pages, free
		 * our copy of the pointer array. Put this BO back on
		 * the userptr_valid_list. If we need to revalidate
		 * it, we need to start from scratch.
		 */
		kvfree(mem->user_pages);
		mem->user_pages = NULL;
		list_move_tail(&mem->validate_list.head,
			       &process_info->userptr_valid_list);

		/* Stop HMM tracking the userptr update. We don't check the
		 * return value for concurrent CPU page table updates because
		 * we will reschedule the restore worker if
		 * process_info->evicted_bos is updated.
		 */
		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

		/* Update mapping. If the BO was not validated
		 * (because we couldn't get user pages), this will
		 * clear the page table entries, which will result in
@@ -1910,8 +1858,15 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
	ttm_eu_backoff_reservation(&ticket, &resv_list);
	amdgpu_sync_wait(&sync, false);
	amdgpu_sync_free(&sync);
out:
out_free:
	kfree(pd_bo_list_entries);
out_no_mem:
	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_inval_list,
				 validate_list.head) {
		bo = mem->bo;
		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
	}

	return ret;
}
+1 −1
@@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
	struct amdgpu_bo_va		*bo_va;
	uint32_t			priority;
	struct page			**user_pages;
	int				user_invalidated;
	bool				user_invalidated;
};

struct amdgpu_bo_list {
+50 −88
@@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
	p->uf_entry.tv.bo = &bo->tbo;
	/* One for TTM and one for the CS job */
	p->uf_entry.tv.num_shared = 2;
	p->uf_entry.user_pages = NULL;

	drm_gem_object_put_unlocked(gobj);

@@ -540,14 +539,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
		if (usermm && usermm != current->mm)
			return -EPERM;

		/* Check if we have user pages and nobody bound the BO already */
		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
		    lobj->user_pages) {
		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
		    lobj->user_invalidated && lobj->user_pages) {
			amdgpu_bo_placement_from_domain(bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (r)
				return r;

			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
						     lobj->user_pages);
			binding_userptr = true;
@@ -578,7 +577,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
	struct amdgpu_bo *gds;
	struct amdgpu_bo *gws;
	struct amdgpu_bo *oa;
	unsigned tries = 10;
	int r;

	INIT_LIST_HEAD(&p->validated);
@@ -614,79 +612,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
		list_add(&p->uf_entry.tv.head, &p->validated);

	while (1) {
		struct list_head need_pages;

		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
					   &duplicates);
		if (unlikely(r != 0)) {
			if (r != -ERESTARTSYS)
				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
			goto error_free_pages;
		}

		INIT_LIST_HEAD(&need_pages);
	/* Get userptr backing pages. If pages are updated after being
	 * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
	 * will do amdgpu_ttm_backend_bind() to flush and invalidate the new
	 * pages.
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
		bool userpage_invalidated = false;
		int i;

			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
				 &e->user_invalidated) && e->user_pages) {

				/* We acquired a page array, but somebody
				 * invalidated it. Free it and try again
				 */
				release_pages(e->user_pages,
					      bo->tbo.ttm->num_pages);
				kvfree(e->user_pages);
				e->user_pages = NULL;
			}

			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
			    !e->user_pages) {
				list_del(&e->tv.head);
				list_add(&e->tv.head, &need_pages);

				amdgpu_bo_unreserve(bo);
			}
		}

		if (list_empty(&need_pages))
			break;

		/* Unreserve everything again. */
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

		/* We tried too many times, just abort */
		if (!--tries) {
			r = -EDEADLK;
			DRM_ERROR("deadlock in %s\n", __func__);
			goto error_free_pages;
		}

		/* Fill the page arrays for all userptrs. */
		list_for_each_entry(e, &need_pages, tv.head) {
			struct ttm_tt *ttm = e->tv.bo->ttm;

			e->user_pages = kvmalloc_array(ttm->num_pages,
		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
					sizeof(struct page *),
					GFP_KERNEL | __GFP_ZERO);
		if (!e->user_pages) {
				r = -ENOMEM;
				DRM_ERROR("calloc failure in %s\n", __func__);
				goto error_free_pages;
			DRM_ERROR("calloc failure\n");
			return -ENOMEM;
		}

			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
		r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
		if (r) {
				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
			kvfree(e->user_pages);
			e->user_pages = NULL;
				goto error_free_pages;
			return r;
		}

		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
				userpage_invalidated = true;
				break;
			}
		}
		e->user_invalidated = userpage_invalidated;
	}

		/* And try again. */
		list_splice(&need_pages, &p->validated);
	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
				   &duplicates);
	if (unlikely(r != 0)) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
		goto out;
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -755,17 +719,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
error_validate:
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

error_free_pages:

	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		if (!e->user_pages)
			continue;

		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
		kvfree(e->user_pages);
	}

out:
	return r;
}

@@ -1224,7 +1178,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_job *job;
	uint64_t seq;

	int r;

	job = p->job;
@@ -1234,15 +1187,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
	if (r)
		goto error_unlock;

	/* No memory allocation is allowed while holding the mn lock */
	/* No memory allocation is allowed while holding the mn lock.
	 * p->mn is held until amdgpu_cs_submit is finished and the fence is
	 * added to the BOs.
	 */
	amdgpu_mn_lock(p->mn);

	/* If userptrs are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN; drmIoctl() in libdrm will restart the amdgpu_cs_ioctl.
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
			r = -ERESTARTSYS;
			goto error_abort;
		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
	}
	if (r) {
		r = -EAGAIN;
		goto error_abort;
	}

	job->owner = p->filp;
@@ -1338,6 +1299,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)

out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);

	return r;
}

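For reference, the -EAGAIN restart in amdgpu_cs_submit() above works
because libdrm's drmIoctl() wrapper re-issues interrupted ioctls;
roughly:

	int drmIoctl(int fd, unsigned long request, void *arg)
	{
		int ret;

		do {
			ret = ioctl(fd, request, arg);
		} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
		return ret;
	}
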
+6 −8
@@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,

		r = amdgpu_bo_reserve(bo, true);
		if (r)
			goto free_pages;
			goto user_pages_done;

		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		amdgpu_bo_unreserve(bo);
		if (r)
			goto free_pages;
			goto user_pages_done;
	}

	r = drm_gem_handle_create(filp, gobj, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_put_unlocked(gobj);
	if (r)
		return r;
		goto user_pages_done;

	args->handle = handle;
	return 0;

free_pages:
	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
user_pages_done:
	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

release_object:
	drm_gem_object_put_unlocked(gobj);