Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2f568dbd authored by Christian König's avatar Christian König Committed by Alex Deucher
Browse files

drm/amdgpu: move get_user_pages out of amdgpu_ttm_tt_pin_userptr v6



That avoids lock inversion between the BO reservation lock
and the anon_vma lock.

v2:
* Changed amdgpu_bo_list_entry.user_pages to an array of pointers
* Lock mmap_sem only for get_user_pages
* Added invalidation of unbound userpointer BOs
* Fixed memory leak and page reference leak

v3 (chk):
* Revert locking mmap_sem only for_get user_pages
* Revert adding invalidation of unbound userpointer BOs
* Sanitize and fix error handling

v4 (chk):
* Init userpages pointer everywhere.
* Fix error handling when get_user_pages() fails.
* Add invalidation of unbound userpointer BOs again.

v5 (chk):
* Add maximum number of tries.

v6 (chk):
* Fix error handling when we run out of tries.

Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> (v4)
Acked-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent d564a06e
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -471,6 +471,8 @@ struct amdgpu_bo_list_entry {
	struct ttm_validate_buffer	tv;
	struct amdgpu_bo_va		*bo_va;
	uint32_t			priority;
	struct page			**user_pages;
	int				user_invalidated;
};

struct amdgpu_bo_va_mapping {
@@ -2365,11 +2367,14 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
		       struct amdgpu_ring **out_ring);
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain);
bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
				     uint32_t flags);
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
				  unsigned long end);
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
				       int *last_invalidated);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
				 struct ttm_mem_reg *mem);
+1 −0
Original line number Diff line number Diff line
@@ -200,6 +200,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,

		list_add_tail(&list->array[i].tv.head,
			      &bucket[priority]);
		list->array[i].user_pages = NULL;
	}

	/* Connect the sorted buckets in the output list. */
+110 −4
Original line number Diff line number Diff line
@@ -111,6 +111,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
	p->uf_entry.priority = 0;
	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
	p->uf_entry.tv.shared = true;
	p->uf_entry.user_pages = NULL;

	drm_gem_object_unreference_unlocked(gobj);
	return 0;
@@ -297,6 +298,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,

	list_for_each_entry(lobj, validated, tv.head) {
		struct amdgpu_bo *bo = lobj->robj;
		bool binding_userptr = false;
		struct mm_struct *usermm;
		uint32_t domain;

@@ -304,6 +306,15 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
		if (usermm && usermm != current->mm)
			return -EPERM;

		/* Check if we have user pages and nobody bound the BO already */
		if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
			size_t size = sizeof(struct page *);

			size *= bo->tbo.ttm->num_pages;
			memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
			binding_userptr = true;
		}

		if (bo->pin_count)
			continue;

@@ -334,6 +345,11 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
			}
			return r;
		}

		if (binding_userptr) {
			drm_free_large(lobj->user_pages);
			lobj->user_pages = NULL;
		}
	}
	return 0;
}
@@ -342,8 +358,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_bo_list_entry *e;
	struct list_head duplicates;
	bool need_mmap_lock = false;
	unsigned i, tries = 10;
	int r;

	INIT_LIST_HEAD(&p->validated);
@@ -364,9 +382,81 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
	while (1) {
		struct list_head need_pages;
		unsigned i;

		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
					   &duplicates);
		if (unlikely(r != 0))
		goto error_reserve;
			goto error_free_pages;

		/* Without a BO list we don't have userptr BOs */
		if (!p->bo_list)
			break;

		INIT_LIST_HEAD(&need_pages);
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {

			e = &p->bo_list->array[i];

			if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
				 &e->user_invalidated) && e->user_pages) {

				/* We acquired a page array, but somebody
				 * invalidated it. Free it an try again
				 */
				release_pages(e->user_pages,
					      e->robj->tbo.ttm->num_pages,
					      false);
				drm_free_large(e->user_pages);
				e->user_pages = NULL;
			}

			if (e->robj->tbo.ttm->state != tt_bound &&
			    !e->user_pages) {
				list_del(&e->tv.head);
				list_add(&e->tv.head, &need_pages);

				amdgpu_bo_unreserve(e->robj);
			}
		}

		if (list_empty(&need_pages))
			break;

		/* Unreserve everything again. */
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

		/* We tried to often, just abort */
		if (!--tries) {
			r = -EDEADLK;
			goto error_free_pages;
		}

		/* Fill the page arrays for all useptrs. */
		list_for_each_entry(e, &need_pages, tv.head) {
			struct ttm_tt *ttm = e->robj->tbo.ttm;

			e->user_pages = drm_calloc_large(ttm->num_pages,
							 sizeof(struct page*));
			if (!e->user_pages) {
				r = -ENOMEM;
				goto error_free_pages;
			}

			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
			if (r) {
				drm_free_large(e->user_pages);
				e->user_pages = NULL;
				goto error_free_pages;
			}
		}

		/* And try again. */
		list_splice(&need_pages, &p->validated);
	}

	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);

@@ -398,10 +488,26 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
	}

error_reserve:
error_free_pages:

	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	if (p->bo_list) {
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {
			e = &p->bo_list->array[i];

			if (!e->user_pages)
				continue;

			release_pages(e->user_pages,
				      e->robj->tbo.ttm->num_pages,
				      false);
			drm_free_large(e->user_pages);
		}
	}

	return r;
}

+17 −6
Original line number Diff line number Diff line
@@ -274,18 +274,23 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,

	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
		down_read(&current->mm->mmap_sem);

		r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
						 bo->tbo.ttm->pages);
		if (r)
			goto unlock_mmap_sem;

		r = amdgpu_bo_reserve(bo, true);
		if (r) {
			up_read(&current->mm->mmap_sem);
			goto release_object;
		}
		if (r)
			goto free_pages;

		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
		amdgpu_bo_unreserve(bo);
		up_read(&current->mm->mmap_sem);
		if (r)
			goto release_object;
			goto free_pages;

		up_read(&current->mm->mmap_sem);
	}

	r = drm_gem_handle_create(filp, gobj, &handle);
@@ -297,6 +302,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
	args->handle = handle;
	return 0;

free_pages:
	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false);

unlock_mmap_sem:
	up_read(&current->mm->mmap_sem);

release_object:
	drm_gem_object_unreference_unlocked(gobj);

+39 −14
Original line number Diff line number Diff line
@@ -508,22 +508,18 @@ struct amdgpu_ttm_tt {
	uint32_t		userflags;
	spinlock_t              guptasklock;
	struct list_head        guptasks;
	atomic_t		mmu_invalidations;
};

/* prepare the sg table with the user pages */
static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned pinned = 0, nents;
	int r;

	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
	unsigned pinned = 0;
	int r;

	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
		/* check that we only pin down anonymous memory
		/* check that we only use anonymous memory
		   to prevent problems with writeback */
		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
		struct vm_area_struct *vma;
@@ -536,7 +532,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
	do {
		unsigned num_pages = ttm->num_pages - pinned;
		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
		struct page **pages = ttm->pages + pinned;
		struct page **p = pages + pinned;
		struct amdgpu_ttm_gup_task_list guptask;

		guptask.task = current;
@@ -545,7 +541,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
		spin_unlock(&gtt->guptasklock);

		r = get_user_pages(current, current->mm, userptr, num_pages,
				   write, 0, pages, NULL);
				   write, 0, p, NULL);

		spin_lock(&gtt->guptasklock);
		list_del(&guptask.list);
@@ -558,6 +554,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)

	} while (pinned < ttm->num_pages);

	return 0;

release_pages:
	release_pages(pages, pinned, 0);
	return r;
}

/* prepare the sg table with the user pages */
static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned nents;
	int r;

	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
				      ttm->num_pages << PAGE_SHIFT,
				      GFP_KERNEL);
@@ -576,9 +591,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)

release_sg:
	kfree(ttm->sg);

release_pages:
	release_pages(ttm->pages, pinned, 0);
	return r;
}

@@ -803,6 +815,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
	gtt->userflags = flags;
	spin_lock_init(&gtt->guptasklock);
	INIT_LIST_HEAD(&gtt->guptasks);
	atomic_set(&gtt->mmu_invalidations, 0);

	return 0;
}
@@ -840,9 +853,21 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
	}
	spin_unlock(&gtt->guptasklock);

	atomic_inc(&gtt->mmu_invalidations);

	return true;
}

bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
				       int *last_invalidated)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int prev_invalidated = *last_invalidated;

	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
	return prev_invalidated != *last_invalidated;
}

bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
Loading