
Commit 3f3333f8 authored by Christian König, committed by Alex Deucher

drm/amdgpu: track evicted page tables v2



Instead of validating all page tables when one was evicted,
track which one needs a validation.

v2: simplify amdgpu_vm_ready as well

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 00b5cc83
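Before the per-file diff, the idea in one paragraph: instead of snapshotting a global eviction counter and revalidating every page-table level whenever the counter moved, each VM now keeps a list of exactly those page-table BOs that were evicted, protected by the VM's status lock. Command submission drains that list, and "is this VM ready?" becomes a cheap emptiness check. Below is a minimal userspace model of the pattern; all names and types in it are hypothetical stand-ins, not the kernel API (a pthread spinlock models vm->status_lock, a singly linked list models the kernel's list_head):

/* sketch.c - userspace model of the evicted-list scheme (assumptions:
 * hypothetical types, no TTM, no shadow BOs). Build: cc sketch.c -lpthread */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct bo {
	int id;
	struct bo *next;	/* link in vm->evicted */
};

struct vm {
	pthread_spinlock_t status_lock;	/* protects the evicted list */
	struct bo *evicted;		/* BOs that still need validation */
};

/* eviction side: remember exactly which BO needs validation later */
static void vm_bo_evicted(struct vm *vm, struct bo *bo)
{
	pthread_spin_lock(&vm->status_lock);
	bo->next = vm->evicted;
	vm->evicted = bo;
	pthread_spin_unlock(&vm->status_lock);
}

/* CS side: drain the list instead of walking every page-table level.
 * The lock is dropped around the potentially slow validation, as in the
 * patch; unlike the patch, this sketch pops the entry first for brevity
 * (the kernel keeps it queued and list_del_init()s it after validating). */
static int vm_validate_pt_bos(struct vm *vm, int (*validate)(struct bo *))
{
	pthread_spin_lock(&vm->status_lock);
	while (vm->evicted) {
		struct bo *bo = vm->evicted;
		int r;

		vm->evicted = bo->next;
		pthread_spin_unlock(&vm->status_lock);

		r = validate(bo);
		if (r)
			return r;

		pthread_spin_lock(&vm->status_lock);
	}
	pthread_spin_unlock(&vm->status_lock);

	return 0;
}

/* the simplified readiness test: no validation, just an emptiness check */
static bool vm_ready(struct vm *vm)
{
	bool ready;

	pthread_spin_lock(&vm->status_lock);
	ready = vm->evicted == NULL;
	pthread_spin_unlock(&vm->status_lock);

	return ready;
}

static int validate_ok(struct bo *bo)
{
	printf("revalidated page table BO %d\n", bo->id);
	return 0;
}

int main(void)
{
	struct vm vm = { .evicted = NULL };
	struct bo pt = { .id = 1, .next = NULL };

	pthread_spin_init(&vm.status_lock, PTHREAD_PROCESS_PRIVATE);
	vm_bo_evicted(&vm, &pt);		/* eviction noticed */
	printf("ready: %d\n", vm_ready(&vm));	/* 0: must validate first */
	vm_validate_pt_bos(&vm, validate_ok);
	printf("ready: %d\n", vm_ready(&vm));	/* 1: list drained */
	return 0;
}

The payoff is the same as in the patch: an unrelated eviction elsewhere no longer forces a full page-table walk, and the readiness check needs no device pointer because the state is per-VM, as the diff below shows.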
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  +2 −5
@@ -636,9 +636,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);
-	fpriv->vm.last_eviction_counter =
-		atomic64_read(&p->adev->num_evictions);
-
	if (p->bo_list) {
		struct amdgpu_bo *gds = p->bo_list->gds_obj;
		struct amdgpu_bo *gws = p->bo_list->gws_obj;
@@ -835,7 +832,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
			if (!bo)
				continue;

-			amdgpu_vm_bo_invalidate(adev, bo);
+			amdgpu_vm_bo_invalidate(adev, bo, false);
		}
	}

@@ -860,7 +857,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
	}

	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
+		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);

		r = amdgpu_bo_vm_update_pte(p);
		if (r)
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c  +4 −4
@@ -160,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
	if (bo_va && --bo_va->ref_count == 0) {
		amdgpu_vm_bo_rmv(adev, bo_va);

-		if (amdgpu_vm_ready(adev, vm)) {
+		if (amdgpu_vm_ready(vm)) {
			struct dma_fence *fence = NULL;

			r = amdgpu_vm_clear_freed(adev, vm, &fence);
@@ -481,10 +481,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
				    struct list_head *list,
				    uint32_t operation)
{
-	int r = -ERESTARTSYS;
+	int r;

-	if (!amdgpu_vm_ready(adev, vm))
-		goto error;
+	if (!amdgpu_vm_ready(vm))
+		return;

	r = amdgpu_vm_update_directories(adev, vm);
	if (r)
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  +1 −1
@@ -929,7 +929,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
		return;

	abo = container_of(bo, struct amdgpu_bo, tbo);
-	amdgpu_vm_bo_invalidate(adev, abo);
+	amdgpu_vm_bo_invalidate(adev, abo, evict);

	amdgpu_bo_kunmap(abo);

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  +103 −124
@@ -140,7 +140,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
			 struct list_head *validated,
			 struct amdgpu_bo_list_entry *entry)
{
-	entry->robj = vm->root.bo;
+	entry->robj = vm->root.base.bo;
	entry->priority = 0;
	entry->tv.bo = &entry->robj->tbo;
	entry->tv.shared = true;
@@ -149,100 +149,56 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
}

/**
- * amdgpu_vm_validate_layer - validate a single page table level
+ * amdgpu_vm_validate_pt_bos - validate the page table BOs
 *
- * @parent: parent page table level
+ * @adev: amdgpu device pointer
+ * @vm: vm providing the BOs
 * @validate: callback to do the validation
 * @param: parameter for the validation callback
 *
 * Validate the page table BOs on command submission if neccessary.
 */
-static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
-				    int (*validate)(void *, struct amdgpu_bo *),
-				    void *param, bool use_cpu_for_update,
-				    struct ttm_bo_global *glob)
+int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			      int (*validate)(void *p, struct amdgpu_bo *bo),
+			      void *param)
{
-	unsigned i;
+	struct ttm_bo_global *glob = adev->mman.bdev.glob;
	int r;

-	if (use_cpu_for_update) {
-		r = amdgpu_bo_kmap(parent->bo, NULL);
-		if (r)
-			return r;
-	}
-
-	if (!parent->entries)
-		return 0;
-
-	for (i = 0; i <= parent->last_entry_used; ++i) {
-		struct amdgpu_vm_pt *entry = &parent->entries[i];
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->evicted)) {
+		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_bo *bo;

-		if (!entry->bo)
-			continue;
+		bo_base = list_first_entry(&vm->evicted,
+					   struct amdgpu_vm_bo_base,
+					   vm_status);
+		spin_unlock(&vm->status_lock);

-		r = validate(param, entry->bo);
+		bo = bo_base->bo;
+		BUG_ON(!bo);
+		if (bo->parent) {
+			r = validate(param, bo);
			if (r)
				return r;

			spin_lock(&glob->lru_lock);
-		ttm_bo_move_to_lru_tail(&entry->bo->tbo);
-		if (entry->bo->shadow)
-			ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
+			ttm_bo_move_to_lru_tail(&bo->tbo);
+			if (bo->shadow)
+				ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
			spin_unlock(&glob->lru_lock);

-		/*
-		 * Recurse into the sub directory. This is harmless because we
-		 * have only a maximum of 5 layers.
-		 */
-		r = amdgpu_vm_validate_level(entry, validate, param,
-					     use_cpu_for_update, glob);
-		if (r)
-			return r;
		}

+		if (vm->use_cpu_for_update) {
+			r = amdgpu_bo_kmap(bo, NULL);
+			if (r)
+				return r;
+		}

-/**
- * amdgpu_vm_validate_pt_bos - validate the page table BOs
- *
- * @adev: amdgpu device pointer
- * @vm: vm providing the BOs
- * @validate: callback to do the validation
- * @param: parameter for the validation callback
- *
- * Validate the page table BOs on command submission if neccessary.
- */
-int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			      int (*validate)(void *p, struct amdgpu_bo *bo),
-			      void *param)
-{
-	uint64_t num_evictions;
-
-	/* We only need to validate the page tables
-	 * if they aren't already valid.
-	 */
-	num_evictions = atomic64_read(&adev->num_evictions);
-	if (num_evictions == vm->last_eviction_counter)
-		return 0;
-
-	return amdgpu_vm_validate_level(&vm->root, validate, param,
-					vm->use_cpu_for_update,
-					adev->mman.bdev.glob);
+		spin_lock(&vm->status_lock);
+		list_del_init(&bo_base->vm_status);
+	}

-/**
- * amdgpu_vm_check - helper for amdgpu_vm_ready
- */
-static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
-{
-	/* if anything is swapped out don't swap it in here,
-	   just abort and wait for the next CS */
-	if (!amdgpu_bo_gpu_accessible(bo))
-		return -ERESTARTSYS;
-
-	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-		return -ERESTARTSYS;
+	spin_unlock(&vm->status_lock);

	return 0;
}
@@ -250,17 +206,19 @@ static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
/**
 * amdgpu_vm_ready - check VM is ready for updates
 *
- * @adev: amdgpu device
 * @vm: VM to check
 *
 * Check if all VM PDs/PTs are ready for updates
 */
-bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
-	if (amdgpu_vm_check(NULL, vm->root.bo))
-		return false;
+	bool ready;

+	spin_lock(&vm->status_lock);
+	ready = list_empty(&vm->evicted);
+	spin_unlock(&vm->status_lock);

-	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
+	return ready;
}

/**
@@ -326,11 +284,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,

	/* walk over the address space and allocate the page tables */
	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
-		struct reservation_object *resv = vm->root.bo->tbo.resv;
+		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
		struct amdgpu_bo *pt;

-		if (!entry->bo) {
+		if (!entry->base.bo) {
			r = amdgpu_bo_create(adev,
					     amdgpu_vm_bo_size(adev, level),
					     AMDGPU_GPU_PAGE_SIZE, true,
@@ -351,9 +309,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
			/* Keep a reference to the root directory to avoid
			* freeing them up in the wrong order.
			*/
-			pt->parent = amdgpu_bo_ref(vm->root.bo);
+			pt->parent = amdgpu_bo_ref(vm->root.base.bo);

-			entry->bo = pt;
+			entry->base.vm = vm;
+			entry->base.bo = pt;
+			list_add_tail(&entry->base.bo_list, &pt->va);
+			INIT_LIST_HEAD(&entry->base.vm_status);
			entry->addr = 0;
		}

@@ -1020,7 +981,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	int r;

	amdgpu_sync_create(&sync);
-	amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner);
+	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
	r = amdgpu_sync_wait(&sync, true);
	amdgpu_sync_free(&sync);

@@ -1059,10 +1020,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,

	memset(&params, 0, sizeof(params));
	params.adev = adev;
-	shadow = parent->bo->shadow;
+	shadow = parent->base.bo->shadow;

	if (vm->use_cpu_for_update) {
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
+		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
		if (unlikely(r))
			return r;
@@ -1078,7 +1039,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
		/* assume the worst case */
		ndw += parent->last_entry_used * 6;

-		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);

		if (shadow) {
			shadow_addr = amdgpu_bo_gpu_offset(shadow);
@@ -1098,7 +1059,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,

	/* walk over the address space and update the directory */
	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
+		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
		uint64_t pde, pt;

		if (bo == NULL)
@@ -1141,7 +1102,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
	}

	if (count) {
-		if (vm->root.bo->shadow)
+		if (vm->root.base.bo->shadow)
			params.func(&params, last_shadow, last_pt,
				    count, incr, AMDGPU_PTE_VALID);

@@ -1154,7 +1115,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
			amdgpu_job_free(job);
		} else {
			amdgpu_ring_pad_ib(ring, params.ib);
-			amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
+			amdgpu_sync_resv(adev, &job->sync,
+					 parent->base.bo->tbo.resv,
					 AMDGPU_FENCE_OWNER_VM);
			if (shadow)
				amdgpu_sync_resv(adev, &job->sync,
@@ -1167,7 +1129,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
			if (r)
				goto error_free;

-			amdgpu_bo_fence(parent->bo, fence, true);
+			amdgpu_bo_fence(parent->base.bo, fence, true);
			dma_fence_put(vm->last_dir_update);
			vm->last_dir_update = dma_fence_get(fence);
			dma_fence_put(fence);
@@ -1180,7 +1142,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];

-		if (!entry->bo)
+		if (!entry->base.bo)
			continue;

		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
@@ -1213,7 +1175,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];

-		if (!entry->bo)
+		if (!entry->base.bo)
			continue;

		entry->addr = ~0ULL;
@@ -1268,7 +1230,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
	*entry = &p->vm->root;
	while ((*entry)->entries) {
		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size((*entry)->bo) / 8;
+		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
		*parent = *entry;
		*entry = &(*entry)->entries[idx];
	}
@@ -1304,7 +1266,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
	    p->src ||
	    !(flags & AMDGPU_PTE_VALID)) {

-		dst = amdgpu_bo_gpu_offset(entry->bo);
+		dst = amdgpu_bo_gpu_offset(entry->base.bo);
		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
		flags = AMDGPU_PTE_VALID;
	} else {
@@ -1330,18 +1292,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
		tmp = p->pages_addr;
		p->pages_addr = NULL;

-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
+		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
		pde = pd_addr + (entry - parent->entries) * 8;
		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);

		p->pages_addr = tmp;
	} else {
-		if (parent->bo->shadow) {
-			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+		if (parent->base.bo->shadow) {
+			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
			pde = pd_addr + (entry - parent->entries) * 8;
			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
		}
-		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
		pde = pd_addr + (entry - parent->entries) * 8;
		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
	}
@@ -1392,7 +1354,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
		if (entry->addr & AMDGPU_PDE_PTE)
			continue;

-		pt = entry->bo;
+		pt = entry->base.bo;
		if (use_cpu_update) {
			pe_start = (unsigned long)amdgpu_bo_kptr(pt);
		} else {
@@ -1612,12 +1574,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
	if (r)
		goto error_free;

-	r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv,
+	r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
			     owner);
	if (r)
		goto error_free;

-	r = reservation_object_reserve_shared(vm->root.bo->tbo.resv);
+	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
	if (r)
		goto error_free;

@@ -1632,7 +1594,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
	if (r)
		goto error_free;

-	amdgpu_bo_fence(vm->root.bo, f, true);
+	amdgpu_bo_fence(vm->root.base.bo, f, true);
	dma_fence_put(*fence);
	*fence = f;
	return 0;
@@ -1927,7 +1889,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
 */
static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
-	struct reservation_object *resv = vm->root.bo->tbo.resv;
+	struct reservation_object *resv = vm->root.base.bo->tbo.resv;
	struct dma_fence *excl, **shared;
	unsigned i, shared_count;
	int r;
@@ -2414,12 +2376,25 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 * Mark @bo as invalid.
 */
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-			     struct amdgpu_bo *bo)
+			     struct amdgpu_bo *bo, bool evicted)
{
	struct amdgpu_vm_bo_base *bo_base;

	list_for_each_entry(bo_base, &bo->va, bo_list) {
+		struct amdgpu_vm *vm = bo_base->vm;
+
		bo_base->moved = true;
+		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+			spin_lock(&bo_base->vm->status_lock);
+			list_move(&bo_base->vm_status, &vm->evicted);
+			spin_unlock(&bo_base->vm->status_lock);
+			continue;
+		}
+
+		/* Don't add page tables to the moved state */
+		if (bo->tbo.type == ttm_bo_type_kernel)
+			continue;
+
		spin_lock(&bo_base->vm->status_lock);
		list_move(&bo_base->vm_status, &bo_base->vm->moved);
		spin_unlock(&bo_base->vm->status_lock);
@@ -2507,6 +2482,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
		vm->reserved_vmid[i] = NULL;
	spin_lock_init(&vm->status_lock);
+	INIT_LIST_HEAD(&vm->evicted);
	INIT_LIST_HEAD(&vm->moved);
	INIT_LIST_HEAD(&vm->freed);

@@ -2551,30 +2527,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     flags,
-			     NULL, NULL, init_pde_value, &vm->root.bo);
+			     NULL, NULL, init_pde_value, &vm->root.base.bo);
	if (r)
		goto error_free_sched_entity;

-	r = amdgpu_bo_reserve(vm->root.bo, false);
+	vm->root.base.vm = vm;
+	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
+	INIT_LIST_HEAD(&vm->root.base.vm_status);
+
+	if (vm->use_cpu_for_update) {
+		r = amdgpu_bo_reserve(vm->root.base.bo, false);
		if (r)
			goto error_free_root;

-	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
-
-	if (vm->use_cpu_for_update) {
-		r = amdgpu_bo_kmap(vm->root.bo, NULL);
+		r = amdgpu_bo_kmap(vm->root.base.bo, NULL);
		if (r)
			goto error_free_root;
+		amdgpu_bo_unreserve(vm->root.base.bo);
	}

-	amdgpu_bo_unreserve(vm->root.bo);
-
	return 0;

error_free_root:
-	amdgpu_bo_unref(&vm->root.bo->shadow);
-	amdgpu_bo_unref(&vm->root.bo);
-	vm->root.bo = NULL;
+	amdgpu_bo_unref(&vm->root.base.bo->shadow);
+	amdgpu_bo_unref(&vm->root.base.bo);
+	vm->root.base.bo = NULL;

error_free_sched_entity:
	amd_sched_entity_fini(&ring->sched, &vm->entity);
@@ -2593,9 +2570,11 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
{
	unsigned i;

-	if (level->bo) {
-		amdgpu_bo_unref(&level->bo->shadow);
-		amdgpu_bo_unref(&level->bo);
+	if (level->base.bo) {
+		list_del(&level->base.bo_list);
+		list_del(&level->base.vm_status);
+		amdgpu_bo_unref(&level->base.bo->shadow);
+		amdgpu_bo_unref(&level->base.bo);
	}

	if (level->entries)
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  +9 −7
@@ -111,7 +111,7 @@ struct amdgpu_vm_bo_base {
};

struct amdgpu_vm_pt {
-	struct amdgpu_bo	*bo;
+	struct amdgpu_vm_bo_base	base;
	uint64_t			addr;

	/* array of page tables, one for each directory entry */
@@ -126,6 +126,9 @@ struct amdgpu_vm {
	/* protecting invalidated */
	spinlock_t		status_lock;

+	/* BOs who needs a validation */
+	struct list_head	evicted;
+
	/* BOs moved, but not yet updated in the PT */
	struct list_head	moved;

@@ -135,7 +138,6 @@ struct amdgpu_vm {
	/* contains the page directory */
	struct amdgpu_vm_pt     root;
	struct dma_fence	*last_dir_update;
-	uint64_t		last_eviction_counter;

	/* protecting freed */
	spinlock_t		freed_lock;
@@ -225,7 +227,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
			 struct list_head *validated,
			 struct amdgpu_bo_list_entry *entry);
-bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_ready(struct amdgpu_vm *vm);
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			      int (*callback)(void *p, struct amdgpu_bo *bo),
			      void *param);
@@ -250,7 +252,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
			struct amdgpu_bo_va *bo_va,
			bool clear);
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-			     struct amdgpu_bo *bo);
+			     struct amdgpu_bo *bo, bool evicted);
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
				       struct amdgpu_bo *bo);
struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
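Reading the header changes back over the hunks above, the changed entry points now cooperate as follows (an illustrative summary in comment form, not a compilable unit):

/*
 * Eviction (TTM move notify):
 *	amdgpu_bo_move_notify(bo, evict, ...)
 *	    -> amdgpu_vm_bo_invalidate(adev, abo, evict)
 *	       page tables (BOs sharing the root reservation) land on
 *	       vm->evicted; ordinary BOs still land on vm->moved
 *
 * Command submission:
 *	amdgpu_vm_validate_pt_bos(adev, vm, callback, param)
 *	    drains vm->evicted, revalidating only what was actually
 *	    evicted instead of every level on a counter mismatch
 *
 * Readiness check (no device pointer needed any more):
 *	amdgpu_vm_ready(vm) is simply "vm->evicted is empty"
 */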