drm/amdgpu: enable huge page handling in the VM v5 (cf2f0a37) · Commits · e / devices / android_kernel_fairphone_FP5

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

+100 −20

Original line number	Diff line number	Diff line
		@@ -352,6 +352,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,

		entry->bo = pt;
		entry->addr = 0;
		entry->huge_page = false;
		}

		if (level < adev->vm_manager.num_level) {
		@@ -1117,7 +1118,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,

		pt = amdgpu_bo_gpu_offset(bo);
		pt = amdgpu_gart_get_vm_pde(adev, pt);
		if (parent->entries[pt_idx].addr == pt)
		if (parent->entries[pt_idx].addr == pt \|\|
		parent->entries[pt_idx].huge_page)
		continue;

		parent->entries[pt_idx].addr = pt;
		@@ -1258,29 +1260,95 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
		}

		/**
		* amdgpu_vm_find_pt - find the page table for an address
		* amdgpu_vm_find_entry - find the entry for an address
		*
		* @p: see amdgpu_pte_update_params definition
		* @addr: virtual address in question
		* @entry: resulting entry or NULL
		* @parent: parent entry
		*
		* Find the page table BO for a virtual address, return NULL when none found.
		* Find the vm_pt entry and it's parent for the given address.
		*/
		static struct amdgpu_bo amdgpu_vm_get_pt(struct amdgpu_pte_update_params p,
		uint64_t addr)
		void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
		struct amdgpu_vm_pt **entry,
		struct amdgpu_vm_pt **parent)
		{
		struct amdgpu_vm_pt *entry = &p->vm->root;
		unsigned idx, level = p->adev->vm_manager.num_level;

		while (entry->entries) {
		*parent = NULL;
		*entry = &p->vm->root;
		while ((*entry)->entries) {
		idx = addr >> (p->adev->vm_manager.block_size * level--);
		idx %= amdgpu_bo_size(entry->bo) / 8;
		entry = &entry->entries[idx];
		idx %= amdgpu_bo_size((*entry)->bo) / 8;
		parent = entry;
		entry = &(entry)->entries[idx];
		}

		if (level)
		return NULL;
		*entry = NULL;
		}

		/**
		* amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
		*
		* @p: see amdgpu_pte_update_params definition
		* @entry: vm_pt entry to check
		* @parent: parent entry
		* @nptes: number of PTEs updated with this operation
		* @dst: destination address where the PTEs should point to
		* @flags: access flags fro the PTEs
		*
		* Check if we can update the PD with a huge page.
		*/
		static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
		struct amdgpu_vm_pt *entry,
		struct amdgpu_vm_pt *parent,
		unsigned nptes, uint64_t dst,
		uint64_t flags)
		{
		bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
		uint64_t pd_addr, pde;
		int r;

		/* In the case of a mixed PT the PDE must point to it*/
		if (p->adev->asic_type < CHIP_VEGA10 \|\|
		nptes != AMDGPU_VM_PTE_COUNT(p->adev) \|\|
		p->func == amdgpu_vm_do_copy_ptes \|\|
		!(flags & AMDGPU_PTE_VALID)) {

		return entry->bo;
		dst = amdgpu_bo_gpu_offset(entry->bo);
		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
		flags = AMDGPU_PTE_VALID;
		} else {
		flags \|= AMDGPU_PDE_PTE;
		}

		if (entry->addr == dst &&
		entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
		return 0;

		entry->addr = dst;
		entry->huge_page = !!(flags & AMDGPU_PDE_PTE);

		if (use_cpu_update) {
		r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr);
		if (r)
		return r;

		pde = pd_addr + (entry - parent->entries) * 8;
		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
		} else {
		if (parent->bo->shadow) {
		pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
		pde = pd_addr + (entry - parent->entries) * 8;
		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
		}
		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
		pde = pd_addr + (entry - parent->entries) * 8;
		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
		}

		return 0;
		}

		/**
		@@ -1307,21 +1375,31 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
		struct amdgpu_bo *pt;
		unsigned nptes;
		bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);

		int r;

		/* walk over the address space and update the page tables */
		for (addr = start; addr < end; addr += nptes) {
		pt = amdgpu_vm_get_pt(params, addr);
		if (!pt) {
		pr_err("PT not found, aborting update_ptes\n");
		return -EINVAL;
		}
		for (addr = start; addr < end; addr += nptes,
		dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
		struct amdgpu_vm_pt entry, parent;

		amdgpu_vm_get_entry(params, addr, &entry, &parent);
		if (!entry)
		return -ENOENT;

		if ((addr & ~mask) == (end & ~mask))
		nptes = end - addr;
		else
		nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);

		r = amdgpu_vm_handle_huge_pages(params, entry, parent,
		nptes, dst, flags);
		if (r)
		return r;

		if (entry->huge_page)
		continue;

		pt = entry->bo;
		if (use_cpu_update) {
		pe_start = (unsigned long)pt->kptr;
		} else {
		@@ -1337,8 +1415,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
		pe_start += (addr & mask) * 8;
		params->func(params, pe_start, dst, nptes,
		AMDGPU_GPU_PAGE_SIZE, flags);

		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
		}

		return 0;
		@@ -1490,6 +1566,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
		/* padding, etc. */
		ndw = 64;

		/* one PDE write for each huge page */
		ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;

		if (src) {
		/* only copy commands needed */
		ndw += ncmds * 7;
		@@ -1569,6 +1648,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,

		error_free:
		amdgpu_job_free(job);
		amdgpu_vm_invalidate_level(&vm->root);
		return r;
		}

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

+4 −0

Original line number	Diff line number	Diff line
		@@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
		/* TILED for VEGA10, reserved for older ASICs */
		#define AMDGPU_PTE_PRT (1ULL << 51)

		/* PDE is handled as PTE for VEGA10 */
		#define AMDGPU_PDE_PTE (1ULL << 54)

		/* VEGA10 only */
		#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
		#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
		@@ -100,6 +103,7 @@ struct amdgpu_bo_list_entry;
		struct amdgpu_vm_pt {
		struct amdgpu_bo *bo;
		uint64_t addr;
		bool huge_page;

		/* array of page tables, one for each directory entry */
		struct amdgpu_vm_pt *entries;