Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7e5a547f authored by Chunming Zhou, committed by Alex Deucher
Browse files

drm/amdgpu: implement the allocation range (v3)



Pass a ttm_placement pointer to amdgpu_bo_create_restricted and
add a min_offset parameter to amdgpu_bo_pin_restricted.  This makes
it easier to allocate memory with address restrictions.  With
this patch we can also enable 2-ended allocation again.

v2: fix rebase conflicts
v3: memset placements before using

Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
parent 5fc3aeeb
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@
#include <ttm/ttm_execbuf_util.h>

#include <drm/drm_gem.h>
#include <drm/amdgpu_drm.h>

#include "amd_shared.h"
#include "amdgpu_family.h"
@@ -542,12 +543,14 @@ struct amdgpu_bo_va {
	struct amdgpu_bo		*bo;
};

#define AMDGPU_GEM_DOMAIN_MAX		0x3

struct amdgpu_bo {
	/* Protected by gem.mutex */
	struct list_head		list;
	/* Protected by tbo.reserved */
	u32				initial_domain;
	struct ttm_place		placements[4];
	struct ttm_place		placements[AMDGPU_GEM_DOMAIN_MAX + 1];
	struct ttm_placement		placement;
	struct ttm_buffer_object	tbo;
	struct ttm_bo_kmap_obj		kmap;
+1 −1
Original line number Diff line number Diff line
@@ -159,7 +159,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
		goto cleanup;
	}

	r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, &base);
	r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, &base);
	if (unlikely(r != 0)) {
		amdgpu_bo_unreserve(new_rbo);
		r = -EINVAL;
+1 −1
Original line number Diff line number Diff line
@@ -150,7 +150,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
	}


	ret = amdgpu_bo_pin_restricted(rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, NULL);
	ret = amdgpu_bo_pin_restricted(rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, NULL);
	if (ret) {
		amdgpu_bo_unreserve(rbo);
		goto out_unref;
+119 −57
Original line number Diff line number Diff line
@@ -112,82 +112,111 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
	return false;
}

void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain)
/*
 * amdgpu_ttm_placement_init - build a TTM placement list from a domain mask
 * @adev: device whose mc.visible_vram_size / mc.real_vram_size are consulted
 * @placement: placement descriptor to fill in
 * @placements: backing array that receives the individual entries
 * @domain: mask of AMDGPU_GEM_DOMAIN_* bits
 * @flags: AMDGPU_GEM_CREATE_* flags influencing caching and VRAM ranges
 *
 * Points both the normal and the busy list of @placement at @placements and
 * appends entries for each domain bit set in @domain, in the order VRAM,
 * GTT, CPU, GDS, GWS, OA.  If no entry was produced, a single system
 * placement is used as fallback.  Finally the upper page bound (lpfn) of
 * every entry is set.
 *
 * NOTE: this listing is a rendered diff.  The lines that go through rbo->
 * appear to be the pre-change lines; the lines using the @adev, @placement,
 * @placements and @flags parameters are their replacements.
 *
 * NOTE(review): up to seven entries can be written here (two for VRAM, one
 * each for GTT, CPU, GDS, GWS and OA), while the arrays visible in this
 * change are sized AMDGPU_GEM_DOMAIN_MAX + 1 (= 4).  Confirm that callers
 * never pass a domain/flags combination that yields more than four entries.
 */
static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
				      struct ttm_placement *placement,
				      struct ttm_place *placements,
				      u32 domain, u64 flags)
{
	u32 c = 0, i;
	rbo->placement.placement = rbo->placements;
	rbo->placement.busy_placement = rbo->placements;

	/* The normal and the busy list share the same backing array. */
	placement->placement = placements;
	placement->busy_placement = placements;

	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
		/*
		 * With NO_CPU_ACCESS and VRAM larger than its visible part,
		 * first add an entry that starts at the end of visible VRAM.
		 */
		if (rbo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS &&
			rbo->adev->mc.visible_vram_size < rbo->adev->mc.real_vram_size) {
			rbo->placements[c].fpfn =
				rbo->adev->mc.visible_vram_size >> PAGE_SHIFT;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
		if (flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS &&
			adev->mc.visible_vram_size < adev->mc.real_vram_size) {
			placements[c].fpfn =
				adev->mc.visible_vram_size >> PAGE_SHIFT;
			placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_VRAM;
		}
		/* Always add a VRAM entry starting at page 0. */
		rbo->placements[c].fpfn = 0;
		rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
		placements[c].fpfn = 0;
		placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
			TTM_PL_FLAG_VRAM;
	}

	/* GTT: write-combined/uncached with CPU_GTT_USWC, cached otherwise. */
	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
		if (rbo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT |
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) {
			placements[c].fpfn = 0;
			placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT |
				TTM_PL_FLAG_UNCACHED;
		} else {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
			placements[c].fpfn = 0;
			placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
		}
	}

	/* CPU: same caching choice as GTT, but in the system placement. */
	if (domain & AMDGPU_GEM_DOMAIN_CPU) {
		if (rbo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM |
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) {
			placements[c].fpfn = 0;
			placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM |
				TTM_PL_FLAG_UNCACHED;
		} else {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags =  TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
			placements[c].fpfn = 0;
			placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
		}
	}

	/* GDS, GWS and OA each get one uncached entry starting at page 0. */
	if (domain & AMDGPU_GEM_DOMAIN_GDS) {
		rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
		placements[c].fpfn = 0;
		placements[c++].flags = TTM_PL_FLAG_UNCACHED |
			AMDGPU_PL_FLAG_GDS;
	}
	if (domain & AMDGPU_GEM_DOMAIN_GWS) {
		rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
		placements[c].fpfn = 0;
		placements[c++].flags = TTM_PL_FLAG_UNCACHED |
			AMDGPU_PL_FLAG_GWS;
	}
	if (domain & AMDGPU_GEM_DOMAIN_OA) {
		rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
		placements[c].fpfn = 0;
		placements[c++].flags = TTM_PL_FLAG_UNCACHED |
			AMDGPU_PL_FLAG_OA;
	}

	/* No domain bit matched: fall back to a single system placement. */
	if (!c) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
		placements[c].fpfn = 0;
		placements[c++].flags = TTM_PL_MASK_CACHING |
			TTM_PL_FLAG_SYSTEM;
	}
	rbo->placement.num_placement = c;
	rbo->placement.num_busy_placement = c;
	placement->num_placement = c;
	placement->num_busy_placement = c;

	/*
	 * With CPU_ACCESS_REQUIRED, VRAM entries that start at page 0 are
	 * capped at the end of visible VRAM; every other entry gets lpfn = 0.
	 */
	for (i = 0; i < c; i++) {
		if ((rbo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
			(rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
			!rbo->placements[i].fpfn)
			rbo->placements[i].lpfn =
				rbo->adev->mc.visible_vram_size >> PAGE_SHIFT;
		if ((flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
			(placements[i].flags & TTM_PL_FLAG_VRAM) &&
			!placements[i].fpfn)
			placements[i].lpfn =
				adev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			rbo->placements[i].lpfn = 0;
			placements[i].lpfn = 0;
	}
}

int amdgpu_bo_create(struct amdgpu_device *adev,
		     unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags,
		     struct sg_table *sg, struct amdgpu_bo **bo_ptr)
/*
 * amdgpu_ttm_placement_from_domain - (re)build a BO's own placement list
 * @rbo: buffer object whose embedded placement and placements are rewritten
 * @domain: mask of AMDGPU_GEM_DOMAIN_* bits to build the list from
 *
 * Convenience wrapper around amdgpu_ttm_placement_init() that uses the
 * device, creation flags and storage embedded in @rbo.
 */
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain)
{
	struct ttm_placement *dst = &rbo->placement;
	struct ttm_place *slots = rbo->placements;

	amdgpu_ttm_placement_init(rbo->adev, dst, slots, domain, rbo->flags);
}

/*
 * amdgpu_fill_placement_to_bo - copy a caller-supplied placement into a BO
 * @bo: buffer object whose embedded placement/placements are filled in
 * @placement: placement to copy from
 *
 * Copies the primary placement entries of @placement into @bo->placements
 * and points both the normal and the busy list of @bo->placement at that
 * copy.  A source with more entries than @bo->placements can hold is
 * treated as a fatal programming error.
 *
 * Only the primary list is copied; the source's busy list contents are not
 * used.  Because the busy list of @bo aliases the copied primary entries,
 * the busy count is limited to the number of entries actually copied so
 * that it can never refer to slots that were not filled in.
 */
static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
					struct ttm_placement *placement)
{
	BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1));

	memcpy(bo->placements, placement->placement,
	       placement->num_placement * sizeof(struct ttm_place));
	bo->placement.num_placement = placement->num_placement;
	/* Never advertise more busy entries than were copied above. */
	bo->placement.num_busy_placement =
		placement->num_busy_placement < placement->num_placement ?
		placement->num_busy_placement : placement->num_placement;
	bo->placement.placement = bo->placements;
	bo->placement.busy_placement = bo->placements;
}

int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
				unsigned long size, int byte_align,
				bool kernel, u32 domain, u64 flags,
				struct sg_table *sg,
				struct ttm_placement *placement,
				struct amdgpu_bo **bo_ptr)
{
	struct amdgpu_bo *bo;
	enum ttm_bo_type type;
@@ -241,7 +270,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
				       AMDGPU_GEM_DOMAIN_OA);

	bo->flags = flags;
	amdgpu_ttm_placement_from_domain(bo, domain);
	amdgpu_fill_placement_to_bo(bo, placement);
	/* Kernel allocation are uninterruptible */
	down_read(&adev->pm.mclk_lock);
	r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
@@ -258,6 +287,27 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
	return 0;
}

/*
 * amdgpu_bo_create - create a buffer object with the default placement
 * @adev: device the buffer object is created on
 * @size: passed through to amdgpu_bo_create_restricted()
 * @byte_align: passed through to amdgpu_bo_create_restricted()
 * @kernel: passed through to amdgpu_bo_create_restricted()
 * @domain: mask of AMDGPU_GEM_DOMAIN_* bits; also used to derive the placement
 * @flags: AMDGPU_GEM_CREATE_* flags; also used to derive the placement
 * @sg: passed through to amdgpu_bo_create_restricted()
 * @bo_ptr: passed through to amdgpu_bo_create_restricted()
 *
 * Builds a placement on the stack from @domain and @flags with
 * amdgpu_ttm_placement_init() and hands it to
 * amdgpu_bo_create_restricted(), whose return value is returned unchanged.
 *
 * NOTE(review): the on-stack array holds AMDGPU_GEM_DOMAIN_MAX + 1 entries;
 * confirm amdgpu_ttm_placement_init() cannot emit more for the @domain and
 * @flags values callers pass in.
 */
int amdgpu_bo_create(struct amdgpu_device *adev,
		     unsigned long size, int byte_align,
		     bool kernel, u32 domain, u64 flags,
		     struct sg_table *sg, struct amdgpu_bo **bo_ptr)
{
	struct ttm_place places[AMDGPU_GEM_DOMAIN_MAX + 1];
	struct ttm_placement default_placement = {0};

	/* Start from all-zero entries before the init helper fills them. */
	memset(places, 0, sizeof(places));

	amdgpu_ttm_placement_init(adev, &default_placement, places,
				  domain, flags);

	return amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
					   domain, flags, sg,
					   &default_placement, bo_ptr);
}

int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
	bool is_iomem;
@@ -313,14 +363,19 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo)
		*bo = NULL;
}

int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 max_offset,
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
			     u64 min_offset, u64 max_offset,
			     u64 *gpu_addr)
{
	int r, i;
	unsigned fpfn, lpfn;

	if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
		return -EPERM;

	if (WARN_ON_ONCE(min_offset > max_offset))
		return -EINVAL;

	if (bo->pin_count) {
		bo->pin_count++;
		if (gpu_addr)
@@ -328,7 +383,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 max_offset,

		if (max_offset != 0) {
			u64 domain_start;

			if (domain == AMDGPU_GEM_DOMAIN_VRAM)
				domain_start = bo->adev->mc.vram_start;
			else
@@ -344,12 +398,20 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 max_offset,
		/* force to pin into visible video ram */
		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) &&
			(!max_offset || max_offset > bo->adev->mc.visible_vram_size))
			bo->placements[i].lpfn =
				bo->adev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;

		    (!max_offset || max_offset > bo->adev->mc.visible_vram_size)) {
			if (WARN_ON_ONCE(min_offset >
					 bo->adev->mc.visible_vram_size))
				return -EINVAL;
			fpfn = min_offset >> PAGE_SHIFT;
			lpfn = bo->adev->mc.visible_vram_size >> PAGE_SHIFT;
		} else {
			fpfn = min_offset >> PAGE_SHIFT;
			lpfn = max_offset >> PAGE_SHIFT;
		}
		if (fpfn > bo->placements[i].fpfn)
			bo->placements[i].fpfn = fpfn;
		if (lpfn && lpfn < bo->placements[i].lpfn)
			bo->placements[i].lpfn = lpfn;
		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
	}

@@ -370,7 +432,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 max_offset,

int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
{
	return amdgpu_bo_pin_restricted(bo, domain, 0, gpu_addr);
	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
}

int amdgpu_bo_unpin(struct amdgpu_bo *bo)
+8 −1
Original line number Diff line number Diff line
@@ -130,13 +130,20 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
			    bool kernel, u32 domain, u64 flags,
			    struct sg_table *sg,
			    struct amdgpu_bo **bo_ptr);
int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
				unsigned long size, int byte_align,
				bool kernel, u32 domain, u64 flags,
				struct sg_table *sg,
				struct ttm_placement *placement,
				struct amdgpu_bo **bo_ptr);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
void amdgpu_bo_unref(struct amdgpu_bo **bo);
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr);
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
				    u64 max_offset, u64 *gpu_addr);
			     u64 min_offset, u64 max_offset,
			     u64 *gpu_addr);
int amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
void amdgpu_bo_force_delete(struct amdgpu_device *adev);
Loading