
Commit 6585d427 authored by Dave Airlie

Merge branch 'drm-next-4.15' of git://people.freedesktop.org/~agd5f/linux into drm-next

Last set of features for 4.15.  Highlights:
- Add a bo flag to allow buffers to opt out of implicit sync
- Add ctx priority setting interface (both shown in the userspace sketch below)
- Lots more powerplay cleanups
- Start to plumb through vram lost infrastructure for gpu reset
- ttm support for huge pages
- misc cleanups and bug fixes
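
The bo flag and the priority interface land as uAPI: a new GEM creation flag
and a new field in the context-allocation ioctl. What follows is a rough
userspace sketch, not part of this pull; it assumes the 4.15 amdgpu_drm.h
definitions (AMDGPU_GEM_CREATE_EXPLICIT_SYNC, AMDGPU_CTX_PRIORITY_*) and
libdrm's drmIoctl(), with error handling trimmed:

	#include <stdint.h>
	#include <string.h>
	#include <xf86drm.h>
	#include <amdgpu_drm.h>	/* 4.15 uapi header; include path depends on the install */

	/* Create a BO that opts out of implicit sync: submissions touching
	 * this buffer no longer wait on fences attached to its reservation
	 * object (see amdgpu_bo_explicit_sync() in the amdgpu_cs.c diff
	 * below). */
	static int create_explicit_sync_bo(int fd, uint32_t *handle)
	{
		union drm_amdgpu_gem_create args;

		memset(&args, 0, sizeof(args));
		args.in.bo_size = 4096;
		args.in.alignment = 4096;
		args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
		args.in.domain_flags = AMDGPU_GEM_CREATE_EXPLICIT_SYNC;

		if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args))
			return -1;
		*handle = args.out.handle;
		return 0;
	}

	/* Allocate a context at elevated priority. Anything above NORMAL
	 * requires CAP_SYS_NICE or DRM master, as enforced by
	 * amdgpu_ctx_priority_permit() in the amdgpu_ctx.c diff below. */
	static int create_high_prio_ctx(int fd, uint32_t *ctx_id)
	{
		union drm_amdgpu_ctx args;

		memset(&args, 0, sizeof(args));
		args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
		args.in.priority = AMDGPU_CTX_PRIORITY_HIGH;

		if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CTX, &args))
			return -1;
		*ctx_id = args.out.alloc.ctx_id;
		return 0;
	}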

* 'drm-next-4.15' of git://people.freedesktop.org/~agd5f/linux: (73 commits)
  drm/amd/powerplay: Place the constant on the right side of the test
  drm/amd/powerplay: Remove useless variable
  drm/amd/powerplay: Don't cast kzalloc() return value
  drm/amdgpu: allow GTT overcommit during bind
  drm/amdgpu: linear validate first then bind to GART
  drm/amd/pp: Fix overflow when setup decf/pix/disp dpm table.
  drm/amd/pp: thermal control not enabled on vega10.
  drm/amdgpu: busywait KIQ register accessing (v4)
  drm/amdgpu: report more amdgpu_fence_info
  drm/amdgpu: don't check soft_reset for sriov
  drm/amdgpu: fix duplicated setting job's vram_lost
  drm/amdgpu: reduce wb to 512 slot
  drm/amdgpu: fix regression on SR-IOV gpu reset failed
  drm/amd/powerplay: Tidy up cz_dpm_powerup_vce()
  drm/amd/powerplay: Tidy up cz_dpm_powerdown_vce()
  drm/amd/powerplay: Tidy up cz_dpm_update_vce_dpm()
  drm/amd/powerplay: Tidy up cz_dpm_update_uvd_dpm()
  drm/amd/powerplay: Tidy up cz_dpm_powerup_uvd()
  drm/amd/powerplay: Tidy up cz_dpm_powerdown_uvd()
  drm/amd/powerplay: Tidy up cz_start_dpm()
  ...
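
The vram_lost commits above give each context a snapshot of a per-device
"VRAM lost" counter; after a GPU reset that dropped VRAM contents, command
submission against a stale context fails with -ECANCELED (see the
amdgpu_cs_parser_init() hunk in the amdgpu_cs.c diff below). As a hedged
sketch of the matching userspace side, assuming the
AMDGPU_INFO_VRAM_LOST_COUNTER query from this series:

	/* Read the device's VRAM-lost counter. If it changed since our
	 * context was created, re-upload buffer contents and allocate a
	 * fresh context before submitting again. */
	static int query_vram_lost_counter(int fd, uint32_t *counter)
	{
		struct drm_amdgpu_info request;

		memset(&request, 0, sizeof(request));
		request.return_pointer = (uintptr_t)counter;
		request.return_size = sizeof(*counter);
		request.query = AMDGPU_INFO_VRAM_LOST_COUNTER;

		return drmIoctl(fd, DRM_IOCTL_AMDGPU_INFO, &request);
	}
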
parents 40d86701 96687ec0

drivers/gpu/drm/amd/amdgpu/Makefile  +1 −1

@@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-	amdgpu_queue_mgr.o amdgpu_vf_error.o
+	amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \

drivers/gpu/drm/amd/amdgpu/amdgpu.h  +31 −6

@@ -732,10 +732,14 @@ struct amdgpu_ctx {
 	struct amdgpu_device    *adev;
 	struct amdgpu_queue_mgr queue_mgr;
 	unsigned		reset_counter;
+	uint32_t		vram_lost_counter;
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
 	bool			preamble_presented;
+	enum amd_sched_priority init_priority;
+	enum amd_sched_priority override_priority;
+	struct mutex            lock;
 };
 
 struct amdgpu_ctx_mgr {
@@ -752,13 +756,18 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 			      struct dma_fence *fence, uint64_t *seq);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				   struct amdgpu_ring *ring, uint64_t seq);
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
+				  enum amd_sched_priority priority);
 
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
+
 /*
  * file private structure
  */
@@ -770,7 +779,6 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
-	u32			vram_lost_counter;
 };
 
 /*
@@ -871,7 +879,7 @@ struct amdgpu_mec {
 struct amdgpu_kiq {
 	u64			eop_gpu_addr;
 	struct amdgpu_bo	*eop_obj;
-	struct mutex		ring_mutex;
+	spinlock_t              ring_lock;
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 };
@@ -1035,6 +1043,10 @@ struct amdgpu_gfx {
 	bool                            in_suspend;
 	/* NGG */
 	struct amdgpu_ngg		ngg;
+
+	/* pipe reservation */
+	struct mutex			pipe_reserve_mutex;
+	DECLARE_BITMAP			(pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 };
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@ -1113,6 +1125,7 @@ struct amdgpu_job {
 	uint32_t		gds_base, gds_size;
 	uint32_t		gws_base, gws_size;
 	uint32_t		oa_base, oa_size;
+	uint32_t		vram_lost_counter;
 
 	/* user fence handling */
 	uint64_t		uf_addr;
@@ -1138,7 +1151,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
 /*
  * Writeback
  */
-#define AMDGPU_MAX_WB 1024	/* Reserve at most 1024 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 512	/* Reserve at most 512 WB slots for amdgpu-owned rings. */
 
 struct amdgpu_wb {
 	struct amdgpu_bo	*wb_obj;
@@ -1378,6 +1391,18 @@ struct amdgpu_atcs {
 	struct amdgpu_atcs_functions functions;
 };
 
+/*
+ * Firmware VRAM reservation
+ */
+struct amdgpu_fw_vram_usage {
+	u64 start_offset;
+	u64 size;
+	struct amdgpu_bo *reserved_bo;
+	void *va;
+};
+
+int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev);
+
 /*
  * CGS
  */
@@ -1582,6 +1607,8 @@ struct amdgpu_device {
 	struct delayed_work     late_init_work;
 
 	struct amdgpu_virt	virt;
+	/* firmware VRAM reservation */
+	struct amdgpu_fw_vram_usage fw_vram_usage;
 
 	/* link all shadow bo */
 	struct list_head                shadow_list;
@@ -1833,8 +1860,6 @@ static inline bool amdgpu_has_atpx(void) { return false; }
 extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
 extern const int amdgpu_max_kms_ioctl;
 
-bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
-			  struct amdgpu_fpriv *fpriv);
 int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
 void amdgpu_driver_unload_kms(struct drm_device *dev);
 void amdgpu_driver_lastclose_kms(struct drm_device *dev);

drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c  +17 −1

@@ -1807,6 +1807,8 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
 	uint16_t data_offset;
 	int usage_bytes = 0;
 	struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage;
+	u64 start_addr;
+	u64 size;
 
 	if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
 		firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset);
@@ -1815,8 +1817,22 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
 			  le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
 			  le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb));
 
-		usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
+		start_addr = firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware;
+		size = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb;
+
+		if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+			(uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+			ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+			/* Firmware request VRAM reservation for SR-IOV */
+			adev->fw_vram_usage.start_offset = (start_addr &
+				(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+			adev->fw_vram_usage.size = size << 10;
+			/* Use the default scratch size */
+			usage_bytes = 0;
+		} else {
+			usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
+		}
 	}
 	ctx->scratch_size_bytes = 0;
 	if (usage_bytes == 0)
 		usage_bytes = 20 * 1024;

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  +91 −81

@@ -90,12 +90,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		goto free_chunk;
 	}
 
+	mutex_lock(&p->ctx->lock);
+
 	/* get chunks */
 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
 			   sizeof(uint64_t)*cs->in.num_chunks)) {
 		ret = -EFAULT;
-		goto put_ctx;
+		goto free_chunk;
 	}
 
 	p->nchunks = cs->in.num_chunks;
@@ -103,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			    GFP_KERNEL);
 	if (!p->chunks) {
 		ret = -ENOMEM;
-		goto put_ctx;
+		goto free_chunk;
 	}
 
 	for (i = 0; i < p->nchunks; i++) {
@@ -170,6 +172,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (ret)
 		goto free_all_kdata;
 
+	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
+		ret = -ECANCELED;
+		goto free_all_kdata;
+	}
+
 	if (p->uf_entry.robj)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
@@ -183,8 +190,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	kfree(p->chunks);
 	p->chunks = NULL;
 	p->nchunks = 0;
-put_ctx:
-	amdgpu_ctx_put(p->ctx);
 free_chunk:
 	kfree(chunk_array);
 
@@ -705,7 +710,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 
 	list_for_each_entry(e, &p->validated, tv.head) {
 		struct reservation_object *resv = e->robj->tbo.resv;
-		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);
+		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
+				     amdgpu_bo_explicit_sync(e->robj));
 
 		if (r)
 			return r;
@@ -736,8 +742,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
 	dma_fence_put(parser->fence);
 
-	if (parser->ctx)
+	if (parser->ctx) {
+		mutex_unlock(&parser->ctx->lock);
 		amdgpu_ctx_put(parser->ctx);
+	}
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
 
@@ -844,14 +852,58 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_ring *ring = p->job->ring;
-	int i, r;
+	int r;
 
 	/* Only for UVD/VCE VM emulation */
-	if (ring->funcs->parse_cs) {
-		for (i = 0; i < p->job->num_ibs; i++) {
-			r = amdgpu_ring_parse_cs(ring, p, i);
+	if (p->job->ring->funcs->parse_cs) {
+		unsigned i, j;
+
+		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+			struct amdgpu_bo_va_mapping *m;
+			struct amdgpu_bo *aobj = NULL;
+			struct amdgpu_cs_chunk *chunk;
+			struct amdgpu_ib *ib;
+			uint64_t offset;
+			uint8_t *kptr;
+
+			chunk = &p->chunks[i];
+			ib = &p->job->ibs[j];
+			chunk_ib = chunk->kdata;
+
+			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+				continue;
+
+			r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
+						   &aobj, &m);
+			if (r) {
+				DRM_ERROR("IB va_start is invalid\n");
+				return r;
+			}
+
+			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
+			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+				return -EINVAL;
+			}
+
+			/* the IB should be reserved at this point */
+			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+			if (r) {
+				return r;
+			}
+
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+			kptr += chunk_ib->va_start - offset;
+
+			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+			amdgpu_bo_kunmap(aobj);
+
+			r = amdgpu_ring_parse_cs(ring, p, j);
 			if (r)
 				return r;
+
+			j++;
 		}
 	}
 
@@ -918,54 +970,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 
 		parser->job->ring = ring;
 
-		if (ring->funcs->parse_cs) {
-			struct amdgpu_bo_va_mapping *m;
-			struct amdgpu_bo *aobj = NULL;
-			uint64_t offset;
-			uint8_t *kptr;
-
-			r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
-						   &aobj, &m);
-			if (r) {
-				DRM_ERROR("IB va_start is invalid\n");
-				return r;
-			}
-
-			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
-			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-				return -EINVAL;
-			}
-
-			/* the IB should be reserved at this point */
-			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-			if (r) {
-				return r;
-			}
-
-			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-			kptr += chunk_ib->va_start - offset;
-
-			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
-			if (r) {
-				DRM_ERROR("Failed to get ib !\n");
-				return r;
-			}
-
-			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-			amdgpu_bo_kunmap(aobj);
-		} else {
-			r =  amdgpu_ib_get(adev, vm, 0, ib);
-			if (r) {
-				DRM_ERROR("Failed to get ib !\n");
-				return r;
-			}
-
-		}
+		r =  amdgpu_ib_get(adev, vm,
+					ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
+					ib);
+		if (r) {
+			DRM_ERROR("Failed to get ib !\n");
+			return r;
+		}
 
 		ib->gpu_addr = chunk_ib->va_start;
 		ib->length_dw = chunk_ib->ib_bytes / 4;
 		ib->flags = chunk_ib->flags;
 
 		j++;
 	}
 
@@ -975,7 +991,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return 0;
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1176,6 +1192,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->uf_sequence = seq;
 
 	amdgpu_job_free_resources(job);
+	amdgpu_ring_priority_get(job->ring,
+				 amd_sched_get_job_priority(&job->base));
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
@@ -1189,7 +1207,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
@@ -1197,8 +1214,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	if (!adev->accel_working)
 		return -EBUSY;
-	if (amdgpu_kms_vram_lost(adev, fpriv))
-		return -ENODEV;
 
 	parser.adev = adev;
 	parser.filp = filp;
@@ -1209,6 +1224,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		goto out;
 	}
 
+	r = amdgpu_cs_ib_fill(adev, &parser);
+	if (r)
+		goto out;
+
 	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r) {
 		if (r == -ENOMEM)
@@ -1219,9 +1238,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	}
 
 	reserved_buffers = true;
-	r = amdgpu_cs_ib_fill(adev, &parser);
-	if (r)
-		goto out;
 
 	r = amdgpu_cs_dependencies(adev, &parser);
 	if (r) {
@@ -1257,16 +1273,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 {
 	union drm_amdgpu_wait_cs *wait = data;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
 
-	if (amdgpu_kms_vram_lost(adev, fpriv))
-		return -ENODEV;
-
 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
 	if (ctx == NULL)
 		return -EINVAL;
@@ -1284,6 +1296,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 		r = PTR_ERR(fence);
 	else if (fence) {
 		r = dma_fence_wait_timeout(fence, true, timeout);
+		if (r > 0 && fence->error)
+			r = fence->error;
 		dma_fence_put(fence);
 	} else
 		r = 1;
@@ -1335,16 +1349,12 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_fence_to_handle *info = data;
 	struct dma_fence *fence;
 	struct drm_syncobj *syncobj;
 	struct sync_file *sync_file;
 	int fd, r;
 
-	if (amdgpu_kms_vram_lost(adev, fpriv))
-		return -ENODEV;
-
 	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
@@ -1425,6 +1435,9 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
 
 		if (r == 0)
 			break;
+
+		if (fence->error)
+			return fence->error;
 	}
 
 	memset(wait, 0, sizeof(*wait));
@@ -1485,7 +1498,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
 	wait->out.status = (r > 0);
 	wait->out.first_signaled = first;
 	/* set return value 0 to indicate success */
-	r = 0;
+	r = array[first]->error;
 
 err_free_fence_array:
 	for (i = 0; i < fence_count; i++)
@@ -1506,15 +1519,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_wait_fences *wait = data;
 	uint32_t fence_count = wait->in.fence_count;
 	struct drm_amdgpu_fence *fences_user;
 	struct drm_amdgpu_fence *fences;
 	int r;
 
-	if (amdgpu_kms_vram_lost(adev, fpriv))
-		return -ENODEV;
 	/* Get the fences from userspace */
 	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
 			GFP_KERNEL);
@@ -1572,14 +1582,14 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
 		return -EINVAL;
 
-	r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
-	if (unlikely(r))
-		return r;
-
-	if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-		return 0;
-
-	(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-	amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
-	return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false);
+	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
+		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+		amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false,
+				    false);
+		if (r)
+			return r;
+	}
+
+	return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
 }

drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  +96 −10

@@ -23,13 +23,41 @@
  */
 
 #include <drm/drmP.h>
+#include <drm/drm_auth.h>
 #include "amdgpu.h"
+#include "amdgpu_sched.h"
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_priority_permit(struct drm_file *filp,
+				      enum amd_sched_priority priority)
+{
+	/* NORMAL and below are accessible by everyone */
+	if (priority <= AMD_SCHED_PRIORITY_NORMAL)
+		return 0;
+
+	if (capable(CAP_SYS_NICE))
+		return 0;
+
+	if (drm_is_current_master(filp))
+		return 0;
+
+	return -EACCES;
+}
+
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+			   enum amd_sched_priority priority,
+			   struct drm_file *filp,
+			   struct amdgpu_ctx *ctx)
 {
 	unsigned i, j;
 	int r;
 
+	if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX)
+		return -EINVAL;
+
+	r = amdgpu_ctx_priority_permit(filp, priority);
+	if (r)
+		return r;
+
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->adev = adev;
 	kref_init(&ctx->refcount);
@@ -39,19 +67,24 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 	if (!ctx->fences)
 		return -ENOMEM;
 
+	mutex_init(&ctx->lock);
+
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
 		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
 	}
 
 	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	ctx->init_priority = priority;
+	ctx->override_priority = AMD_SCHED_PRIORITY_UNSET;
 
 	/* create context entity for each ring */
 	for (i = 0; i < adev->num_rings; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		struct amd_sched_rq *rq;
 
-		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+		rq = &ring->sched.sched_rq[priority];
 
 		if (ring == &adev->gfx.kiq.ring)
 			continue;
@@ -96,10 +129,14 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 				      &ctx->rings[i].entity);
 
 	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+
+	mutex_destroy(&ctx->lock);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 			    struct amdgpu_fpriv *fpriv,
+			    struct drm_file *filp,
+			    enum amd_sched_priority priority,
 			    uint32_t *id)
 {
 	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
@@ -117,8 +154,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 		kfree(ctx);
 		return r;
 	}
+
 	*id = (uint32_t)r;
-	r = amdgpu_ctx_init(adev, ctx);
+	r = amdgpu_ctx_init(adev, priority, filp, ctx);
 	if (r) {
 		idr_remove(&mgr->ctx_handles, *id);
 		*id = 0;
@@ -193,6 +231,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 {
 	int r;
 	uint32_t id;
+	enum amd_sched_priority priority;
 
 	union drm_amdgpu_ctx *args = data;
 	struct amdgpu_device *adev = dev->dev_private;
@@ -200,10 +239,16 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 
 	r = 0;
 	id = args->in.ctx_id;
+	priority = amdgpu_to_sched_priority(args->in.priority);
 
+	/* For backwards compatibility reasons, we need to accept
+	 * ioctls with garbage in the priority field */
+	if (priority == AMD_SCHED_PRIORITY_INVALID)
+		priority = AMD_SCHED_PRIORITY_NORMAL;
+
 	switch (args->in.op) {
 	case AMDGPU_CTX_OP_ALLOC_CTX:
-		r = amdgpu_ctx_alloc(adev, fpriv, &id);
+		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
 		args->out.alloc.ctx_id = id;
 		break;
 	case AMDGPU_CTX_OP_FREE_CTX:
@@ -256,12 +301,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 
 	idx = seq & (amdgpu_sched_jobs - 1);
 	other = cring->fences[idx];
-	if (other) {
-		signed long r;
-		r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
-		if (r < 0)
-			return r;
-	}
+	if (other)
+		BUG_ON(!dma_fence_is_signaled(other));
 
 	dma_fence_get(fence);
 
@@ -305,6 +346,51 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 	return fence;
 }
 
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
+				  enum amd_sched_priority priority)
+{
+	int i;
+	struct amdgpu_device *adev = ctx->adev;
+	struct amd_sched_rq *rq;
+	struct amd_sched_entity *entity;
+	struct amdgpu_ring *ring;
+	enum amd_sched_priority ctx_prio;
+
+	ctx->override_priority = priority;
+
+	ctx_prio = (ctx->override_priority == AMD_SCHED_PRIORITY_UNSET) ?
+			ctx->init_priority : ctx->override_priority;
+
+	for (i = 0; i < adev->num_rings; i++) {
+		ring = adev->rings[i];
+		entity = &ctx->rings[i].entity;
+		rq = &ring->sched.sched_rq[ctx_prio];
+
+		if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+			continue;
+
+		amd_sched_entity_set_rq(entity, rq);
+	}
+}
+
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+{
+	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
+	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
+	struct dma_fence *other = cring->fences[idx];
+
+	if (other) {
+		signed long r;
+		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+		if (r < 0) {
+			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+			return r;
+		}
+	}
+
+	return 0;
+}
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 {
 	mutex_init(&mgr->lock);