
Commit 8ee36e04 authored by Chris Wilson

drm/i915/execlists: Minimalistic timeslicing



If we have multiple contexts of equal priority pending execution,
activate a timer to demote the currently executing context in favour of
the next in the queue when that timeslice expires. This enforces
fairness between contexts (so long as they allow preemption -- forced
preemption, in the future, will kick those that do not obey) and
prevents userspace from blocking forward progress with, e.g., an
unbounded MI_SEMAPHORE_WAIT.

For the starting point here, we use a jiffy as our timeslice so that we
should be reasonably efficient with respect to frequent CPU wakeups.

Testcase: igt/gem_exec_scheduler/semaphore-resolve
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190620142052.19311-2-chris@chris-wilson.co.uk
parent 22b7a426
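As a rough illustration of the policy this patch adds, here is a toy
userspace C model (hypothetical code: pick(), need_slice() and
rotate_to_tail() are invented names for this sketch, not driver
functions). Contexts of equal priority share the engine round-robin,
one slice at a time:

/*
 * Toy model of the timeslicing policy (illustration only; none of this
 * is i915 code). When a slice expires and a rival of equal or higher
 * priority is queued, the active context is rotated to the tail of its
 * priority level, mirroring need_timeslice()/defer_active() below.
 */
#include <stdio.h>

struct ctx {
	const char *name;
	int prio;
};

/* Pick the next context to run: highest priority, FIFO within a level. */
static int pick(const struct ctx *q, int n)
{
	int best = 0, i;

	for (i = 1; i < n; i++)
		if (q[i].prio > q[best].prio)
			best = i;
	return best;
}

/* Analogue of need_timeslice(): equal/higher-priority rival queued? */
static int need_slice(const struct ctx *q, int n, int cur)
{
	int i;

	for (i = 0; i < n; i++)
		if (i != cur && q[i].prio >= q[cur].prio)
			return 1;
	return 0;
}

/* Analogue of defer_active(): demote the expired context to the tail. */
static void rotate_to_tail(struct ctx *q, int n, int cur)
{
	struct ctx tmp = q[cur];

	for (; cur < n - 1; cur++)
		q[cur] = q[cur + 1];
	q[n - 1] = tmp;
}

int main(void)
{
	struct ctx q[] = { { "A", 0 }, { "B", 0 }, { "C", 0 } };
	const int n = 3;
	int slice;

	for (slice = 0; slice < 6; slice++) {	/* one slice ~ one jiffy */
		int cur = pick(q, n);

		printf("slice %d: run %s\n", slice, q[cur].name);
		if (need_slice(q, n, cur))
			rotate_to_tail(q, n, cur);
	}
	return 0;	/* prints A B C A B C: fair round-robin */
}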
+6 −0
@@ -12,6 +12,7 @@
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/timer.h>
#include <linux/types.h>

#include "i915_gem.h"
@@ -149,6 +150,11 @@ struct intel_engine_execlists {
	 */
	struct tasklet_struct tasklet;

	/**
	 * @timer: kick the current context if its timeslice expires
	 */
	struct timer_list timer;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
+116 −0
@@ -266,6 +266,7 @@ static int effective_prio(const struct i915_request *rq)
		prio |= I915_PRIORITY_NOSEMAPHORE;

	/* Restrict mere WAIT boosts from triggering preemption */
	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
	return prio | __NO_PREEMPTION;
}

@@ -830,6 +831,81 @@ last_active(const struct intel_engine_execlists *execlists)
	return *last;
}

static void
defer_request(struct i915_request * const rq, struct list_head * const pl)
{
	struct i915_dependency *p;

	/*
	 * We want to move the interrupted request to the back of
	 * the round-robin list (i.e. its priority level), but in
	 * doing so we must also move along all in-flight requests
	 * that were waiting on it, so that they still execute
	 * after it.
	 */
	list_move_tail(&rq->sched.link, pl);

	list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
		struct i915_request *w =
			container_of(p->waiter, typeof(*w), sched);

		/* Leave semaphores spinning on the other engines */
		if (w->engine != rq->engine)
			continue;

	/* No waiter should start before the active request has completed */
		GEM_BUG_ON(i915_request_started(w));

		GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
		if (rq_prio(w) < rq_prio(rq))
			continue;

		if (list_empty(&w->sched.link))
			continue; /* Not yet submitted; unready */

		/*
		 * This should be very shallow as it is limited by the
		 * number of requests that can fit in a ring (<64) and
		 * the number of contexts that can be in flight on this
		 * engine.
		 */
		defer_request(w, pl);
	}
}

static void defer_active(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = __unwind_incomplete_requests(engine);
	if (!rq)
		return;

	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
}

/*
 * Timeslice only if a rival of equal or higher priority is queued behind
 * the active request: either the next request already submitted on this
 * engine, or whatever the queue's priority hint advertises.
 */
static bool
need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
{
	int hint;

	if (list_is_last(&rq->sched.link, &engine->active.requests))
		return false;

	hint = max(rq_prio(list_next_entry(rq, sched.link)),
		   engine->execlists.queue_priority_hint);

	return hint >= rq_prio(rq);
}

static bool
enable_timeslice(struct intel_engine_cs *engine)
{
	struct i915_request *last = last_active(&engine->execlists);

	return last && need_timeslice(engine, last);
}

static void execlists_dequeue(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -923,6 +999,32 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
			 */
			last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
			last = NULL;
		} else if (need_timeslice(engine, last) &&
			   !timer_pending(&engine->execlists.timer)) {
			GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
				  engine->name,
				  last->fence.context,
				  last->fence.seqno,
				  last->sched.attr.priority,
				  execlists->queue_priority_hint);

			ring_set_paused(engine, 1);
			defer_active(engine);

			/*
			 * Unlike for preemption, if we rewind and continue
			 * executing the same context as previously active,
			 * the order of execution will remain the same and
			 * the tail will only advance. We do not need to
			 * force a full context restore, as a lite-restore
			 * is sufficient to resample the monotonic TAIL.
			 *
			 * If we switch to any other context, similarly we
			 * will not rewind TAIL of current context, and
			 * normal save/restore will preserve state and allow
			 * us to later continue executing the same request.
			 */
			last = NULL;
		} else {
			/*
			 * Otherwise if we already have a request pending
@@ -1247,6 +1349,9 @@ static void process_csb(struct intel_engine_cs *engine)
				       sizeof(*execlists->pending));
			execlists->pending[0] = NULL;

			/*
			 * Re-arm a one-jiffy slice if an equal-priority
			 * rival is still queued behind the newly promoted
			 * context.
			 */
			if (enable_timeslice(engine))
				mod_timer(&execlists->timer, jiffies + 1);

			if (!inject_preempt_hang(execlists))
				ring_set_paused(engine, 0);
		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
@@ -1317,6 +1422,15 @@ static void execlists_submission_tasklet(unsigned long data)
	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void execlists_submission_timer(struct timer_list *timer)
{
	struct intel_engine_cs *engine =
		from_timer(engine, timer, execlists.timer);

	/* Kick the tasklet for some interrupt coalescing and reset handling */
	tasklet_hi_schedule(&engine->execlists.tasklet);
}
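
/*
 * Note that the timer itself does no scheduling: it only kicks the
 * submission tasklet, which re-runs execlists_dequeue() and, finding the
 * timer no longer pending, defers the expired context via defer_active().
 */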

static void queue_request(struct intel_engine_cs *engine,
			  struct i915_sched_node *node,
			  int prio)
@@ -2542,6 +2656,7 @@ static int gen8_init_rcs_context(struct i915_request *rq)

static void execlists_park(struct intel_engine_cs *engine)
{
	del_timer_sync(&engine->execlists.timer);
	intel_engine_park(engine);
}

@@ -2639,6 +2754,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)

	tasklet_init(&engine->execlists.tasklet,
		     execlists_submission_tasklet, (unsigned long)engine);
	timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);

	logical_ring_default_vfuncs(engine);
	logical_ring_default_irqs(engine);
+223 −0
@@ -79,6 +79,225 @@ static int live_sanitycheck(void *arg)
	return err;
}

/*
 * Each link spins (MI_SEMAPHORE_WAIT) on dword idx of the semaphore page
 * and, once released, writes dword idx-1 to release its predecessor, so a
 * single write to the last dword unwinds the chain back to the head.
 */
static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
	*cs++ = 0;

	if (idx > 0) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
		*cs++ = 0;
		*cs++ = 1;
	} else {
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	intel_ring_advance(rq, cs);
	return 0;
}

static struct i915_request *
semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
{
	struct i915_gem_context *ctx;
	struct i915_request *rq;
	int err;

	ctx = kernel_context(engine->i915);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq))
		goto out_ctx;

	err = emit_semaphore_chain(rq, vma, idx);
	i915_request_add(rq);
	if (err)
		rq = ERR_PTR(err);

out_ctx:
	kernel_context_close(ctx);
	return rq;
}

static int
release_queue(struct intel_engine_cs *engine,
	      struct i915_vma *vma,
	      int idx)
{
	struct i915_sched_attr attr = {
		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
	};
	struct i915_request *rq;
	u32 *cs;

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);
	i915_request_add(rq);

	/*
	 * Submit at maximum priority so the releasing write cannot itself
	 * be starved by the semaphore spinners it is about to unblock.
	 */
	engine->schedule(rq, &attr);

	return 0;
}

static int
slice_semaphore_queue(struct intel_engine_cs *outer,
		      struct i915_vma *vma,
		      int count)
{
	struct intel_engine_cs *engine;
	struct i915_request *head;
	enum intel_engine_id id;
	int err, i, n = 0;

	head = semaphore_queue(outer, vma, n++);
	if (IS_ERR(head))
		return PTR_ERR(head);

	i915_request_get(head);
	for_each_engine(engine, outer->i915, id) {
		for (i = 0; i < count; i++) {
			struct i915_request *rq;

			rq = semaphore_queue(engine, vma, n++);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out;
			}
		}
	}

	err = release_queue(outer, vma, n);
	if (err)
		goto out;

	/*
	 * Generous upper bound, in jiffies, for every link in the chain
	 * to be timesliced in and release its predecessor.
	 */
	if (i915_request_wait(head,
			      I915_WAIT_LOCKED,
			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
		       count, n);
		GEM_TRACE_DUMP();
		i915_gem_set_wedged(outer->i915);
		err = -EIO;
	}

out:
	i915_request_put(head);
	return err;
}

static int live_timeslice_preempt(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	void *vaddr;
	int err = 0;
	int count;

	/*
	 * If a request takes too long, we would like to give other users
	 * a fair go on the GPU. In particular, users may create batches
	 * that wait upon external input, where that input may even be
	 * supplied by another GPU job. To avoid blocking forever, we
	 * need to preempt the current task and replace it with another
	 * ready task.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_unlock;
	}

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	for_each_prime_number_from(count, 1, 16) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine(engine, i915, id) {
			memset(vaddr, 0, PAGE_SIZE);

			err = slice_semaphore_queue(engine, vma, count);
			if (err)
				goto err_pin;

			if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
				err = -EIO;
				goto err_pin;
			}
		}
	}

err_pin:
	i915_vma_unpin(vma);
err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
err_unlock:
	if (igt_flush_test(i915, I915_WAIT_LOCKED))
		err = -EIO;
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);

	return err;
}

static int live_busywait_preempt(void *arg)
{
	struct drm_i915_private *i915 = arg;
@@ -398,6 +617,9 @@ static int live_late_preempt(void *arg)
	if (!ctx_lo)
		goto err_ctx_hi;

	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
	ctx_lo->sched.priority = I915_USER_PRIORITY(1);

	for_each_engine(engine, i915, id) {
		struct igt_live_test t;
		struct i915_request *rq;
@@ -1812,6 +2034,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_timeslice_preempt),
		SUBTEST(live_busywait_preempt),
		SUBTEST(live_preempt),
		SUBTEST(live_late_preempt),
+1 −0
@@ -394,6 +394,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
		list_add(&dep->wait_link, &signal->waiters_list);
		list_add(&dep->signal_link, &node->signalers_list);
		dep->signaler = signal;
		dep->waiter = node;
		dep->flags = flags;

		/* Keep track of whether anyone on this chain has a semaphore */
+1 −0
@@ -62,6 +62,7 @@ struct i915_sched_node {

struct i915_dependency {
	struct i915_sched_node *signaler;
	/* back-pointer to the waiting node, for walking waiters_list */
	struct i915_sched_node *waiter;
	struct list_head signal_link;
	struct list_head wait_link;
	struct list_head dfs_link;