Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit eb8d0f5a authored by Chris Wilson's avatar Chris Wilson
Browse files

drm/i915: Remove GPU reset dependence on struct_mutex



Now that the submission backends are controlled via their own spinlocks,
with a wave of a magic wand we can lift the struct_mutex requirement
around GPU reset. That is we allow the submission frontend (userspace)
to keep on submitting while we process the GPU reset as we can suspend
the backend independently.

The major change is around the backoff/handoff strategy for performing
the reset. With no mutex deadlock, we no longer have to coordinate with
any waiter, and just perform the reset immediately.

Testcase: igt/gem_mmap_gtt/hang # regresses
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarMika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190125132230.22221-3-chris@chris-wilson.co.uk
parent fe62365f
Loading
Loading
Loading
Loading
+7 −31
Original line number Diff line number Diff line
@@ -1284,8 +1284,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
		seq_puts(m, "Wedged\n");
	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
	if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags))
		seq_puts(m, "Reset in progress: reset handoff to waiter\n");
	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
		seq_puts(m, "Waiter holding struct mutex\n");
	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
@@ -1321,15 +1319,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
		struct rb_node *rb;

		seq_printf(m, "%s:\n", engine->name);
		seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
		seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
			   engine->hangcheck.seqno, seqno[id],
			   intel_engine_last_submit(engine));
		seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n",
			   intel_engine_last_submit(engine),
			   jiffies_to_msecs(jiffies -
					    engine->hangcheck.action_timestamp));
		seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
			   yesno(intel_engine_has_waiter(engine)),
			   yesno(test_bit(engine->id,
					  &dev_priv->gpu_error.missed_irq_rings)),
			   yesno(engine->hangcheck.stalled),
			   yesno(engine->hangcheck.wedged));
					  &dev_priv->gpu_error.missed_irq_rings)));

		spin_lock_irq(&b->rb_lock);
		for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
@@ -1343,11 +1341,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
			   (long long)engine->hangcheck.acthd,
			   (long long)acthd[id]);
		seq_printf(m, "\taction = %s(%d) %d ms ago\n",
			   hangcheck_action_to_str(engine->hangcheck.action),
			   engine->hangcheck.action,
			   jiffies_to_msecs(jiffies -
					    engine->hangcheck.action_timestamp));

		if (engine->id == RCS) {
			seq_puts(m, "\tinstdone read =\n");
@@ -3911,8 +3904,6 @@ static int
i915_wedged_set(void *data, u64 val)
{
	struct drm_i915_private *i915 = data;
	struct intel_engine_cs *engine;
	unsigned int tmp;

	/*
	 * There is no safeguard against this debugfs entry colliding
@@ -3925,18 +3916,8 @@ i915_wedged_set(void *data, u64 val)
	if (i915_reset_backoff(&i915->gpu_error))
		return -EAGAIN;

	for_each_engine_masked(engine, i915, val, tmp) {
		engine->hangcheck.seqno = intel_engine_get_seqno(engine);
		engine->hangcheck.stalled = true;
	}

	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
			  "Manually set wedged engine mask = %llx", val);

	wait_on_bit(&i915->gpu_error.flags,
		    I915_RESET_HANDOFF,
		    TASK_UNINTERRUPTIBLE);

	return 0;
}

@@ -4091,13 +4072,8 @@ i915_drop_caches_set(void *data, u64 val)
		mutex_unlock(&i915->drm.struct_mutex);
	}

	if (val & DROP_RESET_ACTIVE &&
	    i915_terminally_wedged(&i915->gpu_error)) {
	if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error))
		i915_handle_error(i915, ALL_ENGINES, 0, NULL);
		wait_on_bit(&i915->gpu_error.flags,
			    I915_RESET_HANDOFF,
			    TASK_UNINTERRUPTIBLE);
	}

	fs_reclaim_acquire(GFP_KERNEL);
	if (val & DROP_BOUND)
+0 −5
Original line number Diff line number Diff line
@@ -3001,11 +3001,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error)
	return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
}

static inline bool i915_reset_handoff(struct i915_gpu_error *error)
{
	return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags));
}

static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
{
	return unlikely(test_bit(I915_WEDGED, &error->flags));
+7 −11
Original line number Diff line number Diff line
@@ -657,11 +657,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
		     struct intel_rps_client *rps_client)
{
	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	GEM_BUG_ON(debug_locks &&
		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif
	GEM_BUG_ON(timeout < 0);

	timeout = i915_gem_object_wait_reservation(obj->resv,
@@ -4493,8 +4488,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)

	GEM_TRACE("\n");

	mutex_lock(&i915->drm.struct_mutex);

	wakeref = intel_runtime_pm_get(i915);
	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

@@ -4520,6 +4513,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
	intel_runtime_pm_put(i915, wakeref);

	mutex_lock(&i915->drm.struct_mutex);
	i915_gem_contexts_lost(i915);
	mutex_unlock(&i915->drm.struct_mutex);
}
@@ -4534,6 +4528,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
	wakeref = intel_runtime_pm_get(i915);
	intel_suspend_gt_powersave(i915);

	flush_workqueue(i915->wq);

	mutex_lock(&i915->drm.struct_mutex);

	/*
@@ -4563,11 +4559,9 @@ int i915_gem_suspend(struct drm_i915_private *i915)
	i915_retire_requests(i915); /* ensure we flush after wedging */

	mutex_unlock(&i915->drm.struct_mutex);
	i915_reset_flush(i915);

	intel_uc_suspend(i915);

	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&i915->gt.retire_work);
	drain_delayed_work(&i915->gt.retire_work);

	/*
	 * As the idle_work is rearming if it detects a race, play safe and
@@ -4575,6 +4569,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
	 */
	drain_delayed_work(&i915->gt.idle_work);

	intel_uc_suspend(i915);

	/*
	 * Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
+0 −1
Original line number Diff line number Diff line
@@ -50,4 +50,3 @@ struct drm_i915_fence_reg {
};

#endif
+1 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include <linux/pagevec.h>

#include "i915_request.h"
#include "i915_reset.h"
#include "i915_selftest.h"
#include "i915_timeline.h"

Loading