Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8c185eca authored by Chris Wilson
Browse files

drm/i915: Split I915_RESET_IN_PROGRESS into two flags



I915_RESET_IN_PROGRESS is being used both to signal to
i915_mutex_lock_interruptible() that it must avoid taking the struct_mutex
and to instruct a waiter (already holding the struct_mutex) to perform the
reset. To allow for a little more coordination, split these two meanings
into a pair of distinct flags. I915_RESET_BACKOFF tells
i915_mutex_lock_interruptible() not to acquire the mutex and
I915_RESET_HANDOFF tells the waiter to call i915_reset().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170316171305.12972-1-chris@chris-wilson.co.uk
parent 3fc03069
Loading
Loading
Loading
Loading
+9 −7
Original line number Original line Diff line number Diff line
@@ -1305,16 +1305,18 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
	enum intel_engine_id id;
	enum intel_engine_id id;


	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
		seq_printf(m, "Wedged\n");
		seq_puts(m, "Wedged\n");
	if (test_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags))
	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
		seq_printf(m, "Reset in progress\n");
		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
	if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags))
		seq_puts(m, "Reset in progress: reset handoff to waiter\n");
	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
		seq_printf(m, "Waiter holding struct mutex\n");
		seq_puts(m, "Waiter holding struct mutex\n");
	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
		seq_printf(m, "struct_mutex blocked for reset\n");
		seq_puts(m, "struct_mutex blocked for reset\n");


	if (!i915.enable_hangcheck) {
	if (!i915.enable_hangcheck) {
		seq_printf(m, "Hangcheck disabled\n");
		seq_puts(m, "Hangcheck disabled\n");
		return 0;
		return 0;
	}
	}


@@ -4127,7 +4129,7 @@ i915_wedged_set(void *data, u64 val)
	 * while it is writing to 'i915_wedged'
	 * while it is writing to 'i915_wedged'
	 */
	 */


	if (i915_reset_in_progress(&dev_priv->gpu_error))
	if (i915_reset_backoff(&dev_priv->gpu_error))
		return -EAGAIN;
		return -EAGAIN;


	i915_handle_error(dev_priv, val,
	i915_handle_error(dev_priv, val,
+5 −2
Original line number Original line Diff line number Diff line
@@ -1815,8 +1815,9 @@ void i915_reset(struct drm_i915_private *dev_priv)
	int ret;
	int ret;


	lockdep_assert_held(&dev_priv->drm.struct_mutex);
	lockdep_assert_held(&dev_priv->drm.struct_mutex);
	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));


	if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags))
	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
		return;
		return;


	/* Clear any previous failed attempts at recovery. Time to try again. */
	/* Clear any previous failed attempts at recovery. Time to try again. */
@@ -1869,7 +1870,9 @@ void i915_reset(struct drm_i915_private *dev_priv)
wakeup:
wakeup:
	i915_gem_reset_finish(dev_priv);
	i915_gem_reset_finish(dev_priv);
	enable_irq(dev_priv->drm.irq);
	enable_irq(dev_priv->drm.irq);
	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);

	clear_bit(I915_RESET_HANDOFF, &error->flags);
	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
	return;
	return;


error:
error:
+35 −5
Original line number Original line Diff line number Diff line
@@ -1595,8 +1595,33 @@ struct i915_gpu_error {
	 */
	 */
	unsigned long reset_count;
	unsigned long reset_count;


	/**
	 * flags: Control various stages of the GPU reset
	 *
	 * #I915_RESET_BACKOFF - When we start a reset, we want to stop any
	 * other users acquiring the struct_mutex. To do this we set the
	 * #I915_RESET_BACKOFF bit in the error flags when we detect a reset
	 * and then check for that bit before acquiring the struct_mutex (in
	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
	 * secondary role in preventing two concurrent global reset attempts.
	 *
	 * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the
	 * struct_mutex. We try to acquire the struct_mutex in the reset worker,
	 * but it may be held by some long running waiter (that we cannot
	 * interrupt without causing trouble). Once we are ready to do the GPU
	 * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If
	 * they already hold the struct_mutex and want to participate they can
	 * inspect the bit and do the reset directly, otherwise the worker
	 * waits for the struct_mutex.
	 *
	 * #I915_WEDGED - If reset fails and we can no longer use the GPU,
	 * we set the #I915_WEDGED bit. Prior to command submission, e.g.
	 * i915_gem_request_alloc(), this bit is checked and the sequence
	 * aborted (with -EIO reported to userspace) if set.
	 */
	unsigned long flags;
	unsigned long flags;
#define I915_RESET_IN_PROGRESS	0
#define I915_RESET_BACKOFF	0
#define I915_RESET_HANDOFF	1
#define I915_WEDGED		(BITS_PER_LONG - 1)
#define I915_WEDGED		(BITS_PER_LONG - 1)


	/**
	/**
@@ -3387,9 +3412,14 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);


void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
void i915_gem_retire_requests(struct drm_i915_private *dev_priv);


static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
static inline bool i915_reset_backoff(struct i915_gpu_error *error)
{
	return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
}

static inline bool i915_reset_handoff(struct i915_gpu_error *error)
{
{
	return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags));
	return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags));
}
}


static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
@@ -3397,9 +3427,9 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
	return unlikely(test_bit(I915_WEDGED, &error->flags));
	return unlikely(test_bit(I915_WEDGED, &error->flags));
}
}


static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
static inline bool i915_reset_backoff_or_wedged(struct i915_gpu_error *error)
{
{
	return i915_reset_in_progress(error) | i915_terminally_wedged(error);
	return i915_reset_backoff(error) | i915_terminally_wedged(error);
}
}


static inline u32 i915_reset_count(struct i915_gpu_error *error)
static inline u32 i915_reset_count(struct i915_gpu_error *error)
+1 −4
Original line number Original line Diff line number Diff line
@@ -103,16 +103,13 @@ i915_gem_wait_for_error(struct i915_gpu_error *error)


	might_sleep();
	might_sleep();


	if (!i915_reset_in_progress(error))
		return 0;

	/*
	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_in_progress(error),
					       !i915_reset_backoff(error),
					       I915_RESET_TIMEOUT);
					       I915_RESET_TIMEOUT);
	if (ret == 0) {
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
+1 −1
Original line number Original line Diff line number Diff line
@@ -1012,7 +1012,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,


static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request)
static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request)
{
{
	if (likely(!i915_reset_in_progress(&request->i915->gpu_error)))
	if (likely(!i915_reset_handoff(&request->i915->gpu_error)))
		return false;
		return false;


	__set_current_state(TASK_RUNNING);
	__set_current_state(TASK_RUNNING);
Loading