drm/i915: clear up wedged transitions (1f83fee0) · Commits · e / devices / android_kernel_sony_msm8998

drivers/gpu/drm/i915/i915_debugfs.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -1672,7 +1672,7 @@ i915_wedged_read(struct file *filp,

		len = snprintf(buf, sizeof(buf),
		"wedged : %d\n",
		atomic_read(&dev_priv->gpu_error.wedged));
		atomic_read(&dev_priv->gpu_error.reset_counter));

		if (len > sizeof(buf))
		len = sizeof(buf);

drivers/gpu/drm/i915/i915_drv.h

+38 −2

Original line number	Diff line number	Diff line
		@@ -771,11 +771,37 @@ struct i915_gpu_error {
		/* Protected by the above dev->gpu_error.lock. */
		struct drm_i915_error_state *first_error;
		struct work_struct work;
		struct completion completion;

		unsigned long last_reset;

		atomic_t wedged;
		/**
		* State variable controlling the reset flow
		*
		* Upper bits are for the reset counter.
		*
		* Lowest bit controls the reset state machine: Set means a reset is in
		* progress. This state will (presuming we don't have any bugs) decay
		* into either unset (successful reset) or the special WEDGED value (hw
		* terminally sour). All waiters on the reset_queue will be woken when
		* that happens.
		*/
		atomic_t reset_counter;

		/**
		* Special values/flags for reset_counter
		*
		* Note that the code relies on
		* I915_WEDGED & I915_RESET_IN_PROGRESS_FLAG
		* being true.
		*/
		#define I915_RESET_IN_PROGRESS_FLAG 1
		#define I915_WEDGED 0xffffffff

		/**
		* Waitqueue to signal when the reset has completed. Used by clients
		* that wait for dev_priv->mm.wedged to settle.
		*/
		wait_queue_head_t reset_queue;

		/* For gpu hang simulation. */
		unsigned int stop_rings;
		@@ -1543,6 +1569,16 @@ void i915_gem_retire_requests(struct drm_device *dev);
		void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
		int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
		bool interruptible);
		static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
		{
		return unlikely(atomic_read(&error->reset_counter)
		& I915_RESET_IN_PROGRESS_FLAG);
		}

		static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
		{
		return atomic_read(&error->reset_counter) == I915_WEDGED;
		}

		void i915_gem_reset(struct drm_device *dev);
		void i915_gem_clflush_object(struct drm_i915_gem_object *obj);

drivers/gpu/drm/i915/i915_gem.c

+18 −31

Original line number	Diff line number	Diff line
		@@ -89,36 +89,32 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
		static int
		i915_gem_wait_for_error(struct i915_gpu_error *error)
		{
		struct completion *x = &error->completion;
		unsigned long flags;
		int ret;

		if (!atomic_read(&error->wedged))
		#define EXIT_COND (!i915_reset_in_progress(error))
		if (EXIT_COND)
		return 0;

		/* GPU is already declared terminally dead, give up. */
		if (i915_terminally_wedged(error))
		return -EIO;

		/*
		* Only wait 10 seconds for the gpu reset to complete to avoid hanging
		* userspace. If it takes that long something really bad is going on and
		* we should simply try to bail out and fail as gracefully as possible.
		*/
		ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
		ret = wait_event_interruptible_timeout(error->reset_queue,
		EXIT_COND,
		10*HZ);
		if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
		} else if (ret < 0) {
		return ret;
		}
		#undef EXIT_COND

		if (atomic_read(&error->wedged)) {
		/* GPU is hung, bump the completion count to account for
		* the token we just consumed so that we never hit zero and
		* end up waiting upon a subsequent completion event that
		* will never happen.
		*/
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
		}
		return 0;
		}

		@@ -942,23 +938,14 @@ int
		i915_gem_check_wedge(struct i915_gpu_error *error,
		bool interruptible)
		{
		if (atomic_read(&error->wedged)) {
		struct completion *x = &error->completion;
		bool recovery_complete;
		unsigned long flags;

		/* Give the error handler a chance to run. */
		spin_lock_irqsave(&x->wait.lock, flags);
		recovery_complete = x->done > 0;
		spin_unlock_irqrestore(&x->wait.lock, flags);

		if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		* -EIO unconditionally for these. */
		if (!interruptible)
		return -EIO;

		/* Recovery complete, but still wedged means reset failure. */
		if (recovery_complete)
		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
		return -EIO;

		return -EAGAIN;
		@@ -1025,7 +1012,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,

		#define EXIT_COND \
		(i915_seqno_passed(ring->get_seqno(ring, false), seqno) \|\| \
		atomic_read(&dev_priv->gpu_error.wedged))
		i915_reset_in_progress(&dev_priv->gpu_error))
		do {
		if (interruptible)
		end = wait_event_interruptible_timeout(ring->irq_queue,
		@@ -1379,7 +1366,7 @@ out:
		/* If this -EIO is due to a gpu hang, give the reset code a
		* chance to clean up the mess. Otherwise return the proper
		* SIGBUS. */
		if (!atomic_read(&dev_priv->gpu_error.wedged))
		if (i915_terminally_wedged(&dev_priv->gpu_error))
		return VM_FAULT_SIGBUS;
		case -EAGAIN:
		/* Give the error handler a chance to run and move the
		@@ -3983,9 +3970,9 @@ i915_gem_entervt_ioctl(struct drm_device dev, void data,
		if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

		if (atomic_read(&dev_priv->gpu_error.wedged)) {
		if (i915_reset_in_progress(&dev_priv->gpu_error)) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		atomic_set(&dev_priv->gpu_error.wedged, 0);
		atomic_set(&dev_priv->gpu_error.reset_counter, 0);
		}

		mutex_lock(&dev->struct_mutex);
		@@ -4069,7 +4056,7 @@ i915_gem_load(struct drm_device *dev)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
		INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
		i915_gem_retire_work_handler);
		init_completion(&dev_priv->gpu_error.completion);
		init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

		/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
		if (IS_GEN3(dev)) {

drivers/gpu/drm/i915/i915_irq.c

+15 −8

Original line number	Diff line number	Diff line
		@@ -862,8 +862,10 @@ done:
		*/
		static void i915_error_work_func(struct work_struct *work)
		{
		drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
		gpu_error.work);
		struct i915_gpu_error *error = container_of(work, struct i915_gpu_error,
		work);
		drm_i915_private_t *dev_priv = container_of(error, drm_i915_private_t,
		gpu_error);
		struct drm_device *dev = dev_priv->dev;
		char *error_event[] = { "ERROR=1", NULL };
		char *reset_event[] = { "RESET=1", NULL };
		@@ -871,14 +873,18 @@ static void i915_error_work_func(struct work_struct *work)

		kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);

		if (atomic_read(&dev_priv->gpu_error.wedged)) {
		if (i915_reset_in_progress(error)) {
		DRM_DEBUG_DRIVER("resetting chip\n");
		kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event);

		if (!i915_reset(dev)) {
		atomic_set(&dev_priv->gpu_error.wedged, 0);
		atomic_set(&error->reset_counter, 0);
		kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event);
		} else {
		atomic_set(&error->reset_counter, I915_WEDGED);
		}
		complete_all(&dev_priv->gpu_error.completion);

		wake_up_all(&dev_priv->gpu_error.reset_queue);
		}
		}

		@@ -1482,11 +1488,12 @@ void i915_handle_error(struct drm_device *dev, bool wedged)
		i915_report_and_clear_eir(dev);

		if (wedged) {
		INIT_COMPLETION(dev_priv->gpu_error.completion);
		atomic_set(&dev_priv->gpu_error.wedged, 1);
		atomic_set(&dev_priv->gpu_error.reset_counter,
		I915_RESET_IN_PROGRESS_FLAG);

		/*
		* Wakeup waiting processes so they don't hang
		* Wakeup waiting processes so that the reset work item
		* doesn't deadlock trying to grab various locks.
		*/
		for_each_ring(ring, dev_priv, i)
		wake_up_all(&ring->irq_queue);

drivers/gpu/drm/i915/intel_display.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -2223,7 +2223,7 @@ intel_finish_fb(struct drm_framebuffer *old_fb)
		WARN_ON(waitqueue_active(&dev_priv->pending_flip_queue));

		wait_event(dev_priv->pending_flip_queue,
		atomic_read(&dev_priv->gpu_error.wedged) \|\|
		i915_reset_in_progress(&dev_priv->gpu_error) \|\|
		atomic_read(&obj->pending_flip) == 0);

		/* Big Hammer, we also need to ensure that any pending
		@@ -2871,7 +2871,7 @@ static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
		unsigned long flags;
		bool pending;

		if (atomic_read(&dev_priv->gpu_error.wedged))
		if (i915_reset_in_progress(&dev_priv->gpu_error))
		return false;

		spin_lock_irqsave(&dev->event_lock, flags);