drm/i915: Revoke mmaps and prevent access to fence registers across reset (2caffbf1) · Commits · e / devices / android_kernel_fairphone_FP5

drivers/gpu/drm/i915/i915_debugfs.c

+3 −9

Original line number	Diff line number	Diff line
		@@ -1280,14 +1280,11 @@ static int i915_hangcheck_info(struct seq_file m, void unused)
		intel_wakeref_t wakeref;
		enum intel_engine_id id;

		seq_printf(m, "Reset flags: %lx\n", dev_priv->gpu_error.flags);
		if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
		seq_puts(m, "Wedged\n");
		seq_puts(m, "\tWedged\n");
		if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
		if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
		seq_puts(m, "Waiter holding struct mutex\n");
		if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
		seq_puts(m, "struct_mutex blocked for reset\n");
		seq_puts(m, "\tDevice (global) reset in progress\n");

		if (!i915_modparams.enable_hangcheck) {
		seq_puts(m, "Hangcheck disabled\n");
		@@ -3872,9 +3869,6 @@ i915_wedged_set(void *data, u64 val)
		* while it is writing to 'i915_wedged'
		*/

		if (i915_reset_backoff(&i915->gpu_error))
		return -EAGAIN;

		i915_handle_error(i915, val, I915_ERROR_CAPTURE,
		"Manually set wedged engine mask = %llx", val);
		return 0;

drivers/gpu/drm/i915/i915_drv.h

+6 −12

Original line number	Diff line number	Diff line
		@@ -3001,7 +3001,12 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
		i915_gem_object_unpin_pages(obj);
		}

		int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
		static inline int __must_check
		i915_mutex_lock_interruptible(struct drm_device *dev)
		{
		return mutex_lock_interruptible(&dev->struct_mutex);
		}

		int i915_gem_dumb_create(struct drm_file *file_priv,
		struct drm_device *dev,
		struct drm_mode_create_dumb *args);
		@@ -3018,21 +3023,11 @@ int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
		struct i915_request *
		i915_gem_find_active_request(struct intel_engine_cs *engine);

		static inline bool i915_reset_backoff(struct i915_gpu_error *error)
		{
		return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
		}

		static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
		{
		return unlikely(test_bit(I915_WEDGED, &error->flags));
		}

		static inline bool i915_reset_backoff_or_wedged(struct i915_gpu_error *error)
		{
		return i915_reset_backoff(error) \| i915_terminally_wedged(error);
		}

		static inline u32 i915_reset_count(struct i915_gpu_error *error)
		{
		return READ_ONCE(error->reset_count);
		@@ -3105,7 +3100,6 @@ struct drm_i915_fence_reg *
		i915_reserve_fence(struct drm_i915_private *dev_priv);
		void i915_unreserve_fence(struct drm_i915_fence_reg *fence);

		void i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
		void i915_gem_restore_fences(struct drm_i915_private *dev_priv);

		void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv);

drivers/gpu/drm/i915/i915_gem.c

+13 −43

Original line number	Diff line number	Diff line
		@@ -98,47 +98,6 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
		spin_unlock(&dev_priv->mm.object_stat_lock);
		}

		static int
		i915_gem_wait_for_error(struct i915_gpu_error *error)
		{
		int ret;

		might_sleep();

		/*
		* Only wait 10 seconds for the gpu reset to complete to avoid hanging
		* userspace. If it takes that long something really bad is going on and
		* we should simply try to bail out and fail as gracefully as possible.
		*/
		ret = wait_event_interruptible_timeout(error->reset_queue,
		!i915_reset_backoff(error),
		I915_RESET_TIMEOUT);
		if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
		} else if (ret < 0) {
		return ret;
		} else {
		return 0;
		}
		}

		int i915_mutex_lock_interruptible(struct drm_device *dev)
		{
		struct drm_i915_private *dev_priv = to_i915(dev);
		int ret;

		ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
		if (ret)
		return ret;

		ret = mutex_lock_interruptible(&dev->struct_mutex);
		if (ret)
		return ret;

		return 0;
		}

		static u32 __i915_gem_park(struct drm_i915_private *i915)
		{
		intel_wakeref_t wakeref;
		@@ -1885,6 +1844,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
		intel_wakeref_t wakeref;
		struct i915_vma *vma;
		pgoff_t page_offset;
		int srcu;
		int ret;

		/* Sanity check that we allow writing into this object */
		@@ -1924,7 +1884,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
		goto err_unlock;
		}


		/* Now pin it into the GTT as needed */
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
		PIN_MAPPABLE \|
		@@ -1962,9 +1921,15 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
		if (ret)
		goto err_unpin;

		srcu = i915_reset_trylock(dev_priv);
		if (srcu < 0) {
		ret = srcu;
		goto err_unpin;
		}

		ret = i915_vma_pin_fence(vma);
		if (ret)
		goto err_unpin;
		goto err_reset;

		/* Finally, remap it using the new GTT offset */
		ret = remap_io_mapping(area,
		@@ -1985,6 +1950,8 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)

		err_fence:
		i915_vma_unpin_fence(vma);
		err_reset:
		i915_reset_unlock(dev_priv, srcu);
		err_unpin:
		__i915_vma_unpin(vma);
		err_unlock:
		@@ -5342,6 +5309,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
		init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
		init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
		mutex_init(&dev_priv->gpu_error.wedge_mutex);
		init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);

		atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

		@@ -5374,6 +5342,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
		GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
		WARN_ON(dev_priv->mm.object_count);

		cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);

		kmem_cache_destroy(dev_priv->priorities);
		kmem_cache_destroy(dev_priv->dependencies);
		kmem_cache_destroy(dev_priv->requests);

drivers/gpu/drm/i915/i915_gem_fence_reg.c

+9 −27

Original line number	Diff line number	Diff line
		@@ -270,6 +270,10 @@ static int fence_update(struct drm_i915_fence_reg *fence,
		return 0;
		}

		ret = i915_reset_trylock(fence->i915);
		if (ret < 0)
		goto out_rpm;

		fence_write(fence, vma);
		fence->vma = vma;

		@@ -278,8 +282,12 @@ static int fence_update(struct drm_i915_fence_reg *fence,
		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
		}

		i915_reset_unlock(fence->i915, ret);
		ret = 0;

		out_rpm:
		intel_runtime_pm_put(fence->i915, wakeref);
		return 0;
		return ret;
		}

		/**
		@@ -442,32 +450,6 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
		list_add(&fence->link, &fence->i915->mm.fence_list);
		}

		/**
		* i915_gem_revoke_fences - revoke fence state
		* @dev_priv: i915 device private
		*
		* Removes all GTT mmappings via the fence registers. This forces any user
		* of the fence to reacquire that fence before continuing with their access.
		* One use is during GPU reset where the fence register is lost and we need to
		* revoke concurrent userspace access via GTT mmaps until the hardware has been
		* reset and the fence registers have been restored.
		*/
		void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
		{
		int i;

		lockdep_assert_held(&dev_priv->drm.struct_mutex);

		for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];

		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);

		if (fence->vma)
		i915_vma_revoke_mmap(fence->vma);
		}
		}

		/**
		* i915_gem_restore_fences - restore fence state
		* @dev_priv: i915 device private

drivers/gpu/drm/i915/i915_gpu_error.h

+9 −30

Original line number	Diff line number	Diff line
		@@ -204,39 +204,13 @@ struct i915_gpu_error {

		atomic_t pending_fb_pin;

		/**
		* State variable controlling the reset flow and count
		*
		* This is a counter which gets incremented when reset is triggered,
		*
		* Before the reset commences, the I915_RESET_BACKOFF bit is set
		* meaning that any waiters holding onto the struct_mutex should
		* relinquish the lock immediately in order for the reset to start.
		*
		* If reset is not completed successfully, the I915_WEDGE bit is
		* set meaning that hardware is terminally sour and there is no
		* recovery. All waiters on the reset_queue will be woken when
		* that happens.
		*
		* This counter is used by the wait_seqno code to notice that reset
		* event happened and it needs to restart the entire ioctl (since most
		* likely the seqno it waited for won't ever signal anytime soon).
		*
		* This is important for lock-free wait paths, where no contended lock
		* naturally enforces the correct ordering between the bail-out of the
		* waiter and the gpu reset work code.
		*/
		unsigned long reset_count;

		/**
		* flags: Control various stages of the GPU reset
		*
		* #I915_RESET_BACKOFF - When we start a reset, we want to stop any
		* other users acquiring the struct_mutex. To do this we set the
		* #I915_RESET_BACKOFF bit in the error flags when we detect a reset
		* and then check for that bit before acquiring the struct_mutex (in
		* i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
		* secondary role in preventing two concurrent global reset attempts.
		* #I915_RESET_BACKOFF - When we start a global reset, we need to
		* serialise with any other users attempting to do the same, and
		* any global resources that may be clobber by the reset (such as
		* FENCE registers).
		*
		* #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
		* acquire the struct_mutex to reset an engine, we need an explicit
		@@ -255,6 +229,9 @@ struct i915_gpu_error {
		#define I915_RESET_ENGINE 2
		#define I915_WEDGED (BITS_PER_LONG - 1)

		/** Number of times the device has been reset (global) */
		u32 reset_count;

		/** Number of times an engine has been reset */
		u32 reset_engine_count[I915_NUM_ENGINES];

		@@ -272,6 +249,8 @@ struct i915_gpu_error {
		*/
		wait_queue_head_t reset_queue;

		struct srcu_struct reset_backoff_srcu;

		struct i915_gpu_restart *restart;
		};