Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 70deeadd authored by Sagar Arun Kamble, committed by Chris Wilson
Browse files

drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex



This patch fixes a lockdep issue caused by a circular locking dependency
between struct_mutex, i_mutex_key, mmap_sem and relay_channels_mutex.
For the GuC log relay channel we create a debugfs file, which requires the
i_mutex_key lock, and we were doing that under struct_mutex. This introduced
a new dependency:
    &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem
However, there is already a dependency from mmap_sem to struct_mutex. Hence
we move the relay create/destroy operations out from under struct_mutex.
Also added a runtime check of the relay buffer status.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

======================================================
WARNING: possible circular locking dependency detected
4.15.0-rc6-CI-Patchwork_7614+ #1 Not tainted
------------------------------------------------------
debugfs_test/1388 is trying to acquire lock:
 (&dev->struct_mutex){+.+.}, at: [<00000000d5e1d915>] i915_mutex_lock_interruptible+0x47/0x130 [i915]

but task is already holding lock:
 (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #3 (&mm->mmap_sem){++++}:
       _copy_to_user+0x1e/0x70
       filldir+0x8c/0xf0
       dcache_readdir+0xeb/0x160
       iterate_dir+0xdc/0x140
       SyS_getdents+0xa0/0x130
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #2 (&sb->s_type->i_mutex_key#3){++++}:
       start_creating+0x59/0x110
       __debugfs_create_file+0x2e/0xe0
       relay_create_buf_file+0x62/0x80
       relay_late_setup_files+0x84/0x250
       guc_log_late_setup+0x4f/0x110 [i915]
       i915_guc_log_register+0x32/0x40 [i915]
       i915_driver_load+0x7b6/0x1720 [i915]
       i915_pci_probe+0x2e/0x90 [i915]
       pci_device_probe+0x9c/0x120
       driver_probe_device+0x2a3/0x480
       __driver_attach+0xd9/0xe0
       bus_for_each_dev+0x57/0x90
       bus_add_driver+0x168/0x260
       driver_register+0x52/0xc0
       do_one_initcall+0x39/0x150
       do_init_module+0x56/0x1ef
       load_module+0x231c/0x2d70
       SyS_finit_module+0xa5/0xe0
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #1 (relay_channels_mutex){+.+.}:
       relay_open+0x12c/0x2b0
       intel_guc_log_runtime_create+0xab/0x230 [i915]
       intel_guc_init+0x81/0x120 [i915]
       intel_uc_init+0x29/0xa0 [i915]
       i915_gem_init+0x182/0x530 [i915]
       i915_driver_load+0xaa9/0x1720 [i915]
       i915_pci_probe+0x2e/0x90 [i915]
       pci_device_probe+0x9c/0x120
       driver_probe_device+0x2a3/0x480
       __driver_attach+0xd9/0xe0
       bus_for_each_dev+0x57/0x90
       bus_add_driver+0x168/0x260
       driver_register+0x52/0xc0
       do_one_initcall+0x39/0x150
       do_init_module+0x56/0x1ef
       load_module+0x231c/0x2d70
       SyS_finit_module+0xa5/0xe0
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #0 (&dev->struct_mutex){+.+.}:
       __mutex_lock+0x81/0x9b0
       i915_mutex_lock_interruptible+0x47/0x130 [i915]
       i915_gem_fault+0x201/0x790 [i915]
       __do_fault+0x15/0x70
       __handle_mm_fault+0x677/0xdc0
       handle_mm_fault+0x14f/0x2f0
       __do_page_fault+0x2d1/0x560
       page_fault+0x4c/0x60

other info that might help us debug this:

Chain exists of:
  &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&mm->mmap_sem);
                               lock(&sb->s_type->i_mutex_key#3);
                               lock(&mm->mmap_sem);
  lock(&dev->struct_mutex);

 *** DEADLOCK ***

1 lock held by debugfs_test/1388:
 #0:  (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560

stack backtrace:
CPU: 2 PID: 1388 Comm: debugfs_test Not tainted 4.15.0-rc6-CI-Patchwork_7614+ #1
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016
Call Trace:
 dump_stack+0x5f/0x86
 print_circular_bug.isra.18+0x1d0/0x2c0
 __lock_acquire+0x14ae/0x1b60
 ? lock_acquire+0xaf/0x200
 lock_acquire+0xaf/0x200
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 __mutex_lock+0x81/0x9b0
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? __pm_runtime_resume+0x4f/0x80
 i915_gem_fault+0x201/0x790 [i915]
 __do_fault+0x15/0x70
 ? _raw_spin_unlock+0x29/0x40
 __handle_mm_fault+0x677/0xdc0
 handle_mm_fault+0x14f/0x2f0
 __do_page_fault+0x2d1/0x560
 ? page_fault+0x36/0x60
 page_fault+0x4c/0x60

v2: Added lock protection to guc->log.runtime.relay_chan (Chris)
    Fixed locking inside guc_flush_logs uncovered by new lockdep.

v3: Locking guc_read_update_log_buffer entirely with relay_lock. (Chris)
    Prepared intel_guc_init_early. Moved relay_lock inside relay_create,
    relay_destroy, relay_file_create, guc_read_update_log_buffer. (Michal)
    Removed struct_mutex lock around guc_log_flush and removed usage
    of guc_log_has_relay() from runtime_create path as it needs
    struct_mutex lock.

v4: Handle NULL relay sub buffer pointer earlier in read_update_log_buffer
    (Chris). Fixed comment suffix **/. (Michal)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104693


Testcase: igt/debugfs_test/read_all_entries # with enable_guc=1 and guc_log_level=1
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Marta Lofstedt <marta.lofstedt@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/1516808821-3638-3-git-send-email-sagar.a.kamble@intel.com
parent 1ed21cb4
Loading
Loading
Loading
Loading
+1 −11
Original line number Diff line number Diff line
@@ -2467,7 +2467,6 @@ static int i915_guc_log_control_get(void *data, u64 *val)
static int i915_guc_log_control_set(void *data, u64 val)
{
	struct drm_i915_private *dev_priv = data;
	int ret;

	if (!HAS_GUC(dev_priv))
		return -ENODEV;
@@ -2475,16 +2474,7 @@ static int i915_guc_log_control_set(void *data, u64 val)
	if (!dev_priv->guc.log.vma)
		return -EINVAL;

	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
	if (ret)
		return ret;

	intel_runtime_pm_get(dev_priv);
	ret = i915_guc_log_control(dev_priv, val);
	intel_runtime_pm_put(dev_priv);

	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;
	return i915_guc_log_control(dev_priv, val);
}

DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops,
+1 −1
Original line number Diff line number Diff line
@@ -626,7 +626,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
	i915_gem_contexts_fini(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_uc_fini_wq(dev_priv);
	intel_uc_fini_misc(dev_priv);
	i915_gem_cleanup_userptr(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);
+2 −2
Original line number Diff line number Diff line
@@ -5272,7 +5272,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
	if (ret)
		return ret;

	ret = intel_uc_init_wq(dev_priv);
	ret = intel_uc_init_misc(dev_priv);
	if (ret)
		return ret;

@@ -5368,7 +5368,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_uc_fini_wq(dev_priv);
	intel_uc_fini_misc(dev_priv);

	if (ret != -EIO)
		i915_gem_cleanup_userptr(dev_priv);
+6 −1
Original line number Diff line number Diff line
@@ -64,6 +64,7 @@ void intel_guc_init_early(struct intel_guc *guc)
{
	intel_guc_fw_init_early(guc);
	intel_guc_ct_init_early(&guc->ct);
	intel_guc_log_init_early(guc);

	mutex_init(&guc->send_mutex);
	guc->send = intel_guc_send_nop;
@@ -87,8 +88,10 @@ int intel_guc_init_wq(struct intel_guc *guc)
	 */
	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
						WQ_HIGHPRI | WQ_FREEZABLE);
	if (!guc->log.runtime.flush_wq)
	if (!guc->log.runtime.flush_wq) {
		DRM_ERROR("Couldn't allocate workqueue for GuC log\n");
		return -ENOMEM;
	}

	/*
	 * Even though both sending GuC action, and adding a new workitem to
@@ -109,6 +112,8 @@ int intel_guc_init_wq(struct intel_guc *guc)
							  WQ_HIGHPRI);
		if (!guc->preempt_wq) {
			destroy_workqueue(guc->log.runtime.flush_wq);
			DRM_ERROR("Couldn't allocate workqueue for GuC "
				  "preemption\n");
			return -ENOMEM;
		}
	}
+127 −44
Original line number Diff line number Diff line
@@ -153,6 +153,8 @@ static int guc_log_relay_file_create(struct intel_guc *guc)
	if (!i915_modparams.guc_log_level)
		return 0;

	mutex_lock(&guc->log.runtime.relay_lock);

	/* For now create the log file in /sys/kernel/debug/dri/0 dir */
	log_dir = dev_priv->drm.primary->debugfs_root;

@@ -169,16 +171,26 @@ static int guc_log_relay_file_create(struct intel_guc *guc)
	 */
	if (!log_dir) {
		DRM_ERROR("Debugfs dir not available yet for GuC log file\n");
		return -ENODEV;
		ret = -ENODEV;
		goto out_unlock;
	}

	ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir);
	if (ret < 0 && ret != -EEXIST) {
		DRM_ERROR("Couldn't associate relay chan with file %d\n", ret);
		goto out_unlock;
	}

out_unlock:
	mutex_unlock(&guc->log.runtime.relay_lock);
	return ret;
}

	return 0;
/*
 * Report whether a relay channel is currently attached to the GuC log.
 *
 * Returns true when guc->log.runtime.relay_chan is non-NULL, false otherwise.
 * The caller must already hold guc->log.runtime.relay_lock.
 */
static bool guc_log_has_relay(struct intel_guc *guc)
{
	/* relay_chan is protected by relay_lock; let lockdep catch callers that forgot it. */
	lockdep_assert_held(&guc->log.runtime.relay_lock);

	return guc->log.runtime.relay_chan != NULL;
}

static void guc_move_to_next_buf(struct intel_guc *guc)
@@ -188,6 +200,9 @@ static void guc_move_to_next_buf(struct intel_guc *guc)
	 */
	smp_wmb();

	if (!guc_log_has_relay(guc))
		return;

	/* All data has been written, so now move the offset of sub buffer. */
	relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size);

@@ -197,7 +212,7 @@ static void guc_move_to_next_buf(struct intel_guc *guc)

static void *guc_get_write_buffer(struct intel_guc *guc)
{
	if (!guc->log.runtime.relay_chan)
	if (!guc_log_has_relay(guc))
		return NULL;

	/* Just get the base address of a new sub buffer and copy data into it
@@ -265,9 +280,22 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
	/* Get the pointer to shared GuC log buffer */
	log_buf_state = src_data = guc->log.runtime.buf_addr;

	mutex_lock(&guc->log.runtime.relay_lock);

	/* Get the pointer to local buffer to store the logs */
	log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc);

	if (unlikely(!log_buf_snapshot_state)) {
		/* Used rate limited to avoid deluge of messages, logs might be
		 * getting consumed by User at a slow rate.
		 */
		DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n");
		guc->log.capture_miss_count++;
		mutex_unlock(&guc->log.runtime.relay_lock);

		return;
	}

	/* Actual logs are present from the 2nd page */
	src_data += PAGE_SIZE;
	dst_data += PAGE_SIZE;
@@ -293,9 +321,6 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
		log_buf_state->flush_to_file = 0;
		log_buf_state++;

		if (unlikely(!log_buf_snapshot_state))
			continue;

		/* First copy the state structure in snapshot buffer */
		memcpy(log_buf_snapshot_state, &log_buf_state_local,
		       sizeof(struct guc_log_buffer_state));
@@ -335,15 +360,9 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
		dst_data += buffer_size;
	}

	if (log_buf_snapshot_state)
	guc_move_to_next_buf(guc);
	else {
		/* Used rate limited to avoid deluge of messages, logs might be
		 * getting consumed by User at a slow rate.
		 */
		DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n");
		guc->log.capture_miss_count++;
	}

	mutex_unlock(&guc->log.runtime.relay_lock);
}

static void capture_logs_work(struct work_struct *work)
@@ -363,8 +382,6 @@ static int guc_log_runtime_create(struct intel_guc *guc)
{
	struct drm_i915_private *dev_priv = guc_to_i915(guc);
	void *vaddr;
	struct rchan *guc_log_relay_chan;
	size_t n_subbufs, subbuf_size;
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -387,8 +404,44 @@ static int guc_log_runtime_create(struct intel_guc *guc)

	guc->log.runtime.buf_addr = vaddr;

	return 0;
}

/*
 * Tear down the runtime mapping of the GuC log buffer.
 *
 * Unpins the mapping of the log vma's backing object and clears
 * guc->log.runtime.buf_addr. Does nothing when guc_log_has_runtime()
 * reports that no runtime state exists.
 *
 * NOTE(review): guc_log_has_runtime() is not visible here; presumably it
 * checks buf_addr - confirm. This function does not touch the relay channel.
 */
static void guc_log_runtime_destroy(struct intel_guc *guc)
{
	/*
	 * It's possible that the runtime stuff was never allocated because
	 * GuC log was disabled at the boot time.
	 */
	if (!guc_log_has_runtime(guc))
		return;

	i915_gem_object_unpin_map(guc->log.vma->obj);
	/* Clear the stale pointer now that the mapping has been released. */
	guc->log.runtime.buf_addr = NULL;
}

/*
 * Early initialisation of GuC log state, called from intel_guc_init_early().
 *
 * Initialises the relay_lock mutex and sets up flush_work so that it runs
 * capture_logs_work when executed.
 */
void intel_guc_log_init_early(struct intel_guc *guc)
{
	mutex_init(&guc->log.runtime.relay_lock);
	INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
}

int intel_guc_log_relay_create(struct intel_guc *guc)
{
	struct drm_i915_private *dev_priv = guc_to_i915(guc);
	struct rchan *guc_log_relay_chan;
	size_t n_subbufs, subbuf_size;
	int ret;

	if (!i915_modparams.guc_log_level)
		return 0;

	mutex_lock(&guc->log.runtime.relay_lock);

	GEM_BUG_ON(guc_log_has_relay(guc));

	 /* Keep the size of sub buffers same as shared log buffer */
	subbuf_size = guc->log.vma->obj->base.size;
	subbuf_size = GUC_LOG_SIZE;

	/* Store up to 8 snapshots, which is large enough to buffer sufficient
	 * boot time logs and provides enough leeway to User, in terms of
@@ -407,33 +460,39 @@ static int guc_log_runtime_create(struct intel_guc *guc)
		DRM_ERROR("Couldn't create relay chan for GuC logging\n");

		ret = -ENOMEM;
		goto err_vaddr;
		goto err;
	}

	GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
	guc->log.runtime.relay_chan = guc_log_relay_chan;

	INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
	mutex_unlock(&guc->log.runtime.relay_lock);

	return 0;

err_vaddr:
	i915_gem_object_unpin_map(guc->log.vma->obj);
	guc->log.runtime.buf_addr = NULL;
err:
	mutex_unlock(&guc->log.runtime.relay_lock);
	/* logging will be off */
	i915_modparams.guc_log_level = 0;
	return ret;
}

static void guc_log_runtime_destroy(struct intel_guc *guc)
void intel_guc_log_relay_destroy(struct intel_guc *guc)
{
	mutex_lock(&guc->log.runtime.relay_lock);

	/*
	 * It's possible that the runtime stuff was never allocated because
	 * It's possible that the relay was never allocated because
	 * GuC log was disabled at the boot time.
	 */
	if (!guc_log_has_runtime(guc))
		return;
	if (!guc_log_has_relay(guc))
		goto out_unlock;

	relay_close(guc->log.runtime.relay_chan);
	i915_gem_object_unpin_map(guc->log.vma->obj);
	guc->log.runtime.buf_addr = NULL;
	guc->log.runtime.relay_chan = NULL;

out_unlock:
	mutex_unlock(&guc->log.runtime.relay_lock);
}

static int guc_log_late_setup(struct intel_guc *guc)
@@ -441,17 +500,24 @@ static int guc_log_late_setup(struct intel_guc *guc)
	struct drm_i915_private *dev_priv = guc_to_i915(guc);
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	if (!guc_log_has_runtime(guc)) {
		/*
		 * If log was disabled at boot time, then setup needed to handle
		 * log buffer flush interrupts would not have been done yet, so
		 * do that now.
		 */
		ret = guc_log_runtime_create(guc);
		ret = intel_guc_log_relay_create(guc);
		if (ret)
			goto err;

		mutex_lock(&dev_priv->drm.struct_mutex);
		intel_runtime_pm_get(dev_priv);
		ret = guc_log_runtime_create(guc);
		intel_runtime_pm_put(dev_priv);
		mutex_unlock(&dev_priv->drm.struct_mutex);

		if (ret)
			goto err_relay;
	}

	ret = guc_log_relay_file_create(guc);
@@ -461,7 +527,11 @@ static int guc_log_late_setup(struct intel_guc *guc)
	return 0;

err_runtime:
	mutex_lock(&dev_priv->drm.struct_mutex);
	guc_log_runtime_destroy(guc);
	mutex_unlock(&dev_priv->drm.struct_mutex);
err_relay:
	intel_guc_log_relay_destroy(guc);
err:
	/* logging will remain off */
	i915_modparams.guc_log_level = 0;
@@ -490,7 +560,11 @@ static void guc_flush_logs(struct intel_guc *guc)
		return;

	/* First disable the interrupts, will be renabled afterwards */
	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_runtime_pm_get(dev_priv);
	gen9_disable_guc_interrupts(dev_priv);
	intel_runtime_pm_put(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	/* Before initiating the forceful flush, wait for any pending/ongoing
	 * flush to complete otherwise forceful flush may not actually happen.
@@ -498,7 +572,9 @@ static void guc_flush_logs(struct intel_guc *guc)
	flush_work(&guc->log.runtime.flush_work);

	/* Ask GuC to update the log buffer state */
	intel_runtime_pm_get(dev_priv);
	guc_log_flush(guc);
	intel_runtime_pm_put(dev_priv);

	/* GuC would have updated log buffer by now, so capture it */
	guc_log_capture_logs(guc);
@@ -509,17 +585,10 @@ int intel_guc_log_create(struct intel_guc *guc)
	struct i915_vma *vma;
	unsigned long offset;
	u32 flags;
	u32 size;
	int ret;

	GEM_BUG_ON(guc->log.vma);

	/* The first page is to save log buffer state. Allocate one
	 * extra page for others in case for overlap */
	size = (1 + GUC_LOG_DPC_PAGES + 1 +
		GUC_LOG_ISR_PAGES + 1 +
		GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;

	/* We require SSE 4.1 for fast reads from the GuC log buffer and
	 * it should be present on the chipsets supporting GuC based
	 * submisssions.
@@ -529,7 +598,7 @@ int intel_guc_log_create(struct intel_guc *guc)
		goto err;
	}

	vma = intel_guc_allocate_vma(guc, size);
	vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
@@ -584,7 +653,15 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
		return 0;

	verbosity = enable_logging ? control_val - 1 : 0;

	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
	if (ret)
		return ret;
	intel_runtime_pm_get(dev_priv);
	ret = guc_log_control(guc, enable_logging, verbosity);
	intel_runtime_pm_put(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	if (ret < 0) {
		DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret);
		return ret;
@@ -605,7 +682,11 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
		}

		/* GuC logging is currently the only user of Guc2Host interrupts */
		mutex_lock(&dev_priv->drm.struct_mutex);
		intel_runtime_pm_get(dev_priv);
		gen9_enable_guc_interrupts(dev_priv);
		intel_runtime_pm_put(dev_priv);
		mutex_unlock(&dev_priv->drm.struct_mutex);
	} else {
		/*
		 * Once logging is disabled, GuC won't generate logs & send an
@@ -627,13 +708,13 @@ void i915_guc_log_register(struct drm_i915_private *dev_priv)
	if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level)
		return;

	mutex_lock(&dev_priv->drm.struct_mutex);
	guc_log_late_setup(&dev_priv->guc);
	mutex_unlock(&dev_priv->drm.struct_mutex);
}

void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
{
	struct intel_guc *guc = &dev_priv->guc;

	if (!USES_GUC_SUBMISSION(dev_priv))
		return;

@@ -643,6 +724,8 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
	gen9_disable_guc_interrupts(dev_priv);
	intel_runtime_pm_put(dev_priv);

	guc_log_runtime_destroy(&dev_priv->guc);
	guc_log_runtime_destroy(guc);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_guc_log_relay_destroy(guc);
}
Loading