Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3176ff49 authored by Michał Winiarski's avatar Michał Winiarski Committed by Chris Wilson
Browse files

drm/i915/guc: Move GuC workqueue allocations outside of the mutex



This gets rid of the following lockdep splat:

======================================================
WARNING: possible circular locking dependency detected
4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted
------------------------------------------------------
debugfs_test/1351 is trying to acquire lock:
 (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915]

but task is already holding lock:
 (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #6 (&mm->mmap_sem){++++}:
       __might_fault+0x63/0x90
       _copy_to_user+0x1e/0x70
       filldir+0x8c/0xf0
       dcache_readdir+0xeb/0x160
       iterate_dir+0xe6/0x150
       SyS_getdents+0xa0/0x130
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #5 (&sb->s_type->i_mutex_key#5){++++}:
       lockref_get+0x9/0x20

-> #4 ((completion)&req.done){+.+.}:
       wait_for_common+0x54/0x210
       devtmpfs_create_node+0x130/0x150
       device_add+0x5ad/0x5e0
       device_create_groups_vargs+0xd4/0xe0
       device_create+0x35/0x40
       msr_device_create+0x22/0x40
       cpuhp_invoke_callback+0xc5/0xbf0
       cpuhp_thread_fun+0x167/0x210
       smpboot_thread_fn+0x17f/0x270
       kthread+0x173/0x1b0
       ret_from_fork+0x24/0x30

-> #3 (cpuhp_state-up){+.+.}:
       cpuhp_issue_call+0x132/0x1c0
       __cpuhp_setup_state_cpuslocked+0x12f/0x2a0
       __cpuhp_setup_state+0x3a/0x50
       page_writeback_init+0x3a/0x5c
       start_kernel+0x393/0x3e2
       secondary_startup_64+0xa5/0xb0

-> #2 (cpuhp_state_mutex){+.+.}:
       __mutex_lock+0x81/0x9b0
       __cpuhp_setup_state_cpuslocked+0x4b/0x2a0
       __cpuhp_setup_state+0x3a/0x50
       page_alloc_init+0x1f/0x26
       start_kernel+0x139/0x3e2
       secondary_startup_64+0xa5/0xb0

-> #1 (cpu_hotplug_lock.rw_sem){++++}:
       cpus_read_lock+0x34/0xa0
       apply_workqueue_attrs+0xd/0x40
       __alloc_workqueue_key+0x2c7/0x4e1
       intel_guc_submission_init+0x10c/0x650 [i915]
       intel_uc_init_hw+0x29e/0x460 [i915]
       i915_gem_init_hw+0xca/0x290 [i915]
       i915_gem_init+0x115/0x3a0 [i915]
       i915_driver_load+0x9a8/0x16c0 [i915]
       i915_pci_probe+0x2e/0x90 [i915]
       pci_device_probe+0x9c/0x120
       driver_probe_device+0x2a3/0x480
       __driver_attach+0xd9/0xe0
       bus_for_each_dev+0x57/0x90
       bus_add_driver+0x168/0x260
       driver_register+0x52/0xc0
       do_one_initcall+0x39/0x150
       do_init_module+0x56/0x1ef
       load_module+0x231c/0x2d70
       SyS_finit_module+0xa5/0xe0
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #0 (&dev->struct_mutex){+.+.}:
       lock_acquire+0xaf/0x200
       __mutex_lock+0x81/0x9b0
       i915_mutex_lock_interruptible+0x47/0x130 [i915]
       i915_gem_fault+0x201/0x760 [i915]
       __do_fault+0x15/0x70
       __handle_mm_fault+0x85b/0xe40
       handle_mm_fault+0x14f/0x2f0
       __do_page_fault+0x2d1/0x560
       page_fault+0x22/0x30

other info that might help us debug this:

Chain exists of:
  &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&mm->mmap_sem);
                               lock(&sb->s_type->i_mutex_key#5);
                               lock(&mm->mmap_sem);
  lock(&dev->struct_mutex);

 *** DEADLOCK ***

1 lock held by debugfs_test/1351:
 #0:  (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560

stack backtrace:
CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1
Hardware name:                  /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017
Call Trace:
 dump_stack+0x5f/0x86
 print_circular_bug+0x230/0x3b0
 check_prev_add+0x439/0x7b0
 ? lockdep_init_map_crosslock+0x20/0x20
 ? unwind_get_return_address+0x16/0x30
 ? __lock_acquire+0x1385/0x15a0
 __lock_acquire+0x1385/0x15a0
 lock_acquire+0xaf/0x200
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 __mutex_lock+0x81/0x9b0
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? __pm_runtime_resume+0x4f/0x80
 i915_gem_fault+0x201/0x760 [i915]
 __do_fault+0x15/0x70
 __handle_mm_fault+0x85b/0xe40
 handle_mm_fault+0x14f/0x2f0
 __do_page_fault+0x2d1/0x560
 page_fault+0x22/0x30
RIP: 0033:0x7f98d6f49116
RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283
RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680
RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0
RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0
R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001
R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0

v2: Init preempt_work unconditionally (Chris)
v3: Mention that we need the enable_guc=1 for lockdep splat (Chris)

Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1
Signed-off-by: default avatarMichał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
parent 1bbbca0d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -621,6 +621,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
	i915_gem_contexts_fini(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_uc_fini_wq(dev_priv);
	i915_gem_cleanup_userptr(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);
+4 −0
Original line number Diff line number Diff line
@@ -5163,6 +5163,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
	if (ret)
		return ret;

	ret = intel_uc_init_wq(dev_priv);
	if (ret)
		return ret;

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
+57 −0
Original line number Diff line number Diff line
@@ -69,6 +69,63 @@ void intel_guc_init_early(struct intel_guc *guc)
	guc->notify = gen8_guc_raise_irq;
}

int intel_guc_init_wq(struct intel_guc *guc)
{
	struct drm_i915_private *dev_priv = guc_to_i915(guc);

	/*
	 * GuC log buffer flush work item has to do register access to
	 * send the ack to GuC and this work item, if not synced before
	 * suspend, can potentially get executed after the GFX device is
	 * suspended.
	 * By marking the WQ as freezable, we don't have to bother about
	 * flushing of this work item from the suspend hooks, the pending
	 * work item if any will be either executed before the suspend
	 * or scheduled later on resume. This way the handling of work
	 * item can be kept same between system suspend & rpm suspend.
	 */
	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
						WQ_HIGHPRI | WQ_FREEZABLE);
	if (!guc->log.runtime.flush_wq)
		return -ENOMEM;

	/*
	 * Even though both sending GuC action, and adding a new workitem to
	 * GuC workqueue are serialized (each with its own locking), since
	 * we're using mutliple engines, it's possible that we're going to
	 * issue a preempt request with two (or more - each for different
	 * engine) workitems in GuC queue. In this situation, GuC may submit
	 * all of them, which will make us very confused.
	 * Our preemption contexts may even already be complete - before we
	 * even had the chance to sent the preempt action to GuC!. Rather
	 * than introducing yet another lock, we can just use ordered workqueue
	 * to make sure we're always sending a single preemption request with a
	 * single workitem.
	 */
	if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) &&
	    USES_GUC_SUBMISSION(dev_priv)) {
		guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt",
							  WQ_HIGHPRI);
		if (!guc->preempt_wq) {
			destroy_workqueue(guc->log.runtime.flush_wq);
			return -ENOMEM;
		}
	}

	return 0;
}

void intel_guc_fini_wq(struct intel_guc *guc)
{
	struct drm_i915_private *dev_priv = guc_to_i915(guc);

	if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) &&
	    USES_GUC_SUBMISSION(dev_priv))
		destroy_workqueue(guc->preempt_wq);

	destroy_workqueue(guc->log.runtime.flush_wq);
}

static int guc_shared_data_create(struct intel_guc *guc)
{
	struct i915_vma *vma;
+2 −0
Original line number Diff line number Diff line
@@ -119,6 +119,8 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)
void intel_guc_init_early(struct intel_guc *guc);
void intel_guc_init_send_regs(struct intel_guc *guc);
void intel_guc_init_params(struct intel_guc *guc);
int intel_guc_init_wq(struct intel_guc *guc);
void intel_guc_fini_wq(struct intel_guc *guc);
int intel_guc_init(struct intel_guc *guc);
void intel_guc_fini(struct intel_guc *guc);
int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
+0 −23
Original line number Diff line number Diff line
@@ -411,30 +411,8 @@ static int guc_log_runtime_create(struct intel_guc *guc)
	guc->log.runtime.relay_chan = guc_log_relay_chan;

	INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);

	/*
	 * GuC log buffer flush work item has to do register access to
	 * send the ack to GuC and this work item, if not synced before
	 * suspend, can potentially get executed after the GFX device is
	 * suspended.
	 * By marking the WQ as freezable, we don't have to bother about
	 * flushing of this work item from the suspend hooks, the pending
	 * work item if any will be either executed before the suspend
	 * or scheduled later on resume. This way the handling of work
	 * item can be kept same between system suspend & rpm suspend.
	 */
	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
						WQ_HIGHPRI | WQ_FREEZABLE);
	if (!guc->log.runtime.flush_wq) {
		DRM_ERROR("Couldn't allocate the wq for GuC logging\n");
		ret = -ENOMEM;
		goto err_relaychan;
	}

	return 0;

err_relaychan:
	relay_close(guc->log.runtime.relay_chan);
err_vaddr:
	i915_gem_object_unpin_map(guc->log.vma->obj);
	guc->log.runtime.buf_addr = NULL;
@@ -450,7 +428,6 @@ static void guc_log_runtime_destroy(struct intel_guc *guc)
	if (!guc_log_has_runtime(guc))
		return;

	destroy_workqueue(guc->log.runtime.flush_wq);
	relay_close(guc->log.runtime.relay_chan);
	i915_gem_object_unpin_map(guc->log.vma->obj);
	guc->log.runtime.buf_addr = NULL;
Loading