Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 47979480 authored by Chris Wilson
Browse files

drm/i915: Squash repeated awaits on the same fence



Track the latest fence waited upon on each context, and only add a new
asynchronous wait if the new fence is more recent than the recorded
fence for that context. This requires us to filter out unordered
timelines, which are noted by DMA_FENCE_NO_CONTEXT. However, in the
absence of a universal identifier, we have to use our own
i915->mm.unordered_timeline token.

v2: Throw around the debug crutches
v3: Inline the likely case of the pre-allocation cache being full.
v4: Drop the pre-allocation support, we can lose the most recent fence
in case of allocation failure -- it just means we may emit more awaits
than strictly necessary but will not break.
v5: Trim allocation size for leaf nodes, they only need an array of u32
not pointers.
v6: Create mock_timeline to tidy selftest writing
v7: s/intel_timeline_sync_get/intel_timeline_sync_is_later/ (Tvrtko)
v8: Prune the stale sync points when we idle.
v9: Include a small benchmark in the kselftests
v10: Separate the idr implementation into its own compartment. (Tvrtko)
v11: Refactor igt_sync kselftests to avoid deep nesting (Tvrtko)
v12: __sync_leaf_idx() to assert that p->height is 0 when checking leaves
v13: kselftests to investigate struct i915_syncmap itself (Tvrtko)
v14: Foray into ascii art graphs
v15: Take into account that the random lookup/insert does 2 prng calls,
not 1, when benchmarking, and use for_each_set_bit() (Tvrtko)
v16: Improved ascii art

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-4-chris@chris-wilson.co.uk
parent ceae14bd
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ i915-y := i915_drv.o \
	  i915_params.o \
	  i915_pci.o \
          i915_suspend.o \
	  i915_syncmap.o \
	  i915_sw_fence.o \
	  i915_sysfs.o \
	  intel_csr.o \
+1 −0
Original line number Diff line number Diff line
@@ -3196,6 +3196,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
		intel_engine_disarm_breadcrumbs(engine);
		i915_gem_batch_pool_fini(&engine->batch_pool);
	}
	i915_gem_timelines_mark_idle(dev_priv);

	GEM_BUG_ON(!dev_priv->gt.awake);
	dev_priv->gt.awake = false;
+2 −0
Original line number Diff line number Diff line
@@ -25,6 +25,8 @@
#ifndef __I915_GEM_H__
#define __I915_GEM_H__

#include <linux/bug.h>

#ifdef CONFIG_DRM_I915_DEBUG_GEM
#define GEM_BUG_ON(expr) BUG_ON(expr)
#define GEM_WARN_ON(expr) WARN_ON(expr)
+9 −0
Original line number Diff line number Diff line
@@ -773,6 +773,11 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
		if (fence->context == req->fence.context)
			continue;

		/* Squash repeated waits to the same timelines */
		if (fence->context != req->i915->mm.unordered_timeline &&
		    intel_timeline_sync_is_later(req->timeline, fence))
			continue;

		if (dma_fence_is_i915(fence))
			ret = i915_gem_request_await_request(req,
							     to_request(fence));
@@ -782,6 +787,10 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
							    GFP_KERNEL);
		if (ret < 0)
			return ret;

		/* Record the latest fence used against each timeline */
		if (fence->context != req->i915->mm.unordered_timeline)
			intel_timeline_sync_set(req->timeline, fence);
	} while (--nchild);

	return 0;
+77 −18
Original line number Diff line number Diff line
@@ -23,6 +23,32 @@
 */

#include "i915_drv.h"
#include "i915_syncmap.h"

/*
 * Initialise one per-engine timeline.
 *
 * Stores the owning i915_gem_timeline (@parent) and the fence context id
 * (@context) in @tl, then sets up its spinlock, last_request tracker, request
 * list and sync map.
 *
 * @lockclass and @lockname are only consumed when CONFIG_DEBUG_SPINLOCK is
 * enabled; otherwise the lock is set up with plain spin_lock_init().
 */
static void __intel_timeline_init(struct intel_timeline *tl,
				  struct i915_gem_timeline *parent,
				  u64 context,
				  struct lock_class_key *lockclass,
				  const char *lockname)
{
	/* Identity: who owns this timeline and which fence context it is. */
	tl->common = parent;
	tl->fence_context = context;

#ifdef CONFIG_DEBUG_SPINLOCK
	/* Use the caller-supplied lock class and name for this lock. */
	__raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
#else
	spin_lock_init(&tl->lock);
#endif

	/* Request tracking starts out empty. */
	INIT_LIST_HEAD(&tl->requests);
	init_request_active(&tl->last_request, NULL);

	/* Sync point tracking for fences awaited on this timeline. */
	i915_syncmap_init(&tl->sync);
}

/*
 * Tear down one per-engine timeline: assert that no requests remain on its
 * list, then release its sync map.
 */
static void __intel_timeline_fini(struct intel_timeline *tl)
{
	/* GEM_BUG_ON is a BUG_ON when CONFIG_DRM_I915_DEBUG_GEM is set. */
	GEM_BUG_ON(!list_empty(&tl->requests));

	i915_syncmap_free(&tl->sync);
}

static int __i915_gem_timeline_init(struct drm_i915_private *i915,
				    struct i915_gem_timeline *timeline,
@@ -35,6 +61,14 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,

	lockdep_assert_held(&i915->drm.struct_mutex);

	/*
	 * Ideally we want a set of engines on a single leaf as we expect
	 * to mostly be tracking synchronisation between engines. It is not
	 * a huge issue if this is not the case, but we may want to mitigate
	 * any page crossing penalties if they become an issue.
	 */
	BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);

	timeline->i915 = i915;
	timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL);
	if (!timeline->name)
@@ -44,19 +78,10 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,

	/* Called during early_init before we know how many engines there are */
	fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine));
	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
		struct intel_timeline *tl = &timeline->engine[i];

		tl->fence_context = fences++;
		tl->common = timeline;
#ifdef CONFIG_DEBUG_SPINLOCK
		__raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
#else
		spin_lock_init(&tl->lock);
#endif
		init_request_active(&tl->last_request, NULL);
		INIT_LIST_HEAD(&tl->requests);
	}
	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
		__intel_timeline_init(&timeline->engine[i],
				      timeline, fences++,
				      lockclass, lockname);

	return 0;
}
@@ -81,18 +106,52 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915)
					&class, "&global_timeline->lock");
}

void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
/**
 * i915_gem_timelines_mark_idle -- called when the driver idles
 * @i915 - the drm_i915_private device
 *
 * When the driver is completely idle, we know that all of our sync points
 * have been signaled and our tracking is then entirely redundant. Any request
 * to wait upon an older sync point will be completed instantly as we know
 * the fence is signaled and therefore we will not even look them up in the
 * sync point map.
 */
void i915_gem_timelines_mark_idle(struct drm_i915_private *i915)
{
	struct i915_gem_timeline *timeline;
	int i;

	lockdep_assert_held(&timeline->i915->drm.struct_mutex);
	lockdep_assert_held(&i915->drm.struct_mutex);

	list_for_each_entry(timeline, &i915->gt.timelines, link) {
		for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
			struct intel_timeline *tl = &timeline->engine[i];

		GEM_BUG_ON(!list_empty(&tl->requests));
			/*
			 * All known fences are completed so we can scrap
			 * the current sync point tracking and start afresh,
			 * any attempt to wait upon a previous sync point
			 * will be skipped as the fence was signaled.
			 */
			i915_syncmap_free(&tl->sync);
		}
	}
}

void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
{
	int i;

	lockdep_assert_held(&timeline->i915->drm.struct_mutex);

	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
		__intel_timeline_fini(&timeline->engine[i]);

	list_del(&timeline->link);
	kfree(timeline->name);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_timeline.c"
#include "selftests/i915_gem_timeline.c"
#endif
Loading