
Commit b8f55be6 authored by Chris Wilson

drm/i915: Split obj->cache_coherent to track r/w

Another month, another story in the cache coherency saga. This time, we
come to the realisation that i915_gem_object_is_coherent() has been
reporting whether we can read from the target without requiring a cache
invalidate; but we were using it in places for testing whether we could
write into the object without requiring a cache flush. So split the
tracking into two, one to decide before reads, one after writes.

See commit e27ab73d ("drm/i915: Mark CPU cache as dirty on every
transition for CPU writes") for the previous entry in this saga.
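
For reference, the change turns obj->cache_coherent from a bool into a
two-bit mask, as the hunks below show. A minimal sketch of the intended
semantics; the flag names appear in the diff, but the BIT() assignments
are assumptions based on i915_gem_object.h, which is not part of this
excerpt:

#define I915_BO_CACHE_COHERENT_FOR_READ  BIT(0) /* reads from the object
						  * need no cache invalidate */
#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1) /* writes into the object
						  * need no cache flush */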

v2: Be verbose
v3: Remove unused function (i915_gem_object_is_coherent)
v4: Fix inverted coherency check prior to execbuf (from v2)
v5: Add comment for nasty code where we are optimising on gcc's behalf.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555

Testcase: igt/kms_mmap_write_crc
Testcase: igt/kms_pwrite_crc
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170811111116.10373-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
parent 9c3a16c8
drivers/gpu/drm/i915/Makefile +1 −0
@@ -39,6 +39,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem_gtt.o \
 	  i915_gem_internal.o \
 	  i915_gem.o \
+	  i915_gem_object.o \
 	  i915_gem_render_state.o \
 	  i915_gem_request.o \
 	  i915_gem_shrinker.o \
drivers/gpu/drm/i915/i915_drv.h +0 −6
@@ -4322,10 +4322,4 @@ int remap_io_mapping(struct vm_area_struct *vma,
 		     unsigned long addr, unsigned long pfn, unsigned long size,
 		     struct io_mapping *iomap);
 
-static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj)
-{
-	return (obj->cache_level != I915_CACHE_NONE ||
-		HAS_LLC(to_i915(obj->base.dev)));
-}
-
 #endif
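
The deleted predicate's logic moves into a new helper,
i915_gem_object_set_cache_coherency(), in the freshly added
i915_gem_object.o (see the Makefile hunk above). That file's hunk is not
rendered on this page, so the following is only a plausible
reconstruction from the call sites below, not the diff itself:

/* Sketch inferred from the call sites; not taken from the rendered diff. */
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
					 unsigned int cache_level)
{
	obj->cache_level = cache_level;

	if (cache_level != I915_CACHE_NONE)
		/* Snooped access is coherent for both reads and writes */
		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ |
				      I915_BO_CACHE_COHERENT_FOR_WRITE;
	else if (HAS_LLC(to_i915(obj->base.dev)))
		/* The LLC keeps reads coherent; CPU writes still need a flush */
		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
	else
		obj->cache_coherent = 0;

	obj->cache_dirty =
		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
}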
drivers/gpu/drm/i915/i915_gem.c +13 −12
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 	if (obj->cache_dirty)
 		return false;
 
-	if (!obj->cache_coherent)
+	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 		return true;
 
 	return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 
 	if (needs_clflush &&
 	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-	    !obj->cache_coherent)
+	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 		drm_clflush_sg(pages);
 
 	__start_cpu_write(obj);
@@ -800,7 +800,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
 		if (ret)
 			goto err_unpin;
@@ -852,7 +853,8 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
 		if (ret)
 			goto err_unpin;
@@ -3673,8 +3675,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 
 	list_for_each_entry(vma, &obj->vma_list, obj_link)
 		vma->node.color = cache_level;
-	obj->cache_level = cache_level;
-	obj->cache_coherent = i915_gem_object_is_coherent(obj);
+	i915_gem_object_set_cache_coherency(obj, cache_level);
 	obj->cache_dirty = true; /* Always invalidate stale cachelines */
 
 	return 0;
@@ -4279,6 +4280,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 {
 	struct drm_i915_gem_object *obj;
 	struct address_space *mapping;
+	unsigned int cache_level;
 	gfp_t mask;
 	int ret;
 
@@ -4317,7 +4319,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
-	if (HAS_LLC(dev_priv)) {
+	if (HAS_LLC(dev_priv))
 		/* On some devices, we can have the GPU use the LLC (the CPU
 		 * cache) for about a 10% performance improvement
 		 * compared to uncached.  Graphics requests other than
@@ -4330,12 +4332,11 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 		 * However, we maintain the display planes as UC, and so
 		 * need to rebind when first used as such.
 		 */
-		obj->cache_level = I915_CACHE_LLC;
-	} else
-		obj->cache_level = I915_CACHE_NONE;
+		cache_level = I915_CACHE_LLC;
+	else
+		cache_level = I915_CACHE_NONE;
 
-	obj->cache_coherent = i915_gem_object_is_coherent(obj);
-	obj->cache_dirty = !obj->cache_coherent;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
 
 	trace_i915_gem_object_create(obj);
 
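Net effect on an object's coherency state, assuming the helper sketched
above (note that i915_gem_object_set_cache_level() subsequently forces
cache_dirty to true regardless, to invalidate stale cachelines):

	cache_level          HAS_LLC  cache_coherent        cache_dirty
	!= I915_CACHE_NONE   either   FOR_READ | FOR_WRITE  false
	I915_CACHE_NONE      yes      FOR_READ              true
	I915_CACHE_NONE      no       (none)                true
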
drivers/gpu/drm/i915/i915_gem_clflush.c +2 −1
@@ -139,7 +139,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * snooping behaviour occurs naturally as the result of our domain
 	 * tracking.
 	 */
-	if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
+	if (!(flags & I915_CLFLUSH_FORCE) &&
+	    obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
 		return false;
 
 	trace_i915_gem_object_clflush(obj);
drivers/gpu/drm/i915/i915_gem_execbuffer.c +13 −1
@@ -1842,7 +1842,19 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 			eb->request->capture_list = capture;
 		}
 
-		if (unlikely(obj->cache_dirty && !obj->cache_coherent)) {
+		/*
+		 * If the GPU is not _reading_ through the CPU cache, we need
+		 * to make sure that any writes (both previous GPU writes from
+		 * before a change in snooping levels and normal CPU writes)
+		 * caught in that cache are flushed to main memory.
+		 *
+		 * We want to say
+		 *   obj->cache_dirty &&
+		 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
+		 * but gcc's optimiser doesn't handle that as well and emits
+		 * two jumps instead of one. Maybe one day...
+		 */
+		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
 			if (i915_gem_clflush_object(obj, 0))
 				entry->flags &= ~EXEC_OBJECT_ASYNC;
 		}
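
The bitwise form in the final hunk is only equivalent to the spelled-out
predicate under two conditions that i915_gem_object.h appears to
guarantee: cache_dirty is a single-bit field (so it evaluates to 0 or 1),
and I915_BO_CACHE_COHERENT_FOR_READ is bit 0. Under those assumptions the
equivalence can be checked exhaustively; a standalone sketch, with the
flag values assumed to mirror i915_gem_object.h:

#include <assert.h>

#define FOR_READ  (1u << 0)	/* assumed I915_BO_CACHE_COHERENT_FOR_READ */
#define FOR_WRITE (1u << 1)	/* assumed I915_BO_CACHE_COHERENT_FOR_WRITE */

int main(void)
{
	unsigned int dirty, coherent;

	for (dirty = 0; dirty <= 1; dirty++)
		for (coherent = 0; coherent <= (FOR_READ | FOR_WRITE); coherent++) {
			/* What the code wants to say... */
			int wanted = dirty && !(coherent & FOR_READ);
			/* ...and what it emits, as a single AND */
			int emitted = (dirty & ~coherent) != 0;

			/* Holds because dirty is 0 or 1 and FOR_READ is bit 0 */
			assert(wanted == emitted);
		}
	return 0;
}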