Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7fc92e96 authored by Chris Wilson
Browse files

drm/i915: Store i915_gem_object_is_coherent() as a bit next to cache-dirty



For ease of use (i.e. avoiding a few checks and function calls), store
the object's cache coherency next to the cache-dirty bit.

Specifically, this patch aims to reduce the frequency of no-op calls to
i915_gem_clflush_object() to counteract the increase of such calls for
GPU-only objects in the previous patch.

v2: Replace cache_dirty & ~cache_coherent with cache_dirty &&
!cache_coherent as gcc generates much better code for the latter
(Tvrtko)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Tested-by: Dongwon Kim <dongwon.kim@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170616105455.16977-1-chris@chris-wilson.co.uk


Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
parent e27ab73d
Loading
Loading
Loading
Loading
+7 −7
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
	if (obj->cache_dirty)
		return false;

	if (!i915_gem_object_is_coherent(obj))
	if (!obj->cache_coherent)
		return true;

	return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,

	if (needs_clflush &&
	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !i915_gem_object_is_coherent(obj))
	    !obj->cache_coherent)
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
@@ -856,8 +856,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
	if (ret)
		return ret;

	if (i915_gem_object_is_coherent(obj) ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
	if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
@@ -909,8 +908,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
	if (ret)
		return ret;

	if (i915_gem_object_is_coherent(obj) ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
	if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
@@ -3684,6 +3682,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
	list_for_each_entry(vma, &obj->vma_list, obj_link)
		vma->node.color = cache_level;
	obj->cache_level = cache_level;
	obj->cache_coherent = i915_gem_object_is_coherent(obj);
	obj->cache_dirty = true; /* Always invalidate stale cachelines */

	return 0;
@@ -4344,7 +4343,8 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
	} else
		obj->cache_level = I915_CACHE_NONE;

	obj->cache_dirty = !i915_gem_object_is_coherent(obj);
	obj->cache_coherent = i915_gem_object_is_coherent(obj);
	obj->cache_dirty = !obj->cache_coherent;

	trace_i915_gem_object_create(obj);

+1 −1
Original line number Diff line number Diff line
@@ -139,7 +139,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
	 * snooping behaviour occurs naturally as the result of our domain
	 * tracking.
	 */
	if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
	if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
		return;

	trace_i915_gem_object_clflush(obj);
+1 −1
Original line number Diff line number Diff line
@@ -1110,7 +1110,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
			continue;

		if (obj->cache_dirty)
		if (unlikely(obj->cache_dirty && !obj->cache_coherent))
			i915_gem_clflush_object(obj, 0);

		ret = i915_gem_request_await_object
+2 −1
Original line number Diff line number Diff line
@@ -191,7 +191,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
	obj->cache_dirty = !i915_gem_object_is_coherent(obj);
	obj->cache_coherent = i915_gem_object_is_coherent(obj);
	obj->cache_dirty = !obj->cache_coherent;

	return obj;
}
+1 −0
Original line number Diff line number Diff line
@@ -121,6 +121,7 @@ struct drm_i915_gem_object {
	unsigned long gt_ro:1;
	unsigned int cache_level:3;
	unsigned int cache_dirty:1;
	unsigned int cache_coherent:1;

	atomic_t frontbuffer_bits;
	unsigned int frontbuffer_ggtt_origin; /* write once */
Loading