Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 52a42cec authored by Chris Wilson
Browse files

drm/i915/cmdparser: Accelerate copies from WC memory



If we need to use clflush to prepare our batch for reads from memory, we
can bypass the cache instead by using non-temporal copies.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-39-chris@chris-wilson.co.uk
parent 76ff480e
Loading
Loading
Loading
Loading
+43 −27
Original line number Diff line number Diff line
@@ -965,8 +965,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
{
	unsigned int src_needs_clflush;
	unsigned int dst_needs_clflush;
	void *dst, *ptr;
	int offset, n;
	void *dst, *src;
	int ret;

	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
@@ -983,32 +982,49 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
	if (IS_ERR(dst))
		goto unpin_dst;

	ptr = dst;
	src = ERR_PTR(-ENODEV);
	if (src_needs_clflush &&
	    i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) {
		src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
		if (!IS_ERR(src)) {
			i915_memcpy_from_wc(dst,
					    src + batch_start_offset,
					    ALIGN(batch_len, 16));
			i915_gem_object_unpin_map(src_obj);
		}
	}
	if (IS_ERR(src)) {
		void *ptr;
		int offset, n;

		offset = offset_in_page(batch_start_offset);

	/* We can avoid clflushing partial cachelines before the write if we
	 * only ever write full cache-lines. Since we know that both the
	 * source and destination are in multiples of PAGE_SIZE, we can simply
	 * round up to the next cacheline. We don't care about copying too much
	 * here as we only validate up to the end of the batch.
		/* We can avoid clflushing partial cachelines before the write
		 * if we only ever write full cache-lines. Since we know that
		 * both the source and destination are in multiples of
		 * PAGE_SIZE, we can simply round up to the next cacheline.
		 * We don't care about copying too much here as we only
		 * validate up to the end of the batch.
		 */
		if (dst_needs_clflush & CLFLUSH_BEFORE)
		batch_len = roundup(batch_len, boot_cpu_data.x86_clflush_size);
			batch_len = roundup(batch_len,
					    boot_cpu_data.x86_clflush_size);

		ptr = dst;
		for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
			int len = min_t(int, batch_len, PAGE_SIZE - offset);
		void *vaddr;

		vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n));
			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
			if (src_needs_clflush)
			drm_clflush_virt_range(vaddr + offset, len);
		memcpy(ptr, vaddr + offset, len);
		kunmap_atomic(vaddr);
				drm_clflush_virt_range(src + offset, len);
			memcpy(ptr, src + offset, len);
			kunmap_atomic(src);

			ptr += len;
			batch_len -= len;
			offset = 0;
		}
	}

	/* dst_obj is returned with vmap pinned */
	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;