Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b45305fc authored by Daniel Vetter's avatar Daniel Vetter
Browse files

drm/i915: Implement workaround for broken CS tlb on i830/845



Now that Chris Wilson demonstrated that the key for stability on early
gen 2 is to simple _never_ exchange the physical backing storage of
batch buffers I've tried a stab at a kernel solution. Doesn't look too
nefarious imho, now that I don't try to be too clever for my own good
any more.

v2: After discussing the various techniques, we've decided to always blit
batches on the suspect devices, but allow userspace to opt out of the
kernel workaround assume full responsibility for providing coherent
batches. The principal reason is that avoiding the blit does improve
performance in a few key microbenchmarks and also in cairo-trace
replays.

Signed-Off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
[danvet:
- Drop the hunk which uses HAS_BROKEN_CS_TLB to implement the ring
  wrap w/a. Suggested by Chris Wilson.
- Also add the ACTHD check from Chris Wilson for the error state
  dumping, so that we still catch batches when userspace opts out of
  the w/a.]
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent 6547fbdb
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -989,6 +989,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
	case I915_PARAM_HAS_SECURE_BATCHES:
		value = capable(CAP_SYS_ADMIN);
		break;
	case I915_PARAM_HAS_PINNED_BATCHES:
		value = 1;
		break;
	default:
		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
				 param->param);
+4 −0
Original line number Diff line number Diff line
@@ -1100,6 +1100,7 @@ struct drm_i915_gem_object {
	 */
	atomic_t pending_flip;
};
#define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)

#define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)

@@ -1199,6 +1200,9 @@ struct drm_i915_file_private {
#define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
#define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)

/* Early gen2 have a totally busted CS tlb and require pinned batches. */
#define HAS_BROKEN_CS_TLB(dev)		(IS_I830(dev) || IS_845G(dev))

/* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
 * rows, which changed the alignment requirements and fence programming.
 */
+2 −0
Original line number Diff line number Diff line
@@ -808,6 +808,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,

		flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
+12 −0
Original line number Diff line number Diff line
@@ -1087,6 +1087,18 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
	if (!ring->get_seqno)
		return NULL;

	if (HAS_BROKEN_CS_TLB(dev_priv->dev)) {
		u32 acthd = I915_READ(ACTHD);

		if (WARN_ON(ring->id != RCS))
			return NULL;

		obj = ring->private;
		if (acthd >= obj->gtt_offset &&
		    acthd < obj->gtt_offset + obj->base.size)
			return i915_error_object_create(dev_priv, obj);
	}

	seqno = ring->get_seqno(ring, false);
	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
		if (obj->ring != ring)
+68 −8
Original line number Diff line number Diff line
@@ -547,9 +547,14 @@ static int init_render_ring(struct intel_ring_buffer *ring)

static void render_ring_cleanup(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;

	if (!ring->private)
		return;

	if (HAS_BROKEN_CS_TLB(dev))
		drm_gem_object_unreference(to_gem_object(ring->private));

	cleanup_pipe_control(ring);
}

@@ -969,6 +974,8 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
	return 0;
}

/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
#define I830_BATCH_LIMIT (256*1024)
static int
i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
				u32 offset, u32 len,
@@ -976,6 +983,7 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
{
	int ret;

	if (flags & I915_DISPATCH_PINNED) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;
@@ -983,8 +991,39 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
		intel_ring_emit(ring, offset + len - 8);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	} else {
		struct drm_i915_gem_object *obj = ring->private;
		u32 cs_offset = obj->gtt_offset;

		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		ret = intel_ring_begin(ring, 9+3);
		if (ret)
			return ret;
		/* Blit the batch (which has now all relocs applied) to the stable batch
		 * scratch bo area (so that the CS never stumbles over its tlb
		 * invalidation bug) ... */
		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
				XY_SRC_COPY_BLT_WRITE_ALPHA |
				XY_SRC_COPY_BLT_WRITE_RGB);
		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
		intel_ring_emit(ring, cs_offset);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 4096);
		intel_ring_emit(ring, offset);
		intel_ring_emit(ring, MI_FLUSH);

		/* ... and execute it. */
		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
		intel_ring_emit(ring, cs_offset + len - 8);
		intel_ring_advance(ring);
	}

	return 0;
}
@@ -1596,6 +1635,27 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	/* Workaround batchbuffer to combat CS tlb bug. */
	if (HAS_BROKEN_CS_TLB(dev)) {
		struct drm_i915_gem_object *obj;
		int ret;

		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
		if (obj == NULL) {
			DRM_ERROR("Failed to allocate batch bo\n");
			return -ENOMEM;
		}

		ret = i915_gem_object_pin(obj, 0, true, false);
		if (ret != 0) {
			drm_gem_object_unreference(&obj->base);
			DRM_ERROR("Failed to ping batch bo\n");
			return ret;
		}

		ring->private = obj;
	}

	return intel_init_ring_buffer(dev, ring);
}

Loading