Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6689c167 authored by McAulay, Alistair's avatar McAulay, Alistair Committed by Daniel Vetter
Browse files

drm/i915: Rework GPU reset sequence to match driver load & thaw

This patch is to address Daniels concerns over different code during reset:

http://lists.freedesktop.org/archives/intel-gfx/2014-June/047758.html



"The reason for aiming as hard as possible to use the exact same code for
driver load, gpu reset and runtime pm/system resume is that we've simply
seen too many bugs due to slight variations and unintended omissions."

Tested using igt drv_hangman.

V2: Cleaner way of preventing check_wedge returning -EAGAIN
V3: Clean the last_context during reset, to ensure do_switch() does the MI_SET_CONTEXT. As per review.
Signed-off-by: default avatarMcAulay, Alistair <alistair.mcaulay@intel.com>
Reviewed-by: default avatarMika Kuoppala <mika.kuoppala@intel.com>
[danvet: Rebase over ctx->ppgtt rework and extend the comment in
check_wedge a bit.]
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent 47c12968
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -844,7 +844,13 @@ int i915_reset(struct drm_device *dev)
			!dev_priv->ums.mm_suspended) {
		dev_priv->ums.mm_suspended = 0;

		/* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */
		dev_priv->gpu_error.reload_in_reset = true;

		ret = i915_gem_init_hw(dev);

		dev_priv->gpu_error.reload_in_reset = false;

		mutex_unlock(&dev->struct_mutex);
		if (ret) {
			DRM_ERROR("Failed hw init on reset %d\n", ret);
+3 −0
Original line number Diff line number Diff line
@@ -1239,6 +1239,9 @@ struct i915_gpu_error {

	/* For missed irq/seqno simulation. */
	unsigned int test_irq_rings;

	/* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset   */
	bool reload_in_reset;
};

enum modeset_restore {
+7 −1
Original line number Diff line number Diff line
@@ -1085,6 +1085,12 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
		if (i915_terminally_wedged(error))
			return -EIO;

		/*
		 * Check if GPU Reset is in progress - we need intel_ring_begin
		 * to work properly to reinit the hw state while the gpu is
		 * still marked as reset-in-progress. Handle this with a flag.
		 */
		if (!error->reload_in_reset)
			return -EAGAIN;
	}

+6 −27
Original line number Diff line number Diff line
@@ -289,34 +289,17 @@ void i915_gem_context_reset(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	/* Prevent the hardware from restoring the last context (which hung) on
	 * the next switch */
	for (i = 0; i < I915_NUM_RINGS; i++) {
		struct intel_engine_cs *ring = &dev_priv->ring[i];
		struct intel_context *dctx = ring->default_context;
		struct intel_context *lctx = ring->last_context;

		/* Do a fake switch to the default context */
		if (lctx == dctx)
			continue;

		if (!lctx)
			continue;

		if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
			WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
						      get_context_alignment(dev), 0));
			/* Fake a finish/inactive */
			dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
			dctx->legacy_hw_ctx.rcs_state->active = 0;
		}

		if (lctx) {
			if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
				i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);

			i915_gem_context_unreference(lctx);
		i915_gem_context_reference(dctx);
		ring->last_context = dctx;
			ring->last_context = NULL;
		}
	}
}

@@ -412,10 +395,6 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv)
	struct intel_engine_cs *ring;
	int ret, i;

	/* FIXME: We should make this work, even in reset */
	if (i915_reset_in_progress(&dev_priv->gpu_error))
		return 0;

	BUG_ON(!dev_priv->ring[RCS].default_context);

	for_each_ring(ring, dev_priv, i) {
@@ -558,7 +537,7 @@ static int do_switch(struct intel_engine_cs *ring,
	from = ring->last_context;

	if (to->ppgtt) {
		ret = to->ppgtt->switch_mm(to->ppgtt, ring, false);
		ret = to->ppgtt->switch_mm(to->ppgtt, ring);
		if (ret)
			goto unpin_out;
	}
+7 −56
Original line number Diff line number Diff line
@@ -204,19 +204,12 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
			   uint64_t val, bool synchronous)
			   uint64_t val)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int ret;

	BUG_ON(entry >= 4);

	if (synchronous) {
		I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
		I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
		return 0;
	}

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;
@@ -233,8 +226,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
}

static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring,
			  bool synchronous)
			  struct intel_engine_cs *ring)
{
	int i, ret;

@@ -243,7 +235,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,

	for (i = used_pd - 1; i >= 0; i--) {
		dma_addr_t addr = ppgtt->pd_dma_addr[i];
		ret = gen8_write_pdp(ring, i, addr, synchronous);
		ret = gen8_write_pdp(ring, i, addr);
		if (ret)
			return ret;
	}
@@ -708,29 +700,10 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
}

static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
			 struct intel_engine_cs *ring,
			 bool synchronous)
			 struct intel_engine_cs *ring)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	/* If we're in reset, we can assume the GPU is sufficiently idle to
	 * manually frob these bits. Ideally we could use the ring functions,
	 * except our error handling makes it quite difficult (can't use
	 * intel_ring_begin, ring->flush, or intel_ring_advance)
	 *
	 * FIXME: We should try not to special case reset
	 */
	if (synchronous ||
	    i915_reset_in_progress(&dev_priv->gpu_error)) {
		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
		POSTING_READ(RING_PP_DIR_BASE(ring));
		return 0;
	}

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
@@ -752,29 +725,10 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
}

static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring,
			  bool synchronous)
			  struct intel_engine_cs *ring)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	/* If we're in reset, we can assume the GPU is sufficiently idle to
	 * manually frob these bits. Ideally we could use the ring functions,
	 * except our error handling makes it quite difficult (can't use
	 * intel_ring_begin, ring->flush, or intel_ring_advance)
	 *
	 * FIXME: We should try not to special case reset
	 */
	if (synchronous ||
	    i915_reset_in_progress(&dev_priv->gpu_error)) {
		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
		POSTING_READ(RING_PP_DIR_BASE(ring));
		return 0;
	}

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
@@ -803,14 +757,11 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
}

static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring,
			  bool synchronous)
			  struct intel_engine_cs *ring)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!synchronous)
		return 0;

	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
@@ -1189,7 +1140,7 @@ int i915_ppgtt_init_hw(struct drm_device *dev)

	if (ppgtt) {
		for_each_ring(ring, dev_priv, i) {
			ret = ppgtt->switch_mm(ppgtt, ring, true);
			ret = ppgtt->switch_mm(ppgtt, ring);
			if (ret != 0)
				return ret;
		}
Loading