Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 98a2f411 authored by Chris Wilson
Browse files

drm/i915: Allow disabling error capture



We currently capture the GPU state after we detect a hang. This is vital
for us to both triage and debug hangs in the wild (post-mortem
debugging). However, it comes at the cost of running some potentially
dangerous code (since it has to make very few assumptions about the state
of the driver) that is quite resource intensive.

This patch introduces both a method to disable error capture at runtime
(for users who hit bugs at runtime and need a workaround) and to disable
error capture at compile time (for realtime users who want to minimise
any possible latency, and never require error capture, saving ~30k of
code). The cost is that we now have to be wary of (and test!) a kconfig
flag and a module parameter. The effect of the module parameter is easy
to verify through code inspection and runtime testing, but a kconfig flag
needs regular compile checking.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@linux.intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161012090522.367-2-chris@chris-wilson.co.uk
parent 0e704476
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -46,6 +46,19 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT

	  If in doubt, say "N".

config DRM_I915_CAPTURE_ERROR
	bool "Enable capturing GPU state following a hang"
	depends on DRM_I915
	default y
	help
	  This option enables capturing the GPU state when a hang is detected.
	  This information is vital for triaging hangs and assists in debugging.
	  Please report any hang to
            https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
	  for triaging.

	  If in doubt, say "Y".

config DRM_I915_USERPTR
	bool "Always enable userptr support"
	depends on DRM_I915
+3 −1
Original line number Diff line number Diff line
@@ -42,7 +42,6 @@ i915-y += i915_cmd_parser.o \
	  i915_gem_stolen.o \
	  i915_gem_tiling.o \
	  i915_gem_userptr.o \
	  i915_gpu_error.o \
	  i915_trace_points.o \
	  intel_breadcrumbs.o \
	  intel_engine_cs.o \
@@ -107,6 +106,9 @@ i915-y += dvo_ch7017.o \
	  intel_sdvo.o \
	  intel_tv.o

# Post-mortem debug and GPU hang state capture
i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o

# virtual gpu code
i915-y += i915_vgpu.o

+6 −0
Original line number Diff line number Diff line
@@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
	return 0;
}

#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)

static ssize_t
i915_error_state_write(struct file *filp,
		       const char __user *ubuf,
@@ -1042,6 +1044,8 @@ static const struct file_operations i915_error_state_fops = {
	.release = i915_error_state_release,
};

#endif

static int
i915_next_seqno_get(void *data, u64 *val)
{
@@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
	{"i915_gem_drop_caches", &i915_drop_caches_fops},
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
	{"i915_error_state", &i915_error_state_fops},
#endif
	{"i915_next_seqno", &i915_next_seqno_fops},
	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
+16 −0
Original line number Diff line number Diff line
@@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
#endif

/* i915_gpu_error.c */
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)

__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
@@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
void i915_destroy_error_state(struct drm_device *dev);

#else

/*
 * No-op stand-in for i915_capture_error_state(), provided in the #else branch
 * of the IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) guard, i.e. when error
 * capture is compiled out. It accepts the same arguments as the real
 * function and ignores all of them.
 */
static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
					    u32 engine_mask,
					    const char *error_msg)
{
}

/*
 * No-op stand-in for i915_destroy_error_state(), provided in the #else branch
 * of the IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) guard, i.e. when error
 * capture is compiled out. The @dev argument is ignored.
 */
static inline void i915_destroy_error_state(struct drm_device *dev)
{
}

#endif

const char *i915_cache_level_str(struct drm_i915_private *i915, int type);

/* i915_cmd_parser.c */
+3 −0
Original line number Diff line number Diff line
@@ -1464,6 +1464,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
	struct drm_i915_error_state *error;
	unsigned long flags;

	if (!i915.error_capture)
		return;

	if (READ_ONCE(dev_priv->gpu_error.first_error))
		return;

Loading