Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit feff0dc6 authored by Tvrtko Ursulin's avatar Tvrtko Ursulin
Browse files

drm/i915/pmu: Suspend sampling when GPU is idle



If only a subset of events is enabled we can afford to suspend
the sampling timer when the GPU is idle and so save some cycles
and power.

v2: Rebase and limit timer even more.
v3: Rebase.
v4: Rebase.
v5: Skip action if perf PMU failed to register.
v6: Checkpatch cleanup.
v7:
 * Add a common helper to start the timer if needed. (Chris Wilson)
 * Add comment explaining bitwise logic in pmu_needs_timer.
v8: Fix some comments styles. (Chris Wilson)
v9: Rebase.
v10: Move function declarations to i915_pmu.h.
v11: Rename functions to i915_pmu_gt_(un)parked. (Chris Wilson)

Signed-off-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-3-tvrtko.ursulin@linux.intel.com
parent b46a33e2
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3372,6 +3372,7 @@ i915_gem_idle_work_handler(struct work_struct *work)

	intel_engines_park(dev_priv);
	i915_gem_timelines_mark_idle(dev_priv);
	i915_pmu_gt_parked(dev_priv);

	GEM_BUG_ON(!dev_priv->gt.awake);
	dev_priv->gt.awake = false;
+1 −0
Original line number Diff line number Diff line
@@ -258,6 +258,7 @@ static void mark_busy(struct drm_i915_private *i915)
	i915_update_gfx_val(i915);
	if (INTEL_GEN(i915) >= 6)
		gen6_rps_busy(i915);
	i915_pmu_gt_unparked(i915);

	intel_engines_unpark(i915);

+78 −10
Original line number Diff line number Diff line
@@ -90,6 +90,75 @@ static unsigned int event_enabled_bit(struct perf_event *event)
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
@@ -187,7 +256,7 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);

	if (i915->pmu.enable == 0)
	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	engines_sample(i915);
@@ -339,14 +408,6 @@ static void i915_pmu_enable(struct perf_event *event)

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Start the sampling timer when enabling the first event.
	 */
	if (i915->pmu.enable == 0)
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
@@ -356,6 +417,11 @@ static void i915_pmu_enable(struct perf_event *event)
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
@@ -418,8 +484,10 @@ static void i915_pmu_disable(struct perf_event *event)
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0)
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}
+8 −0
Original line number Diff line number Diff line
@@ -82,6 +82,10 @@ struct i915_pmu {
	 * are using the PMU API.
	 */
	unsigned int enable_count[I915_PMU_MASK_BITS];
	/**
	 * @timer_enabled: Should the internal sampling timer be running.
	 */
	bool timer_enabled;
	/**
	 * @sample: Current and previous (raw) counters for sampling events.
	 *
@@ -96,9 +100,13 @@ struct i915_pmu {
#ifdef CONFIG_PERF_EVENTS
void i915_pmu_register(struct drm_i915_private *i915);
void i915_pmu_unregister(struct drm_i915_private *i915);
void i915_pmu_gt_parked(struct drm_i915_private *i915);
void i915_pmu_gt_unparked(struct drm_i915_private *i915);
#else
static inline void i915_pmu_register(struct drm_i915_private *i915) {}
static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {}
static inline void i915_pmu_gt_unparked(struct drm_i915_private *i915) {}
#endif

#endif