Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 31685c25 authored by Deepak S's avatar Deepak S Committed by Daniel Vetter
Browse files

drm/i915/vlv: WA for Turbo and RC6 to work together.



With RC6 enabled, BYT has an HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
on increasing/decreasing the freq. This logic will monitor C0
counters of render/media power-wells over EI period and takes
necessary action based on these values

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

v5: Added frequency change acceleration support and remove kernel-doc
style comments. (Ville)

v6: Updated comment section and Fix w/a comment. (Ville)

Signed-off-by: default avatarDeepak S <deepak.s@linux.intel.com>
Reviewed-by: default avatarVille Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent 8e09bf83
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -902,6 +902,12 @@ struct vlv_s0ix_state {
	u32 clock_gate_dis2;
};

struct intel_rps_ei_calc {
	u32 cz_ts_ei;
	u32 render_ei_c0;
	u32 media_ei_c0;
};

struct intel_gen6_power_mgmt {
	/* work and pm_iir are protected by dev_priv->irq_lock */
	struct work_struct work;
@@ -926,6 +932,8 @@ struct intel_gen6_power_mgmt {
	u8 rp1_freq;		/* "less than" RP0 power/freqency */
	u8 rp0_freq;		/* Non-overclocked max frequency. */

	u32 ei_interrupt_count;

	int last_adj;
	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;

@@ -1527,6 +1535,13 @@ struct drm_i915_private {
	/* gen6+ rps state */
	struct intel_gen6_power_mgmt rps;

	/* rps wa up ei calculation */
	struct intel_rps_ei_calc rps_up_ei;

	/* rps wa down ei calculation */
	struct intel_rps_ei_calc rps_down_ei;


	/* ilk-only ips/rps state. Everything in here is protected by the global
	 * mchdev_lock in intel_pm.c */
	struct intel_ilk_power_mgmt ips;
+132 −1
Original line number Diff line number Diff line
@@ -1272,6 +1272,131 @@ static void notify_ring(struct drm_device *dev,
	i915_queue_hangcheck(dev);
}

static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
				struct  intel_rps_ei_calc *rps_ei)
{
	u32 cz_ts, cz_freq_khz;
	u32 render_count, media_count;
	u32 elapsed_render, elapsed_media, elapsed_time;
	u32 residency = 0;

	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);

	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);

	if (rps_ei->cz_ts_ei == 0) {
		rps_ei->cz_ts_ei = cz_ts;
		rps_ei->render_ei_c0 = render_count;
		rps_ei->media_ei_c0 = media_count;

		return dev_priv->rps.cur_freq;
	}

	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
	rps_ei->cz_ts_ei = cz_ts;

	elapsed_render = render_count - rps_ei->render_ei_c0;
	rps_ei->render_ei_c0 = render_count;

	elapsed_media = media_count - rps_ei->media_ei_c0;
	rps_ei->media_ei_c0 = media_count;

	/* Convert all the counters into common unit of milli sec */
	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
	elapsed_render /=  cz_freq_khz;
	elapsed_media /= cz_freq_khz;

	/*
	 * Calculate overall C0 residency percentage
	 * only if elapsed time is non zero
	 */
	if (elapsed_time) {
		residency =
			((max(elapsed_render, elapsed_media) * 100)
				/ elapsed_time);
	}

	return residency;
}

/**
 * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
 * busy-ness calculated from C0 counters of render & media power wells
 * @dev_priv: DRM device private
 *
 */
static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
{
	u32 residency_C0_up = 0, residency_C0_down = 0;
	u8 new_delay, adj;

	dev_priv->rps.ei_interrupt_count++;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));


	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
		return dev_priv->rps.cur_freq;
	}


	/*
	 * To down throttle, C0 residency should be less than down threshold
	 * for continous EI intervals. So calculate down EI counters
	 * once in VLV_INT_COUNT_FOR_DOWN_EI
	 */
	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {

		dev_priv->rps.ei_interrupt_count = 0;

		residency_C0_down = vlv_c0_residency(dev_priv,
						&dev_priv->rps_down_ei);
	} else {
		residency_C0_up = vlv_c0_residency(dev_priv,
						&dev_priv->rps_up_ei);
	}

	new_delay = dev_priv->rps.cur_freq;

	adj = dev_priv->rps.last_adj;
	/* C0 residency is greater than UP threshold. Increase Frequency */
	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
		if (adj > 0)
			adj *= 2;
		else
			adj = 1;

		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
			new_delay = dev_priv->rps.cur_freq + adj;

		/*
		 * For better performance, jump directly
		 * to RPe if we're below it.
		 */
		if (new_delay < dev_priv->rps.efficient_freq)
			new_delay = dev_priv->rps.efficient_freq;

	} else if (!dev_priv->rps.ei_interrupt_count &&
			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
		if (adj < 0)
			adj *= 2;
		else
			adj = -1;
		/*
		 * This means, C0 residency is less than down threshold over
		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
		 */
		if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
			new_delay = dev_priv->rps.cur_freq + adj;
	}

	return new_delay;
}

static void gen6_pm_rps_work(struct work_struct *work)
{
	struct drm_i915_private *dev_priv =
@@ -1320,6 +1445,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
		else
			new_delay = dev_priv->rps.min_freq_softlimit;
		adj = 0;
	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
		if (adj < 0)
			adj *= 2;
@@ -4511,6 +4638,10 @@ void intel_irq_init(struct drm_device *dev)
	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);

	/* Let's track the enabled rps events */
	if (IS_VALLEYVIEW(dev))
		/* WaGsvRC0ResidenncyMethod:VLV */
		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
	else
		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;

	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
+11 −0
Original line number Diff line number Diff line
@@ -531,6 +531,7 @@ enum punit_power_well {
#define PUNIT_REG_GPU_FREQ_STS			0xd8
#define   GENFREQSTATUS				(1<<0)
#define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
#define PUNIT_REG_CZ_TIMESTAMP			0xce

#define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
#define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
@@ -556,6 +557,11 @@ enum punit_power_well {
#define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
#define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000

#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
#define VLV_RP_UP_EI_THRESHOLD			90
#define VLV_RP_DOWN_EI_THRESHOLD		70
#define VLV_INT_COUNT_FOR_DOWN_EI		5

/* vlv2 north clock has */
#define CCK_FUSE_REG				0x8
#define  CCK_FUSE_HPLL_FREQ_MASK		0x3
@@ -5394,6 +5400,7 @@ enum punit_power_well {
#define   VLV_GTLC_ALLOWWAKEERR			(1 << 1)
#define   VLV_GTLC_PW_MEDIA_STATUS_MASK		(1 << 5)
#define   VLV_GTLC_PW_RENDER_STATUS_MASK	(1 << 7)
#define VLV_GTLC_SURVIVABILITY_REG              0x130098
#define  FORCEWAKE_MT				0xa188 /* multi-threaded */
#define   FORCEWAKE_KERNEL			0x1
#define   FORCEWAKE_USER			0x2
@@ -5541,6 +5548,8 @@ enum punit_power_well {
#define GEN6_GT_GFX_RC6_LOCKED			0x138104
#define VLV_COUNTER_CONTROL			0x138104
#define   VLV_COUNT_RANGE_HIGH			(1<<15)
#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
#define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
#define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
#define GEN6_GT_GFX_RC6				0x138108
@@ -5549,6 +5558,8 @@ enum punit_power_well {

#define GEN6_GT_GFX_RC6p			0x13810C
#define GEN6_GT_GFX_RC6pp			0x138110
#define VLV_RENDER_C0_COUNT_REG		0x138118
#define VLV_MEDIA_C0_COUNT_REG			0x13811C

#define GEN6_PCODE_MAILBOX			0x138124
#define   GEN6_PCODE_READY			(1<<31)
+9 −3
Original line number Diff line number Diff line
@@ -3282,6 +3282,9 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)

	vlv_force_gfx_clock(dev_priv, false);

	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
	else 
		I915_WRITE(GEN6_PMINTRMSK,
			   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
}
@@ -4125,6 +4128,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
	I915_WRITE(GEN6_RP_DOWN_EI, 350000);

	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);

	I915_WRITE(GEN6_RP_CONTROL,
		   GEN6_RP_MEDIA_TURBO |
@@ -4145,9 +4149,11 @@ static void valleyview_enable_rps(struct drm_device *dev)

	/* allows RC6 residency counter to work */
	I915_WRITE(VLV_COUNTER_CONTROL,
		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
		   _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
				      VLV_RENDER_RC0_COUNT_EN |
				      VLV_MEDIA_RC6_COUNT_EN |
				      VLV_RENDER_RC6_COUNT_EN));

	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;