Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7e452fdb authored by Kumar, Mahesh's avatar Kumar, Mahesh Committed by Maarten Lankhorst
Browse files

drm/i915/skl+: Optimize WM calculation



Plane configuration parameters doesn't change for each WM-level
calculation. Currently we compute same parameters 8 times for each
wm-level.
This patch optimizes it by calculating these parameters in beginning
& reuse during each level-wm calculation.

Changes since V1:
 - rebase on top of Rodrigo's series for CNL

Signed-off-by: default avatarMahesh Kumar <mahesh1.kumar@intel.com>
Acked-by: default avatarMaarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: default avatarMaarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: default avatarMaarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170817134529.2839-3-mahesh1.kumar@intel.com
parent 0b4d7cbf
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -1837,6 +1837,20 @@ struct skl_wm_level {
	uint8_t plane_res_l;
};

/* Stores plane specific WM parameters */
struct skl_wm_params {
	bool x_tiled, y_tiled;
	bool rc_surface;
	uint32_t width;
	uint8_t cpp;
	uint32_t plane_pixel_rate;
	uint32_t y_min_scanlines;
	uint32_t plane_bytes_per_line;
	uint_fixed_16_16_t plane_blocks_per_line;
	uint_fixed_16_16_t y_tile_minimum;
	uint32_t linetime_us;
};

/*
 * This struct helps tracking the state needed for runtime PM, which puts the
 * device in PCI D3 state. Notice that when this happens, nothing on the
+105 −85
Original line number Diff line number Diff line
@@ -4376,134 +4376,146 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
					    downscale_amount);
}

static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
static int
skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
			    struct intel_crtc_state *cstate,
			    const struct intel_plane_state *intel_pstate,
				uint16_t ddb_allocation,
				int level,
				uint16_t *out_blocks, /* out */
				uint8_t *out_lines, /* out */
				bool *enabled /* out */)
			    struct skl_wm_params *wp)
{
	struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
	const struct drm_plane_state *pstate = &intel_pstate->base;
	const struct drm_framebuffer *fb = pstate->fb;
	uint32_t latency = dev_priv->wm.skl_latency[level];
	uint_fixed_16_16_t method1, method2;
	uint_fixed_16_16_t plane_blocks_per_line;
	uint_fixed_16_16_t selected_result;
	uint32_t interm_pbpl;
	uint32_t plane_bytes_per_line;
	uint32_t res_blocks, res_lines;
	uint8_t cpp;
	uint32_t width = 0;
	uint32_t plane_pixel_rate;
	uint_fixed_16_16_t y_tile_minimum;
	uint32_t y_min_scanlines;
	struct intel_atomic_state *state =
		to_intel_atomic_state(cstate->base.state);
	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
	bool y_tiled, x_tiled;

	if (latency == 0 ||
	    !intel_wm_plane_visible(cstate, intel_pstate)) {
		*enabled = false;
	if (!intel_wm_plane_visible(cstate, intel_pstate))
		return 0;
	}

	y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
	wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
		      fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
		      fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
		      fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
	x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;

	/* Display WA #1141: kbl,cfl */
	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
	    dev_priv->ipc_enabled)
		latency += 4;

	if (apply_memory_bw_wa && x_tiled)
		latency += 15;
	wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
	wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
			 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;

	if (plane->id == PLANE_CURSOR) {
		width = intel_pstate->base.crtc_w;
		wp->width = intel_pstate->base.crtc_w;
	} else {
		/*
		 * Src coordinates are already rotated by 270 degrees for
		 * the 90/270 degree plane rotation cases (to match the
		 * GTT mapping), hence no need to account for rotation here.
		 */
		width = drm_rect_width(&intel_pstate->base.src) >> 16;
		wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
	}

	cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
	wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
							    fb->format->cpp[0];
	plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
	wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
							     intel_pstate);

	if (drm_rotation_90_or_270(pstate->rotation)) {

		switch (cpp) {
		switch (wp->cpp) {
		case 1:
			y_min_scanlines = 16;
			wp->y_min_scanlines = 16;
			break;
		case 2:
			y_min_scanlines = 8;
			wp->y_min_scanlines = 8;
			break;
		case 4:
			y_min_scanlines = 4;
			wp->y_min_scanlines = 4;
			break;
		default:
			MISSING_CASE(cpp);
			MISSING_CASE(wp->cpp);
			return -EINVAL;
		}
	} else {
		y_min_scanlines = 4;
		wp->y_min_scanlines = 4;
	}

	if (apply_memory_bw_wa)
		y_min_scanlines *= 2;
		wp->y_min_scanlines *= 2;

	plane_bytes_per_line = width * cpp;
	if (y_tiled) {
		interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line *
					   y_min_scanlines, 512);
	wp->plane_bytes_per_line = wp->width * wp->cpp;
	if (wp->y_tiled) {
		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
					   wp->y_min_scanlines, 512);

		if (INTEL_GEN(dev_priv) >= 10)
			interm_pbpl++;

		plane_blocks_per_line = div_fixed16(interm_pbpl,
							y_min_scanlines);
	} else if (x_tiled && INTEL_GEN(dev_priv) == 9) {
		interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512);
		plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
		wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
							wp->y_min_scanlines);
	} else if (wp->x_tiled && IS_GEN9(dev_priv)) {
		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
		wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
	} else {
		interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
		plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
		wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
	}

	method1 = skl_wm_method1(dev_priv, plane_pixel_rate, cpp, latency);
	method2 = skl_wm_method2(plane_pixel_rate,
	wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
					     wp->plane_blocks_per_line);
	wp->linetime_us = fixed16_to_u32_round_up(
					intel_get_linetime_us(cstate));

	return 0;
}

static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
				struct intel_crtc_state *cstate,
				const struct intel_plane_state *intel_pstate,
				uint16_t ddb_allocation,
				int level,
				const struct skl_wm_params *wp,
				uint16_t *out_blocks, /* out */
				uint8_t *out_lines, /* out */
				bool *enabled /* out */)
{
	const struct drm_plane_state *pstate = &intel_pstate->base;
	uint32_t latency = dev_priv->wm.skl_latency[level];
	uint_fixed_16_16_t method1, method2;
	uint_fixed_16_16_t selected_result;
	uint32_t res_blocks, res_lines;
	struct intel_atomic_state *state =
		to_intel_atomic_state(cstate->base.state);
	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);

	if (latency == 0 ||
	    !intel_wm_plane_visible(cstate, intel_pstate)) {
		*enabled = false;
		return 0;
	}

	/* Display WA #1141: kbl,cfl */
	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
	    dev_priv->ipc_enabled)
		latency += 4;

	if (apply_memory_bw_wa && wp->x_tiled)
		latency += 15;

	method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
				 wp->cpp, latency);
	method2 = skl_wm_method2(wp->plane_pixel_rate,
				 cstate->base.adjusted_mode.crtc_htotal,
				 latency,
				 plane_blocks_per_line);
				 wp->plane_blocks_per_line);

	y_tile_minimum = mul_u32_fixed16(y_min_scanlines,
					 plane_blocks_per_line);

	if (y_tiled) {
		selected_result = max_fixed16(method2, y_tile_minimum);
	if (wp->y_tiled) {
		selected_result = max_fixed16(method2, wp->y_tile_minimum);
	} else {
		uint32_t linetime_us;

		linetime_us = fixed16_to_u32_round_up(
				intel_get_linetime_us(cstate));
		if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
		    (plane_bytes_per_line / 512 < 1))
		if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
		     512 < 1) && (wp->plane_bytes_per_line / 512 < 1))
			selected_result = method2;
		else if (ddb_allocation >=
			 fixed16_to_u32_round_up(plane_blocks_per_line))
			 fixed16_to_u32_round_up(wp->plane_blocks_per_line))
			selected_result = min_fixed16(method1, method2);
		else if (latency >= linetime_us)
		else if (latency >= wp->linetime_us)
			selected_result = min_fixed16(method1, method2);
		else
			selected_result = method1;
@@ -4511,19 +4523,18 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,

	res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
	res_lines = div_round_up_fixed16(selected_result,
					 plane_blocks_per_line);
					 wp->plane_blocks_per_line);

	/* Display WA #1125: skl,bxt,kbl,glk */
	if (level == 0 &&
	    (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
	     fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS))
		res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
	if (level == 0 && wp->rc_surface)
		res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum);

	/* Display WA #1126: skl,bxt,kbl,glk */
	if (level >= 1 && level <= 7) {
		if (y_tiled) {
			res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
			res_lines += y_min_scanlines;
		if (wp->y_tiled) {
			res_blocks += fixed16_to_u32_round_up(
							wp->y_tile_minimum);
			res_lines += wp->y_min_scanlines;
		} else {
			res_blocks++;
		}
@@ -4561,6 +4572,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
		      struct skl_ddb_allocation *ddb,
		      struct intel_crtc_state *cstate,
		      const struct intel_plane_state *intel_pstate,
		      const struct skl_wm_params *wm_params,
		      struct skl_plane_wm *wm)
{
	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
@@ -4584,6 +4596,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
					   intel_pstate,
					   ddb_blocks,
					   level,
					   wm_params,
					   &result->plane_res_b,
					   &result->plane_res_l,
					   &result->plane_en);
@@ -4648,11 +4661,18 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
		const struct intel_plane_state *intel_pstate =
						to_intel_plane_state(pstate);
		enum plane_id plane_id = to_intel_plane(plane)->id;
		struct skl_wm_params wm_params;

		wm = &pipe_wm->planes[plane_id];
		memset(&wm_params, 0, sizeof(struct skl_wm_params));

		ret = skl_compute_plane_wm_params(dev_priv, cstate,
						  intel_pstate, &wm_params);
		if (ret)
			return ret;

		ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
					    intel_pstate, wm);
					    intel_pstate, &wm_params, wm);
		if (ret)
			return ret;
		skl_compute_transition_wm(cstate, &wm->trans_wm);