Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fbe5d36e authored by Ben Widawsky's avatar Ben Widawsky Committed by Daniel Vetter
Browse files

drm/i915/bdw: Support BDW caching



BDW caching works differently than the previous generations. Instead of
having bits in the PTE which directly control how the page is cached,
the 3 PTE bits PWT PCD and PAT provide an index into a PAT defined by
register 0x40e0. This style of caching is functionally equivalent to how
it works on HSW and before.

v2: Tiny bikeshed as discussed on internal irc.

v3: Squash in patch from Ville to mirror the x86 PAT setup more like
in arch/x86/mm/pat.c. Primarily, the 0th index will be WB, and not
uncached.

v4: Comment for reason to not use a 64b write on the PPAT.

v5: Add a FIXME comment that the caching bits in the PAT registers
might be wrong due to doc confusion.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
Signed-off-by: default avatarVille Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: default avatarImre Deak <imre.deak@intel.com>
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent 94ec8f61
Loading
Loading
Loading
Loading
+45 −0
Original line number Original line Diff line number Diff line
@@ -58,12 +58,21 @@ typedef uint64_t gen8_gtt_pte_t;
#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)


#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */

static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
					     enum i915_cache_level level,
					     enum i915_cache_level level,
					     bool valid)
					     bool valid)
{
{
	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;
	pte |= addr;
	if (level != I915_CACHE_NONE)
		pte |= PPAT_CACHED_INDEX;
	else
		pte |= PPAT_UNCACHED_INDEX;
	return pte;
	return pte;
}
}


@@ -806,6 +815,7 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node,
			*end -= 4096;
			*end -= 4096;
	}
	}
}
}

void i915_gem_setup_global_gtt(struct drm_device *dev,
void i915_gem_setup_global_gtt(struct drm_device *dev,
			       unsigned long start,
			       unsigned long start,
			       unsigned long mappable_end,
			       unsigned long mappable_end,
@@ -1003,6 +1013,39 @@ static int ggtt_probe_common(struct drm_device *dev,
	return ret;
	return ret;
}
}


/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
{
#define GEN8_PPAT_UC		(0<<0)
#define GEN8_PPAT_WC		(1<<0)
#define GEN8_PPAT_WT		(2<<0)
#define GEN8_PPAT_WB		(3<<0)
#define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
/* FIXME(BDW): Bspec is completely confused about cache control bits. */
#define GEN8_PPAT_LLC		(1<<2)
#define GEN8_PPAT_LLCELLC	(2<<2)
#define GEN8_PPAT_LLCeLLC	(3<<2)
#define GEN8_PPAT_AGE(x)	(x<<4)
#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

static int gen8_gmch_probe(struct drm_device *dev,
static int gen8_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *gtt_total,
			   size_t *stolen,
			   size_t *stolen,
@@ -1028,6 +1071,8 @@ static int gen8_gmch_probe(struct drm_device *dev,
	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;


	gen8_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);
	ret = ggtt_probe_common(dev, gtt_size);


	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
+1 −0
Original line number Original line Diff line number Diff line
@@ -665,6 +665,7 @@
#define   RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3)
#define   RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3)
#define   RING_FAULT_VALID	(1<<0)
#define   RING_FAULT_VALID	(1<<0)
#define DONE_REG		0x40b0
#define DONE_REG		0x40b0
#define GEN8_PRIVATE_PAT	0x40e0
#define BSD_HWS_PGA_GEN7	(0x04180)
#define BSD_HWS_PGA_GEN7	(0x04180)
#define BLT_HWS_PGA_GEN7	(0x04280)
#define BLT_HWS_PGA_GEN7	(0x04280)
#define VEBOX_HWS_PGA_GEN7	(0x04380)
#define VEBOX_HWS_PGA_GEN7	(0x04380)