Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c5c7bc71 authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'drm-intel-next-2017-09-29' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

2nd batch of v4.15 features:

- lib/scatterlist updates, use for userptr allocations (Tvrtko)
- Fixed point wrapper cleanup (Mahesh)
- Gen9+ transition watermarks, watermark optimization and fixes (Mahesh)
- Display IPC (Isochronous Priority Control) support (Mahesh)
- GEM workaround fixes (Oscar)
- GVT: PCI config sanitize series (Changbin)
- GVT: Workload submission error handling series (Fred)
- PSR fixes and refactoring (Rodrigo)
- HWSP based optimizations (Chris)
- Private PAT management (Zhi)
- IRQ handling fixes and refactoring (Ville)
- Module parameter refactoring and variable name clash fix (Michal)
- Execlist refactoring, incomplete request unwinding on reset (Chris)
- GuC scheduling improvements (Michal)
- OA updates (Lionel)
- Coffeelake out of alpha support (Rodrigo)
- seqno fixes (Chris)
- Execlist refactoring (Mika)
- DP and DP MST cleanups (Dhinakaran)
- Cannonlake slice/sublice config (Ben)
- Numerous fixes all around (Everyone)

* tag 'drm-intel-next-2017-09-29' of git://anongit.freedesktop.org/drm/drm-intel: (168 commits)
  drm/i915: Update DRIVER_DATE to 20170929
  drm/i915: Use memset64() to prefill the GTT page
  drm/i915: Also discard second CRC on gen8+ platforms.
  drm/i915/psr: Set frames before SU entry for psr2
  drm/dp: Add defines for latency in sink
  drm/i915: Allow optimized platform checks
  drm/i915: Avoid using dev_priv->info.gen directly.
  i915: Use %pS printk format for direct addresses
  drm/i915/execlists: Notify context-out for lost requests
  drm/i915/cnl: Add support slice/subslice/eu configs
  drm/i915: Compact device info access by a small re-ordering
  drm/i915: Add IS_PLATFORM macro
  drm/i915/selftests: Try to recover from a wedged GPU during reset tests
  drm/i915/huc: Reorganize HuC authentication
  drm/i915: Fix default values of some modparams
  drm/i915: Extend I915_PARAMS_FOR_EACH with default member value
  drm/i915: Make I915_PARAMS_FOR_EACH macro more flexible
  drm/i915: Enable scanline read based on frame timestamps
  drm/i915/execlists: Microoptimise execlists_cancel_port_request()
  drm/i915: Don't rmw PIPESTAT enable bits
  ...
parents 418da172 e18063e8
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ config DRM_I915
	select DRM_PANEL
	select DRM_MIPI_DSI
	select RELAY
	select IRQ_WORK
	# i915 depends on ACPI_VIDEO when ACPI is enabled
	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
	select BACKLIGHT_LCD_SUPPORT if ACPI
+2 −1
Original line number Diff line number Diff line
@@ -139,7 +139,8 @@ i915-y += i915_perf.o \
	  i915_oa_bxt.o \
	  i915_oa_kblgt2.o \
	  i915_oa_kblgt3.o \
	  i915_oa_glk.o
	  i915_oa_glk.o \
	  i915_oa_cflgt2.o

ifeq ($(CONFIG_DRM_I915_GVT),y)
i915-y += intel_gvt.o
+22 −8
Original line number Diff line number Diff line
@@ -101,7 +101,7 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,
	if (WARN_ON(bytes > 4))
		return -EINVAL;

	if (WARN_ON(offset + bytes > INTEL_GVT_MAX_CFG_SPACE_SZ))
	if (WARN_ON(offset + bytes > vgpu->gvt->device_info.cfg_space_size))
		return -EINVAL;

	memcpy(p_data, vgpu_cfg_space(vgpu) + offset, bytes);
@@ -110,13 +110,25 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,

static int map_aperture(struct intel_vgpu *vgpu, bool map)
{
	u64 first_gfn, first_mfn;
	phys_addr_t aperture_pa = vgpu_aperture_pa_base(vgpu);
	unsigned long aperture_sz = vgpu_aperture_sz(vgpu);
	u64 first_gfn;
	u64 val;
	int ret;

	if (map == vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].tracked)
		return 0;

	if (map) {
		vgpu->gm.aperture_va = memremap(aperture_pa, aperture_sz,
						MEMREMAP_WC);
		if (!vgpu->gm.aperture_va)
			return -ENOMEM;
	} else {
		memunmap(vgpu->gm.aperture_va);
		vgpu->gm.aperture_va = NULL;
	}

	val = vgpu_cfg_space(vgpu)[PCI_BASE_ADDRESS_2];
	if (val & PCI_BASE_ADDRESS_MEM_TYPE_64)
		val = *(u64 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_2);
@@ -124,14 +136,16 @@ static int map_aperture(struct intel_vgpu *vgpu, bool map)
		val = *(u32 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_2);

	first_gfn = (val + vgpu_aperture_offset(vgpu)) >> PAGE_SHIFT;
	first_mfn = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;

	ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, first_gfn,
						  first_mfn,
						  vgpu_aperture_sz(vgpu) >>
						  PAGE_SHIFT, map);
	if (ret)
						  aperture_pa >> PAGE_SHIFT,
						  aperture_sz >> PAGE_SHIFT,
						  map);
	if (ret) {
		memunmap(vgpu->gm.aperture_va);
		vgpu->gm.aperture_va = NULL;
		return ret;
	}

	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].tracked = map;
	return 0;
@@ -275,7 +289,7 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
	if (WARN_ON(bytes > 4))
		return -EINVAL;

	if (WARN_ON(offset + bytes > INTEL_GVT_MAX_CFG_SPACE_SZ))
	if (WARN_ON(offset + bytes > vgpu->gvt->device_info.cfg_space_size))
		return -EINVAL;

	/* First check if it's PCI_COMMAND */
+24 −13
Original line number Diff line number Diff line
@@ -1576,11 +1576,11 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
	return 1;
}

static uint32_t find_bb_size(struct parser_exec_state *s)
static int find_bb_size(struct parser_exec_state *s)
{
	unsigned long gma = 0;
	struct cmd_info *info;
	uint32_t bb_size = 0;
	int bb_size = 0;
	uint32_t cmd_len = 0;
	bool met_bb_end = false;
	struct intel_vgpu *vgpu = s->vgpu;
@@ -1637,6 +1637,8 @@ static int perform_bb_shadow(struct parser_exec_state *s)

	/* get the size of the batch buffer */
	bb_size = find_bb_size(s);
	if (bb_size < 0)
		return -EINVAL;

	/* allocate shadow batch buffer */
	entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL);
@@ -2603,7 +2605,8 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
	u32 *cs;
	void *shadow_ring_buffer_va;
	int ring_id = workload->ring_id;
	int ret;

	guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
@@ -2616,34 +2619,42 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
	gma_tail = workload->rb_start + workload->rb_tail;
	gma_top = workload->rb_start + guest_rb_size;

	/* allocate shadow ring buffer */
	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
	if (IS_ERR(cs))
		return PTR_ERR(cs);
	if (workload->rb_len > vgpu->reserve_ring_buffer_size[ring_id]) {
		void *va = vgpu->reserve_ring_buffer_va[ring_id];
		/* realloc the new ring buffer if needed */
		vgpu->reserve_ring_buffer_va[ring_id] =
			krealloc(va, workload->rb_len, GFP_KERNEL);
		if (!vgpu->reserve_ring_buffer_va[ring_id]) {
			gvt_vgpu_err("fail to alloc reserve ring buffer\n");
			return -ENOMEM;
		}
		vgpu->reserve_ring_buffer_size[ring_id] = workload->rb_len;
	}

	shadow_ring_buffer_va = vgpu->reserve_ring_buffer_va[ring_id];

	/* get shadow ring buffer va */
	workload->shadow_ring_buffer_va = cs;
	workload->shadow_ring_buffer_va = shadow_ring_buffer_va;

	/* head > tail --> copy head <-> top */
	if (gma_head > gma_tail) {
		ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
				      gma_head, gma_top, cs);
				      gma_head, gma_top, shadow_ring_buffer_va);
		if (ret < 0) {
			gvt_vgpu_err("fail to copy guest ring buffer\n");
			return ret;
		}
		cs += ret / sizeof(u32);
		shadow_ring_buffer_va += ret;
		gma_head = workload->rb_start;
	}

	/* copy head or start <-> tail */
	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail,
				shadow_ring_buffer_va);
	if (ret < 0) {
		gvt_vgpu_err("fail to copy guest ring buffer\n");
		return ret;
	}
	cs += ret / sizeof(u32);
	intel_ring_advance(workload->req, cs);
	return 0;
}

+95 −32
Original line number Diff line number Diff line
@@ -368,7 +368,7 @@ static void free_workload(struct intel_vgpu_workload *workload)
#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))

static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
	const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
	struct intel_shadow_bb_entry *entry_obj;
@@ -379,7 +379,7 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)

		vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0);
		if (IS_ERR(vma)) {
			return;
			return PTR_ERR(vma);
		}

		/* FIXME: we are not tracking our pinned VMA leaving it
@@ -392,6 +392,7 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
		if (gmadr_bytes == 8)
			entry_obj->bb_start_cmd_va[2] = 0;
	}
	return 0;
}

static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
@@ -420,7 +421,7 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
	return 0;
}

static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	struct i915_vma *vma;
	unsigned char *per_ctx_va =
@@ -428,12 +429,12 @@ static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
		wa_ctx->indirect_ctx.size;

	if (wa_ctx->indirect_ctx.size == 0)
		return;
		return 0;

	vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
				       0, CACHELINE_BYTES, 0);
	if (IS_ERR(vma)) {
		return;
		return PTR_ERR(vma);
	}

	/* FIXME: we are not tracking our pinned VMA leaving it
@@ -447,26 +448,7 @@ static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
	memset(per_ctx_va, 0, CACHELINE_BYTES);

	update_wa_ctx_2_shadow_ctx(wa_ctx);
}

static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct execlist_ctx_descriptor_format ctx[2];
	int ring_id = workload->ring_id;

	intel_vgpu_pin_mm(workload->shadow_mm);
	intel_vgpu_sync_oos_pages(workload->vgpu);
	intel_vgpu_flush_post_shadow(workload->vgpu);
	prepare_shadow_batch_buffer(workload);
	prepare_shadow_wa_ctx(&workload->wa_ctx);
	if (!workload->emulate_schedule_in)
	return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);

	return emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx);
}

static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
@@ -489,13 +471,62 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
	}
}

static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	if (!wa_ctx->indirect_ctx.obj)
		return;
	struct intel_vgpu *vgpu = workload->vgpu;
	struct execlist_ctx_descriptor_format ctx[2];
	int ring_id = workload->ring_id;
	int ret;

	ret = intel_vgpu_pin_mm(workload->shadow_mm);
	if (ret) {
		gvt_vgpu_err("fail to vgpu pin mm\n");
		goto out;
	}

	ret = intel_vgpu_sync_oos_pages(workload->vgpu);
	if (ret) {
		gvt_vgpu_err("fail to vgpu sync oos pages\n");
		goto err_unpin_mm;
	}

	ret = intel_vgpu_flush_post_shadow(workload->vgpu);
	if (ret) {
		gvt_vgpu_err("fail to flush post shadow\n");
		goto err_unpin_mm;
	}

	ret = prepare_shadow_batch_buffer(workload);
	if (ret) {
		gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n");
		goto err_unpin_mm;
	}

	ret = prepare_shadow_wa_ctx(&workload->wa_ctx);
	if (ret) {
		gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n");
		goto err_shadow_batch;
	}

	if (!workload->emulate_schedule_in)
		return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);

	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
	ret = emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx);
	if (!ret)
		goto out;
	else
		gvt_vgpu_err("fail to emulate execlist schedule in\n");

	release_shadow_wa_ctx(&workload->wa_ctx);
err_shadow_batch:
	release_shadow_batch_buffer(workload);
err_unpin_mm:
	intel_vgpu_unpin_mm(workload->shadow_mm);
out:
	return ret;
}

static int complete_execlist_workload(struct intel_vgpu_workload *workload)
@@ -511,8 +542,10 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
	gvt_dbg_el("complete workload %p status %d\n", workload,
			workload->status);

	if (!workload->status) {
		release_shadow_batch_buffer(workload);
		release_shadow_wa_ctx(&workload->wa_ctx);
	}

	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
		/* if workload->status is not successful means HW GPU
@@ -820,10 +853,21 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)

void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
{
	enum intel_engine_id i;
	struct intel_engine_cs *engine;

	clean_workloads(vgpu, ALL_ENGINES);
	kmem_cache_destroy(vgpu->workloads);

	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
		kfree(vgpu->reserve_ring_buffer_va[i]);
		vgpu->reserve_ring_buffer_va[i] = NULL;
		vgpu->reserve_ring_buffer_size[i] = 0;
	}

}

#define RESERVE_RING_BUFFER_SIZE		((1 * PAGE_SIZE)/8)
int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
{
	enum intel_engine_id i;
@@ -843,7 +887,26 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
	if (!vgpu->workloads)
		return -ENOMEM;

	/* each ring has a shadow ring buffer until vgpu destroyed */
	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
		vgpu->reserve_ring_buffer_va[i] =
			kmalloc(RESERVE_RING_BUFFER_SIZE, GFP_KERNEL);
		if (!vgpu->reserve_ring_buffer_va[i]) {
			gvt_vgpu_err("fail to alloc reserve ring buffer\n");
			goto out;
		}
		vgpu->reserve_ring_buffer_size[i] = RESERVE_RING_BUFFER_SIZE;
	}
	return 0;
out:
	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
		if (vgpu->reserve_ring_buffer_size[i]) {
			kfree(vgpu->reserve_ring_buffer_va[i]);
			vgpu->reserve_ring_buffer_va[i] = NULL;
			vgpu->reserve_ring_buffer_size[i] = 0;
		}
	}
	return -ENOMEM;
}

void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
Loading