Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7dfb9ba3 authored by Jani Nikula
Browse files

Merge tag 'gvt-next-2017-06-08' of https://github.com/01org/gvt-linux into drm-intel-next-queued



gvt-next-2017-06-08

First gvt-next pull for 4.13:
- optimization for per-VM mmio save/restore (Changbin)
- optimization for mmio hash table (Changbin)
- scheduler optimization with event (Ping)
- vGPU reset refinement (Fred)
- other misc refactor and cleanups, etc.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170608093547.bjgs436e3iokrzdm@zhen-hp.sh.intel.com
parents 9a30a261 615c16a9
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -3,6 +3,6 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \
	interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \
	execlist.o scheduler.o sched_policy.o render.o cmd_parser.o

ccflags-y				+= -I$(src) -I$(src)/$(GVT_DIR) -Wall
ccflags-y				+= -I$(src) -I$(src)/$(GVT_DIR)
i915-y					+= $(addprefix $(GVT_DIR)/, $(GVT_SOURCE))
obj-$(CONFIG_DRM_I915_GVT_KVMGT)	+= $(GVT_DIR)/kvmgt.o
+3 −53
Original line number Diff line number Diff line
@@ -2414,53 +2414,13 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
	hash_add(gvt->cmd_table, &e->hlist, e->info->opcode);
}

#define GVT_MAX_CMD_LENGTH     20  /* In Dword */

/*
 * Emit a gvt_command trace event for the command at the parser's current
 * position.
 *
 * Up to GVT_MAX_CMD_LENGTH dwords of the command are copied, one
 * cmd_val() at a time, into a local buffer and handed to
 * trace_gvt_command() together with the vGPU id, ring id, the command's
 * graphics memory address and the two cost values.
 *
 * @s:                    parser state describing the command to trace
 * @cost_pre_cmd_handler: cost passed through to the trace event; the
 *                        caller supplies a get_cycles() delta here
 * @cost_cmd_handler:     cost passed through to the trace event; the
 *                        caller supplies a get_cycles() delta here
 */
static void trace_cs_command(struct parser_exec_state *s,
		cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler)
{
	/* This buffer is used by ftrace to store all commands copied from
	 * guest gma space. Commands can sometimes cross pages, and that
	 * should not be handled in the ftrace logic, so this is used purely
	 * as a 'bounce buffer'.
	 */
	u32 cmd_trace_buf[GVT_MAX_CMD_LENGTH];
	int i;
	u32 cmd_len = cmd_length(s);
	/* The chosen value of GVT_MAX_CMD_LENGTH is based on the following
	 * two considerations:
	 * 1) From observation, most common ring commands are not that long,
	 *    but there are exceptions, so it does make sense to observe
	 *    longer commands.
	 * 2) From the performance and debugging point of view, dumping the
	 *    full contents of very long commands is not necessary.
	 * We might shrink GVT_MAX_CMD_LENGTH or remove this trace event in
	 * the future for performance considerations.
	 */
	if (unlikely(cmd_len > GVT_MAX_CMD_LENGTH)) {
		gvt_dbg_cmd("cmd length exceed tracing limitation!\n");
		/* Clamp so the copy below never overruns cmd_trace_buf. */
		cmd_len = GVT_MAX_CMD_LENGTH;
	}

	for (i = 0; i < cmd_len; i++)
		cmd_trace_buf[i] = cmd_val(s, i);

	trace_gvt_command(s->vgpu->id, s->ring_id, s->ip_gma, cmd_trace_buf,
			cmd_len, s->buf_type == RING_BUFFER_INSTRUCTION,
			cost_pre_cmd_handler, cost_cmd_handler);
}

/* call the cmd handler, and advance ip */
static int cmd_parser_exec(struct parser_exec_state *s)
{
	struct intel_vgpu *vgpu = s->vgpu;
	struct cmd_info *info;
	u32 cmd;
	int ret = 0;
	cycles_t t0, t1, t2;
	struct parser_exec_state s_before_advance_custom;
	struct intel_vgpu *vgpu = s->vgpu;

	t0 = get_cycles();

	cmd = cmd_val(s, 0);

@@ -2471,13 +2431,10 @@ static int cmd_parser_exec(struct parser_exec_state *s)
		return -EINVAL;
	}

	gvt_dbg_cmd("%s\n", info->name);

	s->info = info;

	t1 = get_cycles();

	s_before_advance_custom = *s;
	trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va,
			  cmd_length(s), s->buf_type);

	if (info->handler) {
		ret = info->handler(s);
@@ -2486,9 +2443,6 @@ static int cmd_parser_exec(struct parser_exec_state *s)
			return ret;
		}
	}
	t2 = get_cycles();

	trace_cs_command(&s_before_advance_custom, t1 - t0, t2 - t1);

	if (!(info->flag & F_IP_ADVANCE_CUSTOM)) {
		ret = cmd_advance_default(s);
@@ -2522,8 +2476,6 @@ static int command_scan(struct parser_exec_state *s,
	gma_tail = rb_start + rb_tail;
	gma_bottom = rb_start +  rb_len;

	gvt_dbg_cmd("scan_start: start=%lx end=%lx\n", gma_head, gma_tail);

	while (s->ip_gma != gma_tail) {
		if (s->buf_type == RING_BUFFER_INSTRUCTION) {
			if (!(s->ip_gma >= rb_start) ||
@@ -2552,8 +2504,6 @@ static int command_scan(struct parser_exec_state *s,
		}
	}

	gvt_dbg_cmd("scan_end\n");

	return ret;
}

+23 −33
Original line number Diff line number Diff line
@@ -708,53 +708,43 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
	struct execlist_ctx_descriptor_format *desc[2], valid_desc[2];
	unsigned long valid_desc_bitmap = 0;
	bool emulate_schedule_in = true;
	int ret;
	int i;
	struct execlist_ctx_descriptor_format desc[2];
	int i, ret;

	memset(valid_desc, 0, sizeof(valid_desc));
	desc[0] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
	desc[1] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);

	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
	if (!desc[0].valid) {
		gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
		goto inv_desc;
	}

	for (i = 0; i < 2; i++) {
		if (!desc[i]->valid)
	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i].valid)
			continue;

		if (!desc[i]->privilege_access) {
		if (!desc[i].privilege_access) {
			gvt_vgpu_err("unexpected GGTT elsp submission\n");
			return -EINVAL;
			goto inv_desc;
		}

		/* TODO: add another guest context checks here. */
		set_bit(i, &valid_desc_bitmap);
		valid_desc[i] = *desc[i];
	}

	if (!valid_desc_bitmap) {
		gvt_vgpu_err("no valid desc in a elsp submission\n");
		return -EINVAL;
	}

	if (!test_bit(0, (void *)&valid_desc_bitmap) &&
			test_bit(1, (void *)&valid_desc_bitmap)) {
		gvt_vgpu_err("weird elsp submission, desc 0 is not valid\n");
		return -EINVAL;
	}

	/* submit workload */
	for_each_set_bit(i, (void *)&valid_desc_bitmap, 2) {
		ret = submit_context(vgpu, ring_id, &valid_desc[i],
				emulate_schedule_in);
	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i].valid)
			continue;
		ret = submit_context(vgpu, ring_id, &desc[i], i == 0);
		if (ret) {
			gvt_vgpu_err("fail to schedule workload\n");
			gvt_vgpu_err("failed to submit desc %d\n", i);
			return ret;
		}
		emulate_schedule_in = false;
	}

	return 0;

inv_desc:
	gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
		     desc[0].udw, desc[0].ldw, desc[1].udw, desc[1].ldw);
	return -EINVAL;
}

static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
+2 −7
Original line number Diff line number Diff line
@@ -102,13 +102,8 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)

	p = firmware + h->mmio_offset;

	hash_for_each(gvt->mmio.mmio_info_table, i, e, node) {
		int j;

		for (j = 0; j < e->length; j += 4)
			*(u32 *)(p + e->offset + j) =
				I915_READ_NOTRACE(_MMIO(e->offset + j));
	}
	hash_for_each(gvt->mmio.mmio_info_table, i, e, node)
		*(u32 *)(p + e->offset) = I915_READ_NOTRACE(_MMIO(e->offset));

	memcpy(gvt->firmware.mmio, p, info->mmio_size);

+9 −6
Original line number Diff line number Diff line
@@ -244,15 +244,19 @@ static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
	return readq(addr);
}

/*
 * Write GFX_FLSH_CNTL_EN to the GFX_FLSH_CNTL_GEN6 register, bracketed by
 * mmio_hw_access_pre()/mmio_hw_access_post().
 *
 * NOTE(review): presumably this makes the hardware pick up modified GGTT
 * entries, and the pre/post helpers are assumed to guard the hardware
 * access -- confirm against the register and helper definitions.
 */
static void gtt_invalidate(struct drm_i915_private *dev_priv)
{
	mmio_hw_access_pre(dev_priv);
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	mmio_hw_access_post(dev_priv);
}

/*
 * Store a 64-bit page table entry into the GGTT.
 *
 * @dev_priv: device whose ggtt.gsm mapping is written
 * @index:    entry index, counted in gen8_pte_t units from ggtt.gsm
 * @pte:      raw 64-bit entry value to store
 */
static void write_pte64(struct drm_i915_private *dev_priv,
		unsigned long index, u64 pte)
{
	/* Pointer arithmetic is done on gen8_pte_t, so index selects an entry. */
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

	writeq(pte, addr);

	/* Write the flush-control enable bit, then read the register back.
	 * NOTE(review): the read-back presumably ensures the write has been
	 * posted to the hardware before returning -- confirm.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt,
@@ -1849,6 +1853,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	}

	ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index);
	gtt_invalidate(gvt->dev_priv);
	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
	return 0;
}
@@ -2301,8 +2306,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
	u32 num_entries;
	struct intel_gvt_gtt_entry e;

	intel_runtime_pm_get(dev_priv);

	memset(&e, 0, sizeof(struct intel_gvt_gtt_entry));
	e.type = GTT_TYPE_GGTT_PTE;
	ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn);
@@ -2318,7 +2321,7 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
	for (offset = 0; offset < num_entries; offset++)
		ops->set_entry(NULL, &e, index + offset, false, 0, vgpu);

	intel_runtime_pm_put(dev_priv);
	gtt_invalidate(dev_priv);
}

/**
Loading