Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 51d61207 authored by Dave Airlie
Browse files

Merge branch 'drm-intel-next' of git://anongit.freedesktop.org/drm-intel into drm-next

drm-intel-next-2016-08-22:
- bugfixes and cleanups for rcu-protected requests (Chris)
- atomic modeset fixes for gpu reset on pre-g4x (Maarten&Ville)
- guc submission improvements (Dave Gordon)
- panel power sequence cleanup (Imre)
- better use of stolen and unmappable ggtt (Chris), plus prep work to make that
  happen
- rework of framebuffer offsets, prep for multi-plane framebuffers (Ville)
- fully partial ggtt vmaps, including fenced ones (Chris)
- move lots more of the gem tracking from the object to the vma (Chris)
- tune the command parser (Chris)
- allow fbc without fences on recent platforms (Chris)
- fbc frontbuffer tracking fixes (Chris)
- fast prefaulting using io-mapping.h pgprot caching (Chris)

* 'drm-intel-next' of git://anongit.freedesktop.org/drm-intel: (141 commits)
  io-mapping: Fixup for different names of writecombine
  io-mapping.h: s/PAGE_KERNEL_IO/PAGE_KERNEL/
  drm/i915: Update DRIVER_DATE to 20160822
  drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass
  drm/i915: Embed the io-mapping struct inside drm_i915_private
  io-mapping: Always create a struct to hold metadata about the io-mapping
  drm/i915/fbc: Allow on unfenced surfaces, for recent gen
  drm/i915/fbc: Don't set an illegal fence if unfenced
  drm/i915: Flush delayed fence releases after reset
  drm/i915: Reattach comment, complete type specification
  drm/i915/cmdparser: Accelerate copies from WC memory
  drm/i915/cmdparser: Use binary search for faster register lookup
  drm/i915/cmdparser: Check for SKIP descriptors first
  drm/i915/cmdparser: Compare against the previous command descriptor
  drm/i915/cmdparser: Improve hash function
  drm/i915/cmdparser: Only cache the dst vmap
  drm/i915/cmdparser: Use cached vmappings
  drm/i915/cmdparser: Add the TIMESTAMP register for the other engines
  drm/i915/cmdparser: Make initialisation failure non-fatal
  drm/i915: Stop discarding GTT cache-domain on unbind vma
  ...
parents 78acdd4a 35124389
Loading
Loading
Loading
Loading
+2 −7
Original line number Diff line number Diff line
@@ -317,16 +317,11 @@ static phys_addr_t __init i85x_stolen_base(int num, int slot, int func,
static phys_addr_t __init i865_stolen_base(int num, int slot, int func,
					   size_t stolen_size)
{
	u16 toud;
	u16 toud = 0;

	/*
	 * FIXME is the graphics stolen memory region
	 * always at TOUD? Ie. is it always the last
	 * one to be allocated by the BIOS?
	 */
	toud = read_pci_config_16(0, 0, 0, I865_TOUD);

	return (phys_addr_t)toud << 16;
	return (phys_addr_t)(toud << 16) + i845_tseg_size();
}

static phys_addr_t __init gen3_stolen_base(int num, int slot, int func,
+2 −0
Original line number Diff line number Diff line
@@ -845,6 +845,8 @@ void intel_gtt_insert_page(dma_addr_t addr,
			   unsigned int flags)
{
	intel_private.driver->write_entry(addr, pg, flags);
	if (intel_private.driver->chipset_flush)
		intel_private.driver->chipset_flush();
}
EXPORT_SYMBOL(intel_gtt_insert_page);

+5 −1
Original line number Diff line number Diff line
@@ -3,12 +3,16 @@
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.

subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
subdir-ccflags-y += \
	$(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA)

# Please keep these build lists sorted!

# core driver code
i915-y := i915_drv.o \
	  i915_irq.o \
	  i915_memcpy.o \
	  i915_mm.o \
	  i915_params.o \
	  i915_pci.o \
          i915_suspend.o \
+161 −148
Original line number Diff line number Diff line
@@ -86,24 +86,25 @@
 * general bitmasking mechanism.
 */

#define STD_MI_OPCODE_MASK  0xFF800000
#define STD_3D_OPCODE_MASK  0xFFFF0000
#define STD_2D_OPCODE_MASK  0xFFC00000
#define STD_MFX_OPCODE_MASK 0xFFFF0000
#define STD_MI_OPCODE_SHIFT  (32 - 9)
#define STD_3D_OPCODE_SHIFT  (32 - 16)
#define STD_2D_OPCODE_SHIFT  (32 - 10)
#define STD_MFX_OPCODE_SHIFT (32 - 16)
#define MIN_OPCODE_SHIFT 16

#define CMD(op, opm, f, lm, fl, ...)				\
	{							\
		.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0),	\
		.cmd = { (op), (opm) },				\
		.cmd = { (op), ~0u << (opm) },			\
		.length = { (lm) },				\
		__VA_ARGS__					\
	}

/* Convenience macros to compress the tables */
#define SMI STD_MI_OPCODE_MASK
#define S3D STD_3D_OPCODE_MASK
#define S2D STD_2D_OPCODE_MASK
#define SMFX STD_MFX_OPCODE_MASK
#define SMI STD_MI_OPCODE_SHIFT
#define S3D STD_3D_OPCODE_SHIFT
#define S2D STD_2D_OPCODE_SHIFT
#define SMFX STD_MFX_OPCODE_SHIFT
#define F true
#define S CMD_DESC_SKIP
#define R CMD_DESC_REJECT
@@ -350,6 +351,9 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
};

static const struct drm_i915_cmd_descriptor noop_desc =
	CMD(MI_NOOP, SMI, F, 1, S);

#undef CMD
#undef SMI
#undef S3D
@@ -458,6 +462,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
	REG32(GEN7_GPGPU_DISPATCHDIMX),
	REG32(GEN7_GPGPU_DISPATCHDIMY),
	REG32(GEN7_GPGPU_DISPATCHDIMZ),
	REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0),
	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1),
	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2),
@@ -473,6 +478,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
	REG32(GEN7_L3SQCREG1),
	REG32(GEN7_L3CNTLREG2),
	REG32(GEN7_L3CNTLREG3),
	REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};

static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
@@ -502,7 +508,10 @@ static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
};

static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
	REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
	REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
	REG32(BCS_SWCTRL),
	REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};

static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
@@ -691,12 +700,26 @@ struct cmd_node {
 * non-opcode bits being set. But if we don't include those bits, some 3D
 * commands may hash to the same bucket due to not including opcode bits that
 * make the command unique. For now, we will risk hashing to the same bucket.
 *
 * If we attempt to generate a perfect hash, we should be able to look at bits
 * 31:29 of a command from a batch buffer and use the full mask for that
 * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
 */
#define CMD_HASH_MASK STD_MI_OPCODE_MASK
/*
 * Reduce a command header dword to its lookup key.
 *
 * The instruction client field (x >> INSTR_CLIENT_SHIFT) selects how many
 * low-order bits are discarded: render-client headers use the 3D opcode
 * shift, blitter-client headers use the 2D opcode shift, and the MI client
 * together with every other client value uses the MI opcode shift.
 *
 * Returns the header shifted right by the selected amount.
 */
static inline u32 cmd_header_key(u32 x)
{
	const u32 client = x >> INSTR_CLIENT_SHIFT;

	if (client == INSTR_RC_CLIENT)
		return x >> STD_3D_OPCODE_SHIFT;

	if (client == INSTR_BC_CLIENT)
		return x >> STD_2D_OPCODE_SHIFT;

	/* INSTR_MI_CLIENT and any unrecognised client share the MI shift. */
	return x >> STD_MI_OPCODE_SHIFT;
}

static int init_hash_table(struct intel_engine_cs *engine,
			   const struct drm_i915_cmd_table *cmd_tables,
@@ -720,7 +743,7 @@ static int init_hash_table(struct intel_engine_cs *engine,

			desc_node->desc = desc;
			hash_add(engine->cmd_hash, &desc_node->node,
				 desc->cmd.value & CMD_HASH_MASK);
				 cmd_header_key(desc->cmd.value));
		}
	}

@@ -746,17 +769,15 @@ static void fini_hash_table(struct intel_engine_cs *engine)
 * Optionally initializes fields related to batch buffer command parsing in the
 * struct intel_engine_cs based on whether the platform requires software
 * command parsing.
 *
 * Return: non-zero if initialization fails
 */
int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
{
	const struct drm_i915_cmd_table *cmd_tables;
	int cmd_table_count;
	int ret;

	if (!IS_GEN7(engine->i915))
		return 0;
		return;

	switch (engine->id) {
	case RCS:
@@ -811,24 +832,27 @@ int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
		break;
	default:
		MISSING_CASE(engine->id);
		BUG();
		return;
	}

	BUG_ON(!validate_cmds_sorted(engine, cmd_tables, cmd_table_count));
	BUG_ON(!validate_regs_sorted(engine));

	WARN_ON(!hash_empty(engine->cmd_hash));
	if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) {
		DRM_ERROR("%s: command descriptions are not sorted\n",
			  engine->name);
		return;
	}
	if (!validate_regs_sorted(engine)) {
		DRM_ERROR("%s: registers are not sorted\n", engine->name);
		return;
	}

	ret = init_hash_table(engine, cmd_tables, cmd_table_count);
	if (ret) {
		DRM_ERROR("CMD: cmd_parser_init failed!\n");
		DRM_ERROR("%s: initialised failed!\n", engine->name);
		fini_hash_table(engine);
		return ret;
		return;
	}

	engine->needs_cmd_parser = true;

	return 0;
}

/**
@@ -853,12 +877,9 @@ find_cmd_in_table(struct intel_engine_cs *engine,
	struct cmd_node *desc_node;

	hash_for_each_possible(engine->cmd_hash, desc_node, node,
			       cmd_header & CMD_HASH_MASK) {
			       cmd_header_key(cmd_header)) {
		const struct drm_i915_cmd_descriptor *desc = desc_node->desc;
		u32 masked_cmd = desc->cmd.mask & cmd_header;
		u32 masked_value = desc->cmd.value & desc->cmd.mask;

		if (masked_cmd == masked_value)
		if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
			return desc;
	}

@@ -876,11 +897,14 @@ find_cmd_in_table(struct intel_engine_cs *engine,
static const struct drm_i915_cmd_descriptor*
find_cmd(struct intel_engine_cs *engine,
	 u32 cmd_header,
	 const struct drm_i915_cmd_descriptor *desc,
	 struct drm_i915_cmd_descriptor *default_desc)
{
	const struct drm_i915_cmd_descriptor *desc;
	u32 mask;

	if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
		return desc;

	desc = find_cmd_in_table(engine, cmd_header);
	if (desc)
		return desc;
@@ -889,140 +913,127 @@ find_cmd(struct intel_engine_cs *engine,
	if (!mask)
		return NULL;

	BUG_ON(!default_desc);
	default_desc->flags = CMD_DESC_SKIP;
	default_desc->cmd.value = cmd_header;
	default_desc->cmd.mask = ~0u << MIN_OPCODE_SHIFT;
	default_desc->length.mask = mask;

	default_desc->flags = CMD_DESC_SKIP;
	return default_desc;
}

static const struct drm_i915_reg_descriptor *
find_reg(const struct drm_i915_reg_descriptor *table,
	 int count, u32 addr)
__find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
{
	int i;

	for (i = 0; i < count; i++) {
		if (i915_mmio_reg_offset(table[i].addr) == addr)
			return &table[i];
	int start = 0, end = count;
	while (start < end) {
		int mid = start + (end - start) / 2;
		int ret = addr - i915_mmio_reg_offset(table[mid].addr);
		if (ret < 0)
			end = mid;
		else if (ret > 0)
			start = mid + 1;
		else
			return &table[mid];
	}

	return NULL;
}

static const struct drm_i915_reg_descriptor *
find_reg_in_tables(const struct drm_i915_reg_table *tables,
		   int count, bool is_master, u32 addr)
find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
{
	int i;
	const struct drm_i915_reg_table *table;
	const struct drm_i915_reg_descriptor *reg;
	const struct drm_i915_reg_table *table = engine->reg_tables;
	int count = engine->reg_table_count;

	for (i = 0; i < count; i++) {
		table = &tables[i];
	do {
		if (!table->master || is_master) {
			reg = find_reg(table->regs, table->num_regs,
				       addr);
			const struct drm_i915_reg_descriptor *reg;

			reg = __find_reg(table->regs, table->num_regs, addr);
			if (reg != NULL)
				return reg;
		}
	}
	} while (table++, --count);

	return NULL;
}

static u32 *vmap_batch(struct drm_i915_gem_object *obj,
		       unsigned start, unsigned len)
{
	int i;
	void *addr = NULL;
	struct sg_page_iter sg_iter;
	int first_page = start >> PAGE_SHIFT;
	int last_page = (len + start + 4095) >> PAGE_SHIFT;
	int npages = last_page - first_page;
	struct page **pages;

	pages = drm_malloc_ab(npages, sizeof(*pages));
	if (pages == NULL) {
		DRM_DEBUG_DRIVER("Failed to get space for pages\n");
		goto finish;
	}

	i = 0;
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) {
		pages[i++] = sg_page_iter_page(&sg_iter);
		if (i == npages)
			break;
	}

	addr = vmap(pages, i, 0, PAGE_KERNEL);
	if (addr == NULL) {
		DRM_DEBUG_DRIVER("Failed to vmap pages\n");
		goto finish;
	}

finish:
	if (pages)
		drm_free_large(pages);
	return (u32*)addr;
}

/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
		       struct drm_i915_gem_object *src_obj,
		       u32 batch_start_offset,
		       u32 batch_len)
		       u32 batch_len,
		       bool *needs_clflush_after)
{
	int needs_clflush = 0;
	void *src_base, *src;
	void *dst = NULL;
	unsigned int src_needs_clflush;
	unsigned int dst_needs_clflush;
	void *dst, *src;
	int ret;

	if (batch_len > dest_obj->base.size ||
	    batch_len + batch_start_offset > src_obj->base.size)
		return ERR_PTR(-E2BIG);

	if (WARN_ON(dest_obj->pages_pin_count == 0))
		return ERR_PTR(-ENODEV);

	ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
	if (ret) {
		DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
	if (ret)
		return ERR_PTR(ret);
	}

	src_base = vmap_batch(src_obj, batch_start_offset, batch_len);
	if (!src_base) {
		DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
		ret = -ENOMEM;
	ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
	if (ret) {
		dst = ERR_PTR(ret);
		goto unpin_src;
	}

	ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
	if (ret) {
		DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
		goto unmap_src;
	}
	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
	if (IS_ERR(dst))
		goto unpin_dst;

	dst = vmap_batch(dest_obj, 0, batch_len);
	if (!dst) {
		DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
		ret = -ENOMEM;
		goto unmap_src;
	src = ERR_PTR(-ENODEV);
	if (src_needs_clflush &&
	    i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) {
		src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
		if (!IS_ERR(src)) {
			i915_memcpy_from_wc(dst,
					    src + batch_start_offset,
					    ALIGN(batch_len, 16));
			i915_gem_object_unpin_map(src_obj);
		}
	}
	if (IS_ERR(src)) {
		void *ptr;
		int offset, n;

	src = src_base + offset_in_page(batch_start_offset);
	if (needs_clflush)
		drm_clflush_virt_range(src, batch_len);
		offset = offset_in_page(batch_start_offset);

	memcpy(dst, src, batch_len);
		/* We can avoid clflushing partial cachelines before the write
		 * if we only ever write full cache-lines. Since we know that
		 * both the source and destination are in multiples of
		 * PAGE_SIZE, we can simply round up to the next cacheline.
		 * We don't care about copying too much here as we only
		 * validate up to the end of the batch.
		 */
		if (dst_needs_clflush & CLFLUSH_BEFORE)
			batch_len = roundup(batch_len,
					    boot_cpu_data.x86_clflush_size);

unmap_src:
	vunmap(src_base);
unpin_src:
	i915_gem_object_unpin_pages(src_obj);
		ptr = dst;
		for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
			int len = min_t(int, batch_len, PAGE_SIZE - offset);

			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
			if (src_needs_clflush)
				drm_clflush_virt_range(src + offset, len);
			memcpy(ptr, src + offset, len);
			kunmap_atomic(src);

			ptr += len;
			batch_len -= len;
			offset = 0;
		}
	}

	/* dst_obj is returned with vmap pinned */
	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;

	return ret ? ERR_PTR(ret) : dst;
unpin_dst:
	i915_gem_obj_finish_shmem_access(dst_obj);
unpin_src:
	i915_gem_obj_finish_shmem_access(src_obj);
	return dst;
}

/**
@@ -1052,6 +1063,9 @@ static bool check_cmd(const struct intel_engine_cs *engine,
		      const bool is_master,
		      bool *oacontrol_set)
{
	if (desc->flags & CMD_DESC_SKIP)
		return true;

	if (desc->flags & CMD_DESC_REJECT) {
		DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
		return false;
@@ -1076,10 +1090,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
		     offset += step) {
			const u32 reg_addr = cmd[offset] & desc->reg.mask;
			const struct drm_i915_reg_descriptor *reg =
				find_reg_in_tables(engine->reg_tables,
						   engine->reg_table_count,
						   is_master,
						   reg_addr);
				find_reg(engine, is_master, reg_addr);

			if (!reg) {
				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
@@ -1200,16 +1211,19 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
			    u32 batch_len,
			    bool is_master)
{
	u32 *cmd, *batch_base, *batch_end;
	struct drm_i915_cmd_descriptor default_desc = { 0 };
	u32 *cmd, *batch_end;
	struct drm_i915_cmd_descriptor default_desc = noop_desc;
	const struct drm_i915_cmd_descriptor *desc = &default_desc;
	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
	bool needs_clflush_after = false;
	int ret = 0;

	batch_base = copy_batch(shadow_batch_obj, batch_obj,
				batch_start_offset, batch_len);
	if (IS_ERR(batch_base)) {
	cmd = copy_batch(shadow_batch_obj, batch_obj,
			 batch_start_offset, batch_len,
			 &needs_clflush_after);
	if (IS_ERR(cmd)) {
		DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
		return PTR_ERR(batch_base);
		return PTR_ERR(cmd);
	}

	/*
@@ -1217,17 +1231,14 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
	 * large or larger and copy_batch() will write MI_NOPs to the extra
	 * space. Parsing should be faster in some cases this way.
	 */
	batch_end = batch_base + (batch_len / sizeof(*batch_end));

	cmd = batch_base;
	batch_end = cmd + (batch_len / sizeof(*batch_end));
	while (cmd < batch_end) {
		const struct drm_i915_cmd_descriptor *desc;
		u32 length;

		if (*cmd == MI_BATCH_BUFFER_END)
			break;

		desc = find_cmd(engine, *cmd, &default_desc);
		desc = find_cmd(engine, *cmd, desc, &default_desc);
		if (!desc) {
			DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
					 *cmd);
@@ -1278,7 +1289,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
		ret = -EINVAL;
	}

	vunmap(batch_base);
	if (ret == 0 && needs_clflush_after)
		drm_clflush_virt_range(shadow_batch_obj->mapping, batch_len);
	i915_gem_object_unpin_map(shadow_batch_obj);

	return ret;
}
+142 −192

File changed.

Preview size limit exceeded, changes collapsed.

Loading