Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a2184922 authored by Jordan Crouse
Browse files

msm: kgsl: Refactor GPU addressing



32 bit addresses can take us only so far. The current state of the driver
works great for 32 bit addresses but not so much for 64 bit addresses. Put
generic memory mapping code into place that will allow for 32 bit or 64
bit addressing schemes.

Change-Id: Ic0dedbadb8d0e44a0914f99c223b398fb9fde153
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
parent a820922f
Loading
Loading
Loading
Loading
+66 −59
Original line number Diff line number Diff line
@@ -117,9 +117,8 @@ static struct adreno_ib_object *adreno_ib_check_overlap(uint64_t gpuaddr,
}

/*
 * adreno_ib_add_range() - Add a gpuaddress range to list
 * adreno_ib_add() - Add a gpuaddress range to list
 * @process: Process in which the gpuaddress is mapped
 * @size: Size of the address range in concern
 * @type: The type of address range
 * @ib_obj_list: List of the address ranges in which the given range is to be
 * added
@@ -130,18 +129,18 @@ static struct adreno_ib_object *adreno_ib_check_overlap(uint64_t gpuaddr,
 *
 * Returns 0 on success else error code
 */
static int adreno_ib_add_range(struct kgsl_process_private *process,
				uint64_t gpuaddr,
				uint64_t size, int type,
static int adreno_ib_add(struct kgsl_process_private *process,
				uint64_t gpuaddr, int type,
				struct adreno_ib_object_list *ib_obj_list)
{
	uint64_t size;
	struct adreno_ib_object *ib_obj;
	struct kgsl_mem_entry *entry;

	if (MAX_IB_OBJS <= ib_obj_list->num_objs)
		return -E2BIG;

	entry = kgsl_sharedmem_find_region(process, gpuaddr, size);
	entry = kgsl_sharedmem_find(process, gpuaddr);
	if (!entry)
		/*
		 * Do not fail if gpuaddr not found, we can continue
@@ -150,10 +149,8 @@ static int adreno_ib_add_range(struct kgsl_process_private *process,
		 */
		return 0;

	if (!size) {
	size = entry->memdesc.size;
	gpuaddr = entry->memdesc.gpuaddr;
	}

	ib_obj = adreno_ib_check_overlap(gpuaddr, size, type, ib_obj_list);
	if (ib_obj) {
@@ -196,23 +193,30 @@ static int ib_save_mip_addresses(unsigned int *pkt,
		unitsize = load_state_unit_sizes[block][1];

	if (3 == block && 1 == type) {
		ent = kgsl_sharedmem_find_region(process, pkt[2] & 0xFFFFFFFC,
					(num_levels * unitsize) << 2);
		if (!ent)
		uint64_t gpuaddr = pkt[2] & 0xFFFFFFFC;
		uint64_t size = (num_levels * unitsize) << 2;

		ent = kgsl_sharedmem_find(process, gpuaddr);
		if (ent == NULL)
			return 0;

		hostptr = kgsl_gpuaddr_to_vaddr(&ent->memdesc,
				pkt[2] & 0xFFFFFFFC);
		if (!hostptr) {
		if (!kgsl_gpuaddr_in_memdesc(&ent->memdesc,
			gpuaddr, size)) {
			kgsl_mem_entry_put(ent);
			return 0;
		}

		hostptr = kgsl_gpuaddr_to_vaddr(&ent->memdesc, gpuaddr);
		if (hostptr != NULL) {
			for (i = 0; i < num_levels; i++) {
			ret = adreno_ib_add_range(process, hostptr[i],
				0, SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
				ret = adreno_ib_add(process, hostptr[i],
					SNAPSHOT_GPU_OBJECT_GENERIC,
					ib_obj_list);
				if (ret)
					break;
			}
		}

		kgsl_memdesc_unmap(&ent->memdesc);
		kgsl_mem_entry_put(ent);
	}
@@ -256,7 +260,7 @@ static int ib_parse_load_state(unsigned int *pkt,
	 * like memory
	 */
	for (i = 0; i <= (type3_pkt_size(pkt[0]) - 2); i++) {
		ret |= adreno_ib_add_range(process, pkt[2 + i] & 0xFFFFFFFC, 0,
		ret |= adreno_ib_add(process, pkt[2 + i] & 0xFFFFFFFC,
				SNAPSHOT_GPU_OBJECT_GENERIC,
				ib_obj_list);
		if (ret)
@@ -284,13 +288,13 @@ static int ib_parse_set_bin_data(unsigned int *pkt,
		return 0;

	/* Visibility stream buffer */
	ret = adreno_ib_add_range(process, pkt[1], 0,
	ret = adreno_ib_add(process, pkt[1],
		SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
	if (ret)
		return ret;

	/* visibility stream size buffer (fixed size 8 dwords) */
	ret = adreno_ib_add_range(process, pkt[2], 0,
	ret = adreno_ib_add(process, pkt[2],
		SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);

	return ret;
@@ -316,7 +320,7 @@ static int ib_parse_mem_write(unsigned int *pkt,
	 * to get the whole thing. Pass a size of 0 to capture the entire buffer.
	 */

	return adreno_ib_add_range(process, pkt[1] & 0xFFFFFFFC, 0,
	return adreno_ib_add(process, pkt[1] & 0xFFFFFFFC,
		SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
}

@@ -348,9 +352,9 @@ static int ib_add_type0_entries(struct kgsl_device *device,
	for (i = ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0;
		i < ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7; i++) {
		if (ib_parse_vars->cp_addr_regs[i]) {
			ret = adreno_ib_add_range(process,
			ret = adreno_ib_add(process,
				ib_parse_vars->cp_addr_regs[i] & mask,
				0, SNAPSHOT_GPU_OBJECT_GENERIC,
				SNAPSHOT_GPU_OBJECT_GENERIC,
				ib_obj_list);
			if (ret)
				return ret;
@@ -366,9 +370,9 @@ static int ib_add_type0_entries(struct kgsl_device *device,
	for (i = ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0;
		i <= vfd_end; i++) {
		if (ib_parse_vars->cp_addr_regs[i]) {
			ret = adreno_ib_add_range(process,
			ret = adreno_ib_add(process,
				ib_parse_vars->cp_addr_regs[i],
				0, SNAPSHOT_GPU_OBJECT_GENERIC,
				SNAPSHOT_GPU_OBJECT_GENERIC,
				ib_obj_list);
			if (ret)
				return ret;
@@ -377,10 +381,10 @@ static int ib_add_type0_entries(struct kgsl_device *device,
	}

	if (ib_parse_vars->cp_addr_regs[ADRENO_CP_ADDR_VSC_SIZE_ADDRESS]) {
		ret = adreno_ib_add_range(process,
		ret = adreno_ib_add(process,
			ib_parse_vars->cp_addr_regs[
				ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] & mask,
			0, SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
			SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
		if (ret)
			return ret;
		ib_parse_vars->cp_addr_regs[
@@ -389,9 +393,9 @@ static int ib_add_type0_entries(struct kgsl_device *device,
	mask = 0xFFFFFFE0;
	for (i = ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR;
		i <= ADRENO_CP_ADDR_SP_FS_OBJ_START_REG; i++) {
		ret = adreno_ib_add_range(process,
		ret = adreno_ib_add(process,
			ib_parse_vars->cp_addr_regs[i] & mask,
			0, SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
			SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
		if (ret)
			return ret;
		ib_parse_vars->cp_addr_regs[i] = 0;
@@ -417,47 +421,47 @@ static int ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt,
	switch (opcode) {
	case CP_DRAW_INDX:
		if (type3_pkt_size(pkt[0]) > 3) {
			ret = adreno_ib_add_range(process,
				pkt[4], 0,
				SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
			ret = adreno_ib_add(process,
				pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC,
				ib_obj_list);
		}
		break;
	case CP_DRAW_INDX_OFFSET:
		if (type3_pkt_size(pkt[0]) == 6) {
			ret = adreno_ib_add_range(process,
				pkt[5], 0,
				SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
			ret = adreno_ib_add(process,
				pkt[5], SNAPSHOT_GPU_OBJECT_GENERIC,
				ib_obj_list);
		}
		break;
	case CP_DRAW_INDIRECT:
		if (type3_pkt_size(pkt[0]) == 2) {
			ret = adreno_ib_add_range(process,
				pkt[2], 0,
				SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
			ret = adreno_ib_add(process,
				pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC,
				ib_obj_list);
		}
		break;
	case CP_DRAW_INDX_INDIRECT:
		if (type3_pkt_size(pkt[0]) == 4) {
			ret = adreno_ib_add_range(process,
				pkt[2], 0,
				SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
			ret = adreno_ib_add(process,
				pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC,
				ib_obj_list);
			if (ret)
				break;
			ret = adreno_ib_add_range(process,
				pkt[4], 0,
				SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
			ret = adreno_ib_add(process,
				pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC,
				ib_obj_list);
		}
		break;
	case CP_DRAW_AUTO:
		if (type3_pkt_size(pkt[0]) == 6) {
			ret = adreno_ib_add_range(process,
				 pkt[3], 0, SNAPSHOT_GPU_OBJECT_GENERIC,
			ret = adreno_ib_add(process,
				 pkt[3], SNAPSHOT_GPU_OBJECT_GENERIC,
				ib_obj_list);
			if (ret)
				break;
			ret = adreno_ib_add_range(process,
				pkt[4], 0,
				SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
			ret = adreno_ib_add(process,
				pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC,
				ib_obj_list);
		}
		break;
	}
@@ -633,8 +637,8 @@ static int ib_parse_type0(struct kgsl_device *device, unsigned int *ptr,
					ADRENO_CP_UCHE_INVALIDATE0)) ||
				(offset == adreno_cp_parser_getreg(adreno_dev,
					ADRENO_CP_UCHE_INVALIDATE1))) {
					ret = adreno_ib_add_range(process,
						ptr[i + 1] & 0xFFFFFFC0, 0,
					ret = adreno_ib_add(process,
						ptr[i + 1] & 0xFFFFFFC0,
						SNAPSHOT_GPU_OBJECT_GENERIC,
						ib_obj_list);
					if (ret)
@@ -847,11 +851,15 @@ static int adreno_ib_find_objs(struct kgsl_device *device,
			return 0;
	}

	entry = kgsl_sharedmem_find_region(process, gpuaddr,
					(dwords << 2));
	entry = kgsl_sharedmem_find(process, gpuaddr);
	if (!entry)
		return -EINVAL;

	if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, (dwords << 2))) {
		kgsl_mem_entry_put(entry);
		return -EINVAL;
	}

	src = kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr);
	if (!src) {
		kgsl_mem_entry_put(entry);
@@ -860,8 +868,7 @@ static int adreno_ib_find_objs(struct kgsl_device *device,

	memset(&ib_parse_vars, 0, sizeof(struct ib_parser_variables));

	ret = adreno_ib_add_range(process, gpuaddr, dwords << 2,
				obj_type, ib_obj_list);
	ret = adreno_ib_add(process, gpuaddr, obj_type, ib_obj_list);
	if (ret)
		goto done;

+44 −20
Original line number Diff line number Diff line
@@ -83,13 +83,20 @@ static void push_object(int type,
		return;
	}

	entry = kgsl_sharedmem_find_region(process, gpuaddr, dwords << 2);
	entry = kgsl_sharedmem_find(process, gpuaddr);
	if (entry == NULL) {
		KGSL_CORE_ERR("snapshot: Can't find entry for 0x%016llX\n",
			gpuaddr);
		return;
	}

	if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, dwords << 2)) {
		KGSL_CORE_ERR("snapshot: Mem entry 0x%016llX is too small\n",
			gpuaddr);
		kgsl_mem_entry_put(entry);
		return;
	}

	/* Put it on the list of things to parse */
	objbuf[objbufptr].type = type;
	objbuf[objbufptr].gpuaddr = gpuaddr;
@@ -380,14 +387,34 @@ static size_t snapshot_rb(struct kgsl_device *device, u8 *buf,
	return KGSL_RB_SIZE + sizeof(*header);
}

/*
 * _count_mem_entries() - Callback that tallies one visited entry
 * @id: Identifier of the visited entry (unused)
 * @ptr: Pointer to the visited entry (unused)
 * @data: Pointer to the int counter to be incremented
 *
 * Handed to idr_for_each() to count the memory entries of a process.
 * Each invocation bumps the counter behind @data by one.
 *
 * Always returns 0.
 */
static int _count_mem_entries(int id, void *ptr, void *data)
{
	int *total = data;

	(*total)++;
	return 0;
}

/*
 * _save_mem_entries() - Callback that records one memory entry
 * @id: Identifier of the visited entry (unused)
 * @ptr: The visited entry, a struct kgsl_mem_entry
 * @data: Address of the caller's output cursor (unsigned int **)
 *
 * Handed to idr_for_each() to emit the (gpuaddr, size, memtype) tuple of
 * each entry. Three unsigned ints are stored at the cursor and the cursor
 * is then moved past them so the next invocation appends after this one.
 * gpuaddr and size are cast to unsigned int before being stored.
 *
 * Always returns 0.
 */
static int _save_mem_entries(int id, void *ptr, void *data)
{
	unsigned int **cursor = data;
	struct kgsl_mem_entry *entry = ptr;
	unsigned int *out = *cursor;

	out[0] = (unsigned int) entry->memdesc.gpuaddr;
	out[1] = (unsigned int) entry->memdesc.size;
	out[2] = kgsl_memdesc_get_memtype(&entry->memdesc);

	/* Hand the advanced write position back to the caller */
	*cursor = out + 3;
	return 0;
}

static size_t snapshot_capture_mem_list(struct kgsl_device *device,
		u8 *buf, size_t remain, void *priv)
{
	struct kgsl_snapshot_replay_mem_list *header =
		(struct kgsl_snapshot_replay_mem_list *)buf;
	struct rb_node *node;
	struct kgsl_mem_entry *entry = NULL;
	int num_mem;
	int num_mem = 0;
	int ret = 0;
	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
	struct kgsl_process_private *process = priv;

@@ -395,20 +422,20 @@ static size_t snapshot_capture_mem_list(struct kgsl_device *device,
	if (process == NULL)
		return 0;

	/* We need to know the number of memory objects that the process has */
	spin_lock(&process->mem_lock);
	for (node = rb_first(&process->mem_rb), num_mem = 0; node; ) {
		entry = rb_entry(node, struct kgsl_mem_entry, node);
		node = rb_next(&entry->node);
		num_mem++;
	}

	/* We need to know the number of memory objects that the process has */
	idr_for_each(&process->mem_idr, _count_mem_entries, &num_mem);

	if (num_mem == 0)
		goto out;

	if (remain < ((num_mem * 3 * sizeof(unsigned int)) +
			sizeof(*header))) {
		KGSL_CORE_ERR("snapshot: Not enough memory for the mem list");
		spin_unlock(&process->mem_lock);
		return 0;
		goto out;
	}

	header->num_entries = num_mem;
	header->ptbase =
	 (__u32)kgsl_mmu_pagetable_get_ptbase(process->pagetable);
@@ -416,16 +443,13 @@ static size_t snapshot_capture_mem_list(struct kgsl_device *device,
	 * Walk through the memory list and store the
	 * tuples(gpuaddr, size, memtype) in snapshot
	 */
	for (node = rb_first(&process->mem_rb); node; ) {
		entry = rb_entry(node, struct kgsl_mem_entry, node);
		node = rb_next(&entry->node);

		*data++ = (unsigned int) entry->memdesc.gpuaddr;
		*data++ = (unsigned int) entry->memdesc.size;
		*data++ = kgsl_memdesc_get_memtype(&entry->memdesc);
	}
	idr_for_each(&process->mem_idr, _save_mem_entries, &data);

	ret = sizeof(*header) + (num_mem * 3 * sizeof(unsigned int));
out:
	spin_unlock(&process->mem_lock);
	return sizeof(*header) + (num_mem * 3 * sizeof(unsigned int));
	return ret;
}

struct snapshot_ib_meta {
+180 −543

File changed.

Preview size limit exceeded, changes collapsed.

+2 −12
Original line number Diff line number Diff line
@@ -40,15 +40,6 @@
/* Timestamp window used to detect rollovers (half of integer range) */
#define KGSL_TIMESTAMP_WINDOW 0x80000000

/* The SVM upper bound is the same as the TASK_SIZE in arm32 */
#define KGSL_SVM_UPPER_BOUND (0xC0000000 - SZ_16M)

/*
 * Defines the lowest possible addresses for SVM map. The VA space below
 * has been reserved for GMEM and SP Memory regions.
 */
#define KGSL_SVM_LOWER_BOUND 0x300000

/* A macro for memory statistics - add the new size to the stat and if
   the statistic is greater than _max, set _max
*/
@@ -324,9 +315,8 @@ long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv,

void kgsl_mem_entry_destroy(struct kref *kref);

struct kgsl_mem_entry *kgsl_sharedmem_find_region(
	struct kgsl_process_private *private, uint64_t gpuaddr,
	uint64_t size);
struct kgsl_mem_entry * __must_check
kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr);

struct kgsl_mem_entry * __must_check
kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id);
+9 −27
Original line number Diff line number Diff line
@@ -604,18 +604,18 @@ static void add_profiling_buffer(struct kgsl_device *device,
	if (cmdbatch->profiling_buf_entry != NULL)
		return;

	if (id != 0) {
	if (id != 0)
		entry = kgsl_sharedmem_find_id(cmdbatch->context->proc_priv,
				id);
	else
		entry = kgsl_sharedmem_find(cmdbatch->context->proc_priv,
			gpuaddr);

		/* Make sure the offset is in range */
		if (entry && offset > entry->memdesc.size) {
	if (entry != NULL) {
		if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) {
			kgsl_mem_entry_put(entry);
			entry = NULL;
		}
	} else {
		entry = kgsl_sharedmem_find_region(cmdbatch->context->proc_priv,
			gpuaddr, size);
	}

	if (entry == NULL) {
@@ -672,27 +672,9 @@ int kgsl_cmdbatch_add_ibdesc(struct kgsl_device *device,
		/* add to the memlist */
		list_add_tail(&mem->node, &cmdbatch->memlist);

		/*
		 * If the memlist contains a cmdbatch profiling buffer, store
		 * the mem_entry containing the buffer and the gpuaddr at
		 * which the buffer can be found
		 */
		if (cmdbatch->flags & KGSL_CMDBATCH_PROFILING &&
			ibdesc->ctrl & KGSL_IBDESC_PROFILING_BUFFER &&
			!cmdbatch->profiling_buf_entry) {
			cmdbatch->profiling_buf_entry =
				kgsl_sharedmem_find_region(
				cmdbatch->context->proc_priv, mem->gpuaddr,
				mem->size);
			if (!cmdbatch->profiling_buf_entry) {
				WARN_ONCE(1,
				"No mem entry for profiling buf, gpuaddr=%llx\n",
				mem->gpuaddr);
				return 0;
			}

			cmdbatch->profiling_buffer_gpuaddr = mem->gpuaddr;
		}
		if (ibdesc->ctrl & KGSL_IBDESC_PROFILING_BUFFER)
			add_profiling_buffer(device, cmdbatch, mem->gpuaddr,
				mem->size, 0, 0);
	} else {
		/* Ignore if SYNC or MARKER is specified */
		if (cmdbatch->flags &
Loading