Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5feb6f82 authored by qctecmdr's avatar qctecmdr Committed by Gerrit - the friendly Code Review server
Browse files

Merge "msm: kgsl: Make the "scratch" global buffer use a random GPU address"

parents 580041f9 63767aa7
Loading
Loading
Loading
Loading
+45 −11
Original line number Diff line number Diff line
@@ -308,6 +308,11 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev,
		PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc");
	if (ret)
		return ret;

	/* allocate a chunk of memory to create user profiling IB1s */
	kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->profile_desc,
		PAGE_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc");

	return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc,
			KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY,
			0, "ringbuffer");
@@ -322,7 +327,7 @@ int adreno_ringbuffer_probe(struct adreno_device *adreno_dev, bool nopreempt)

	if (!adreno_is_a3xx(adreno_dev)) {
		status = kgsl_allocate_global(device, &device->scratch,
				PAGE_SIZE, 0, 0, "scratch");
				PAGE_SIZE, 0, KGSL_MEMDESC_RANDOM, "scratch");
		if (status != 0)
			return status;
	}
@@ -365,7 +370,7 @@ static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev,
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	kgsl_free_global(device, &rb->pagetable_desc);

	kgsl_free_global(device, &rb->profile_desc);
	kgsl_free_global(device, &rb->buffer_desc);
	kgsl_del_event_group(&rb->events);
	memset(rb, 0, sizeof(struct adreno_ringbuffer));
@@ -870,6 +875,37 @@ static inline int _get_alwayson_counter(struct adreno_device *adreno_dev,
	return (unsigned int)(p - cmds);
}

/*
 * Size, in dwords, of one user profiling IB1 slot. This is the maximum
 * possible size for 64 bit targets.
 */
#define PROFILE_IB_DWORDS 4
/* Number of slots in one page: PAGE_SIZE / (dwords per slot * 4 bytes) */
#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))

/*
 * set_user_profiling() - Queue an IB1 that runs the user profiling commands
 * @adreno_dev: Adreno device the commands are built for
 * @rb: Ringbuffer whose profile_desc supplies the next IB1 slot
 * @cmds: Location where the CP_INDIRECT_BUFFER_PFE packet is written
 * @gpuaddr: GPU address handed to _get_alwayson_counter() as its target
 *
 * The commands produced by _get_alwayson_counter() are written into the
 * current slot of rb->profile_desc, and a packet pointing at that slot is
 * written to @cmds. The slot index then advances, wrapping at
 * PROFILE_IB_SLOTS.
 *
 * Return: number of dwords written to @cmds, or 0 when rb->profile_desc has
 * no host mapping.
 */
static int set_user_profiling(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb, u32 *cmds, u64 gpuaddr)
{
	u32 *out = cmds;
	u32 *slot_host;
	u64 slot_gpuaddr;
	u32 slot;
	int slot_dwords;

	/* Nothing to do if the profiling buffer was never mapped */
	if (rb->profile_desc.hostptr == NULL)
		return 0;

	slot = rb->profile_index;

	/* Locate the current slot from both the CPU and the GPU side */
	slot_host = (u32 *) rb->profile_desc.hostptr;
	slot_host += slot * PROFILE_IB_DWORDS;

	slot_gpuaddr = rb->profile_desc.gpuaddr;
	slot_gpuaddr += slot * (PROFILE_IB_DWORDS << 2);

	/* Fill the slot with the commands for this request */
	slot_dwords = _get_alwayson_counter(adreno_dev, slot_host, gpuaddr);

	/* Emit the indirect buffer packet that points at the slot */
	*out++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
	out += cp_gpuaddr(adreno_dev, out, slot_gpuaddr);
	*out++ = slot_dwords;

	/* Move on to the next slot, wrapping at the end of the page */
	rb->profile_index = (slot + 1) % PROFILE_IB_SLOTS;

	return (int)(out - cmds);
}

/* adreno_ringbuffer_submitcmd - submit userspace IBs to the GPU */
int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
		struct kgsl_drawobj_cmd *cmdobj,
@@ -970,14 +1006,12 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
		!adreno_is_a3xx(adreno_dev) &&
		(cmdobj->profiling_buf_entry != NULL)) {
		user_profiling = true;
		dwords += 6;

		/*
		 * REG_TO_MEM packet on A5xx and above needs another ordinal.
		 * Add 2 more dwords since we do profiling before and after.
		 * User side profiling uses two IB1s, one before with 4 dwords
		 * per INDIRECT_BUFFER_PFE call
		 */
		if (!ADRENO_LEGACY_PM4(adreno_dev))
			dwords += 2;
		dwords += 8;

		/*
		 * we want to use an adreno_submit_time struct to get the
@@ -1036,11 +1070,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
	}

	/*
	 * Add cmds to read the GPU ticks at the start of command obj and
	 * Add IB1 to read the GPU ticks at the start of command obj and
	 * write it into the appropriate command obj profiling buffer offset
	 */
	if (user_profiling) {
		cmds += _get_alwayson_counter(adreno_dev, cmds,
		cmds += set_user_profiling(adreno_dev, rb, cmds,
			cmdobj->profiling_buffer_gpuaddr +
			offsetof(struct kgsl_drawobj_profiling_buffer,
			gpu_ticks_submitted));
@@ -1088,11 +1122,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
	}

	/*
	 * Add cmds to read the GPU ticks at the end of command obj and
	 * Add IB1 to read the GPU ticks at the end of command obj and
	 * write it into the appropriate command obj profiling buffer offset
	 */
	if (user_profiling) {
		cmds += _get_alwayson_counter(adreno_dev, cmds,
		cmds += set_user_profiling(adreno_dev, rb, cmds,
			cmdobj->profiling_buffer_gpuaddr +
			offsetof(struct kgsl_drawobj_profiling_buffer,
			gpu_ticks_retired));
+13 −1
Original line number Diff line number Diff line
/* Copyright (c) 2002,2007-2018, The Linux Foundation. All rights reserved.
/* Copyright (c) 2002,2007-2019, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -133,6 +133,18 @@ struct adreno_ringbuffer {
	int preempted_midway;
	spinlock_t preempt_lock;
	bool skip_inline_wptr;
	/**
	 * @profile_desc: global memory to construct IB1s to do user side
	 * profiling
	 */
	struct kgsl_memdesc profile_desc;
	/**
	 * @profile_index: Index of the next "slot" in profile_desc for a user
	 * profiling IB1.  This allows for PAGE_SIZE / 16 simultaneous commands
	 * per ringbuffer with user profiling enabled (256 with 4K pages), which
	 * should be enough.
	 */
	u32 profile_index;
};

/* Returns the current ringbuffer */
+2 −0
Original line number Diff line number Diff line
@@ -201,6 +201,8 @@ struct kgsl_memdesc_ops {
#define KGSL_MEMDESC_CONTIG BIT(8)
/* This is an instruction buffer */
#define KGSL_MEMDESC_UCODE BIT(9)
/* For global buffers, randomly assign an address from the region */
#define KGSL_MEMDESC_RANDOM BIT(10)

/**
 * struct kgsl_memdesc - GPU memory object descriptor
+20 −4
Original line number Diff line number Diff line
@@ -598,13 +598,29 @@ static void add_profiling_buffer(struct kgsl_device *device,
		return;
	}

	cmdobj->profiling_buf_entry = entry;

	if (id != 0)
	if (!id) {
		cmdobj->profiling_buffer_gpuaddr = gpuaddr;
	} else {
		u64 off = offset + sizeof(struct kgsl_drawobj_profiling_buffer);

		/*
		 * Make sure there is enough room in the object to store the
		 * entire profiling buffer object
		 */
		if (off < offset || off >= entry->memdesc.size) {
			dev_err(device->dev,
				"ignore invalid profile offset ctxt %d id %d offset %lld gpuaddr %llx size %lld\n",
			drawobj->context->id, id, offset, gpuaddr, size);
			kgsl_mem_entry_put(entry);
			return;
		}

		cmdobj->profiling_buffer_gpuaddr =
			entry->memdesc.gpuaddr + offset;
	else
		cmdobj->profiling_buffer_gpuaddr = gpuaddr;
	}

	cmdobj->profiling_buf_entry = entry;
}

/**
+44 −16
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include <linux/msm_kgsl.h>
#include <linux/ratelimit.h>
#include <linux/of_platform.h>
#include <linux/random.h>
#include <soc/qcom/scm.h>
#include <soc/qcom/secure_buffer.h>
#include <linux/compat.h>
@@ -90,15 +91,8 @@ static struct kmem_cache *addr_entry_cache;
 *
 * Here we define an array and a simple allocator to keep track of the currently
 * active global entries. Each entry is assigned a unique address inside of a
 * MMU implementation specific "global" region. The addresses are assigned
 * sequentially and never re-used to avoid having to go back and reprogram
 * existing pagetables. The entire list of active entries are mapped and
 * unmapped into every new pagetable as it is created and destroyed.
 *
 * Because there are relatively few entries and they are defined at boot time we
 * don't need to go over the top to define a dynamic allocation scheme. It will
 * be less wasteful to pick a static number with a little bit of growth
 * potential.
 * MMU implementation specific "global" region. We use a simple bitmap based
 * allocator for the region to allow for both fixed and dynamic addressing.
 */

#define GLOBAL_PT_ENTRIES 32
@@ -108,13 +102,17 @@ struct global_pt_entry {
	char name[32];
};

#define GLOBAL_MAP_PAGES (KGSL_IOMMU_GLOBAL_MEM_SIZE >> PAGE_SHIFT)

static struct global_pt_entry global_pt_entries[GLOBAL_PT_ENTRIES];
static DECLARE_BITMAP(global_map, GLOBAL_MAP_PAGES);

static int secure_global_size;
static int global_pt_count;
uint64_t global_pt_alloc;
static struct kgsl_memdesc gpu_qdss_desc;
static struct kgsl_memdesc gpu_qtimer_desc;
static unsigned int context_bank_number;

void kgsl_print_global_pt_entries(struct seq_file *s)
{
	int i;
@@ -209,6 +207,12 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu,

	for (i = 0; i < global_pt_count; i++) {
		if (global_pt_entries[i].memdesc == memdesc) {
			u64 offset = memdesc->gpuaddr -
				KGSL_IOMMU_GLOBAL_MEM_BASE(mmu);

			bitmap_clear(global_map, offset >> PAGE_SHIFT,
				kgsl_memdesc_footprint(memdesc) >> PAGE_SHIFT);

			memdesc->gpuaddr = 0;
			memdesc->priv &= ~KGSL_MEMDESC_GLOBAL;
			global_pt_entries[i].memdesc = NULL;
@@ -220,19 +224,43 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu,
static void kgsl_iommu_add_global(struct kgsl_mmu *mmu,
		struct kgsl_memdesc *memdesc, const char *name)
{
	u32 bit, start = 0;
	u64 size = kgsl_memdesc_footprint(memdesc);

	if (memdesc->gpuaddr != 0)
		return;

	/*Check that we can fit the global allocations */
	if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES) ||
		WARN_ON((global_pt_alloc + memdesc->size) >=
			KGSL_IOMMU_GLOBAL_MEM_SIZE))
	if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES))
		return;

	if (WARN_ON(size > KGSL_IOMMU_GLOBAL_MEM_SIZE))
		return;

	if (memdesc->priv & KGSL_MEMDESC_RANDOM) {
		u32 range = GLOBAL_MAP_PAGES - (size >> PAGE_SHIFT);

		start = get_random_int() % range;
	}

	while (start >= 0) {
		bit = bitmap_find_next_zero_area(global_map, GLOBAL_MAP_PAGES,
			start, size >> PAGE_SHIFT, 0);

		if (bit < GLOBAL_MAP_PAGES)
			break;

		start--;
	}

	if (WARN_ON(start < 0))
		return;

	memdesc->gpuaddr = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + global_pt_alloc;
	memdesc->gpuaddr =
		KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + (bit << PAGE_SHIFT);

	bitmap_set(global_map, bit, size >> PAGE_SHIFT);

	memdesc->priv |= KGSL_MEMDESC_GLOBAL;
	global_pt_alloc += kgsl_memdesc_footprint(memdesc);

	global_pt_entries[global_pt_count].memdesc = memdesc;
	strlcpy(global_pt_entries[global_pt_count].name, name,