Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ec229a74 authored by Linux Build Service Account's avatar Linux Build Service Account Committed by Gerrit - the friendly Code Review server
Browse files

Merge "msm: kgsl: Make the "scratch" global buffer use a random GPU address"

parents d2896f14 5ffb9e5b
Loading
Loading
Loading
Loading
+46 −12
Original line number Diff line number Diff line
/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
/* Copyright (c) 2002,2007-2017,2019, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -259,6 +259,11 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev,
		PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc");
	if (ret)
		return ret;

	/* allocate a chunk of memory to create user profiling IB1s */
	kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->profile_desc,
		PAGE_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc");

	return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc,
			KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY,
			0, "ringbuffer");
@@ -272,7 +277,7 @@ int adreno_ringbuffer_probe(struct adreno_device *adreno_dev, bool nopreempt)

	if (!adreno_is_a3xx(adreno_dev)) {
		status = kgsl_allocate_global(device, &device->scratch,
				PAGE_SIZE, 0, 0, "scratch");
				PAGE_SIZE, 0, KGSL_MEMDESC_RANDOM, "scratch");
		if (status != 0)
			return status;
	}
@@ -303,7 +308,7 @@ static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev,

	kgsl_free_global(device, &rb->pagetable_desc);
	kgsl_free_global(device, &rb->preemption_desc);

	kgsl_free_global(device, &rb->profile_desc);
	kgsl_free_global(device, &rb->buffer_desc);
	kgsl_del_event_group(&rb->events);
	memset(rb, 0, sizeof(struct adreno_ringbuffer));
@@ -737,6 +742,37 @@ static inline int _get_alwayson_counter(struct adreno_device *adreno_dev,
	return (unsigned int)(p - cmds);
}

/*
 * Number of dwords reserved for each user profiling IB1 slot.
 * This is the maximum possible size for 64 bit targets.
 */
#define PROFILE_IB_DWORDS 4
/* Number of slots that fit in PAGE_SIZE bytes (each slot is dwords * 4 bytes) */
#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))

/**
 * set_user_profiling() - Emit an IB1 call that performs one profiling read
 * @adreno_dev: Adreno device handle, passed through to the packet helpers
 * @rb: Ringbuffer whose profile_desc provides the slot holding the IB1 body
 * @cmds: Location in the command stream where the IB1 call is written
 * @gpuaddr: GPU address passed to _get_alwayson_counter() as its target
 *
 * The commands produced by _get_alwayson_counter() are written into the next
 * free slot of rb->profile_desc rather than directly into @cmds. @cmds only
 * receives a CP_INDIRECT_BUFFER_PFE packet pointing at that slot. The slot
 * index then advances, wrapping after PROFILE_IB_SLOTS entries.
 *
 * Return: Number of dwords written to @cmds, or 0 (nothing written) when
 * rb->profile_desc has no host pointer.
 */
static int set_user_profiling(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb, u32 *cmds, u64 gpuaddr)
{
	const u32 slot_index = rb->profile_index;
	u32 *out = cmds;
	u32 *slot;
	u64 slot_gpuaddr;
	int ib_dwords;

	/* Without a host mapping there is nowhere to build the IB1 */
	if (!rb->profile_desc.hostptr)
		return 0;

	/* Host and GPU views of the same slot: dword stride vs. byte stride */
	slot = ((u32 *) rb->profile_desc.hostptr) +
		(slot_index * PROFILE_IB_DWORDS);
	slot_gpuaddr = rb->profile_desc.gpuaddr +
		(slot_index * (PROFILE_IB_DWORDS << 2));

	/* Fill the slot; the helper reports how many dwords it used */
	ib_dwords = _get_alwayson_counter(adreno_dev, slot, gpuaddr);

	/* Make an indirect buffer for the request: header, address, length */
	*out++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
	out += cp_gpuaddr(adreno_dev, out, slot_gpuaddr);
	*out++ = ib_dwords;

	/* Move on to the next slot, wrapping at the end of the buffer */
	rb->profile_index = (slot_index + 1) % PROFILE_IB_SLOTS;

	return (int)(out - cmds);
}

/* adreno_ringbuffer_submitcmd - submit userspace IBs to the GPU */
int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
		struct kgsl_drawobj_cmd *cmdobj,
@@ -836,14 +872,12 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
	if (drawobj->flags & KGSL_DRAWOBJ_PROFILING &&
		!adreno_is_a3xx(adreno_dev) && profile_buffer) {
		user_profiling = true;
		dwords += 6;

		/*
		 * REG_TO_MEM packet on A5xx and above needs another ordinal.
		 * Add 2 more dwords since we do profiling before and after.
		 * User side profiling uses two IB1s, one before and one after
		 * the command obj, with 4 dwords per INDIRECT_BUFFER_PFE call
		 */
		if (!ADRENO_LEGACY_PM4(adreno_dev))
			dwords += 2;
		dwords += 8;

		/*
		 * we want to use an adreno_submit_time struct to get the
@@ -886,11 +920,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
	}

	/*
	 * Add cmds to read the GPU ticks at the start of command obj and
	 * Add IB1 to read the GPU ticks at the start of command obj and
	 * write it into the appropriate command obj profiling buffer offset
	 */
	if (user_profiling) {
		cmds += _get_alwayson_counter(adreno_dev, cmds,
		cmds += set_user_profiling(adreno_dev, rb, cmds,
			cmdobj->profiling_buffer_gpuaddr +
			offsetof(struct kgsl_drawobj_profiling_buffer,
			gpu_ticks_submitted));
@@ -929,11 +963,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
	}

	/*
	 * Add cmds to read the GPU ticks at the end of command obj and
	 * Add IB1 to read the GPU ticks at the end of command obj and
	 * write it into the appropriate command obj profiling buffer offset
	 */
	if (user_profiling) {
		cmds += _get_alwayson_counter(adreno_dev, cmds,
		cmds += set_user_profiling(adreno_dev, rb, cmds,
			cmdobj->profiling_buffer_gpuaddr +
			offsetof(struct kgsl_drawobj_profiling_buffer,
			gpu_ticks_retired));
+13 −1
Original line number Diff line number Diff line
/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
/* Copyright (c) 2002,2007-2016,2019, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -127,6 +127,18 @@ struct adreno_ringbuffer {
	unsigned long sched_timer;
	enum adreno_dispatcher_starve_timer_states starve_timer_state;
	spinlock_t preempt_lock;
	/**
	 * @profile_desc: global memory to construct IB1s to do user side
	 * profiling
	 */
	struct kgsl_memdesc profile_desc;
	/**
	 * @profile_index: Index of the next "slot" in profile_desc for a user
	 * profiling IB1.  This allows for PAGE_SIZE / 16 = 256 simultaneous
	 * commands per ringbuffer with user profiling enabled, which should
	 * be enough.
	 */
	u32 profile_index;
};

/* Returns the current ringbuffer */
+3 −1
Original line number Diff line number Diff line
/* Copyright (c) 2008-2016, 2018, The Linux Foundation. All rights reserved.
/* Copyright (c) 2008-2016,2018-2019, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -189,6 +189,8 @@ struct kgsl_memdesc_ops {
#define KGSL_MEMDESC_TZ_LOCKED BIT(7)
/* The memdesc is allocated through contiguous memory */
#define KGSL_MEMDESC_CONTIG BIT(8)
/* For global buffers, randomly assign an address from the region */
#define KGSL_MEMDESC_RANDOM BIT(9)

/**
 * struct kgsl_memdesc - GPU memory object descriptor
+45 −15
Original line number Diff line number Diff line
/* Copyright (c) 2011-2018, The Linux Foundation. All rights reserved.
/* Copyright (c) 2011-2019, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -20,6 +20,7 @@
#include <linux/msm_kgsl.h>
#include <linux/ratelimit.h>
#include <linux/of_platform.h>
#include <linux/random.h>
#include <soc/qcom/scm.h>
#include <soc/qcom/secure_buffer.h>
#include <stddef.h>
@@ -84,15 +85,8 @@ static struct kmem_cache *addr_entry_cache;
 *
 * Here we define an array and a simple allocator to keep track of the currently
 * active global entries. Each entry is assigned a unique address inside of a
 * MMU implementation specific "global" region. The addresses are assigned
 * sequentially and never re-used to avoid having to go back and reprogram
 * existing pagetables. The entire list of active entries are mapped and
 * unmapped into every new pagetable as it is created and destroyed.
 *
 * Because there are relatively few entries and they are defined at boot time we
 * don't need to go over the top to define a dynamic allocation scheme. It will
 * be less wasteful to pick a static number with a little bit of growth
 * potential.
 * MMU implementation specific "global" region. We use a simple bitmap based
 * allocator for the region to allow for both fixed and dynamic addressing.
 */

#define GLOBAL_PT_ENTRIES 32
@@ -102,10 +96,12 @@ struct global_pt_entry {
	char name[32];
};

#define GLOBAL_MAP_PAGES (KGSL_IOMMU_GLOBAL_MEM_SIZE >> PAGE_SHIFT)

static struct global_pt_entry global_pt_entries[GLOBAL_PT_ENTRIES];
static struct kgsl_memdesc *kgsl_global_secure_pt_entry;
static DECLARE_BITMAP(global_map, GLOBAL_MAP_PAGES);
static int global_pt_count;
uint64_t global_pt_alloc;
static struct kgsl_memdesc gpu_qdss_desc;
static struct kgsl_memdesc gpu_qtimer_desc;

@@ -186,6 +182,12 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu,

	for (i = 0; i < global_pt_count; i++) {
		if (global_pt_entries[i].memdesc == memdesc) {
			u64 offset = memdesc->gpuaddr -
				KGSL_IOMMU_GLOBAL_MEM_BASE(mmu);

			bitmap_clear(global_map, offset >> PAGE_SHIFT,
				kgsl_memdesc_footprint(memdesc) >> PAGE_SHIFT);

			memdesc->gpuaddr = 0;
			memdesc->priv &= ~KGSL_MEMDESC_GLOBAL;
			global_pt_entries[i].memdesc = NULL;
@@ -197,15 +199,43 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu,
static void kgsl_iommu_add_global(struct kgsl_mmu *mmu,
		struct kgsl_memdesc *memdesc, const char *name)
{
	u32 bit, start = 0;
	u64 size = kgsl_memdesc_footprint(memdesc);

	if (memdesc->gpuaddr != 0)
		return;

	BUG_ON(global_pt_count >= GLOBAL_PT_ENTRIES);
	BUG_ON((global_pt_alloc + memdesc->size) >= KGSL_IOMMU_GLOBAL_MEM_SIZE);
	if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES))
		return;

	if (WARN_ON(size > KGSL_IOMMU_GLOBAL_MEM_SIZE))
		return;

	if (memdesc->priv & KGSL_MEMDESC_RANDOM) {
		u32 range = GLOBAL_MAP_PAGES - (size >> PAGE_SHIFT);

		start = get_random_int() % range;
	}

	while (start >= 0) {
		bit = bitmap_find_next_zero_area(global_map, GLOBAL_MAP_PAGES,
			start, size >> PAGE_SHIFT, 0);

		if (bit < GLOBAL_MAP_PAGES)
			break;

		start--;
	}

	if (WARN_ON(start < 0))
		return;

	memdesc->gpuaddr =
		KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + (bit << PAGE_SHIFT);

	bitmap_set(global_map, bit, size >> PAGE_SHIFT);

	memdesc->gpuaddr = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + global_pt_alloc;
	memdesc->priv |= KGSL_MEMDESC_GLOBAL;
	global_pt_alloc += memdesc->size;

	global_pt_entries[global_pt_count].memdesc = memdesc;
	strlcpy(global_pt_entries[global_pt_count].name, name,