drm/i915: Move GEM activity tracking into a common struct reservation_object (d07f0e59) · Commits · e / devices / android_kernel_oneplus_sm8150

drivers/gpu/drm/i915/i915_debugfs.c

+3 −12

Original line number	Diff line number	Diff line
		@@ -136,11 +136,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
		struct i915_vma *vma;
		unsigned int frontbuffer_bits;
		int pin_count = 0;
		enum intel_engine_id id;

		lockdep_assert_held(&obj->base.dev->struct_mutex);

		seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x [ ",
		seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s",
		&obj->base,
		get_active_flag(obj),
		get_pin_flag(obj),
		@@ -149,14 +148,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
		get_pin_mapped_flag(obj),
		obj->base.size / 1024,
		obj->base.read_domains,
		obj->base.write_domain);
		for_each_engine(engine, dev_priv, id)
		seq_printf(m, "%x ",
		i915_gem_active_get_seqno(&obj->last_read[id],
		&obj->base.dev->struct_mutex));
		seq_printf(m, "] %x %s%s%s",
		i915_gem_active_get_seqno(&obj->last_write,
		&obj->base.dev->struct_mutex),
		obj->base.write_domain,
		i915_cache_level_str(dev_priv, obj->cache_level),
		obj->mm.dirty ? " dirty" : "",
		obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
		@@ -187,8 +179,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
		if (obj->stolen)
		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);

		engine = i915_gem_active_get_engine(&obj->last_write,
		&dev_priv->drm.struct_mutex);
		engine = i915_gem_object_last_write_engine(obj);
		if (engine)
		seq_printf(m, " (%s)", engine->name);

drivers/gpu/drm/i915/i915_drv.h

+24 −38

Original line number	Diff line number	Diff line
		@@ -41,6 +41,7 @@
		#include <linux/intel-iommu.h>
		#include <linux/kref.h>
		#include <linux/pm_qos.h>
		#include <linux/reservation.h>
		#include <linux/shmem_fs.h>

		#include <drm/drmP.h>
		@@ -2246,21 +2247,12 @@ struct drm_i915_gem_object {
		struct list_head batch_pool_link;

		unsigned long flags;
		/**
		* This is set if the object is on the active lists (has pending
		* rendering and so a non-zero seqno), and is not set if it is on
		* inactive (ready to be unbound) list.
		*/
		#define I915_BO_ACTIVE_SHIFT 0
		#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
		#define __I915_BO_ACTIVE(bo) \
		((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)

		/**
		* Have we taken a reference for the object for incomplete GPU
		* activity?
		*/
		#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
		#define I915_BO_ACTIVE_REF 0

		/*
		* Is the object to be mapped as read-only to the GPU
		@@ -2281,6 +2273,7 @@ struct drm_i915_gem_object {

		/** Count of VMA actually bound by this object */
		unsigned int bind_count;
		unsigned int active_count;
		unsigned int pin_display;

		struct {
		@@ -2320,8 +2313,7 @@ struct drm_i915_gem_object {
		* read request. This allows for the CPU to read from an active
		* buffer by only waiting for the write to complete.
		*/
		struct i915_gem_active last_read[I915_NUM_ENGINES];
		struct i915_gem_active last_write;
		struct reservation_object *resv;

		/** References from framebuffers, locks out tiling changes. */
		unsigned long framebuffer_references;
		@@ -2340,6 +2332,8 @@ struct drm_i915_gem_object {

		/** for phys allocated objects */
		struct drm_dma_handle *phys_handle;

		struct reservation_object __builtin_resv;
		};

		static inline struct drm_i915_gem_object *
		@@ -2425,35 +2419,10 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
		return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
		}

		static inline unsigned long
		i915_gem_object_get_active(const struct drm_i915_gem_object *obj)
		{
		return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
		}

		static inline bool
		i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
		{
		return i915_gem_object_get_active(obj);
		}

		static inline void
		i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
		{
		obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT);
		}

		static inline void
		i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine)
		{
		obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT);
		}

		static inline bool
		i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
		int engine)
		{
		return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
		return obj->active_count;
		}

		static inline bool
		@@ -2496,6 +2465,23 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
		return obj->tiling_and_stride & STRIDE_MASK;
		}

		/*
		 * Report which engine, if any, still has an outstanding write to @obj.
		 *
		 * Looks at the exclusive fence of obj->resv. Returns the engine of the
		 * request behind that fence when the fence is an i915 fence that has not
		 * yet signaled; returns NULL when there is no exclusive fence, when the
		 * fence is not an i915 fence, or when it has already signaled.
		 */
		static inline struct intel_engine_cs *
		i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
		{
		struct intel_engine_cs *engine = NULL;
		struct dma_fence *fence;

		/*
		 * NOTE(review): presumably reservation_object_get_excl_rcu() hands
		 * back a referenced fence (or NULL), which is what the
		 * dma_fence_put() below balances - confirm against linux/reservation.h.
		 */
		rcu_read_lock();
		fence = reservation_object_get_excl_rcu(obj->resv);
		rcu_read_unlock();

		/* Only native (i915) fences that are still pending name an engine. */
		if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
		engine = to_request(fence)->engine;
		/*
		 * Called unconditionally, i.e. also when fence is NULL; this assumes
		 * dma_fence_put() tolerates a NULL argument - TODO confirm.
		 */
		dma_fence_put(fence);

		return engine;
		}

		static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
		{
		i915_gem_object_get(vma->obj);

drivers/gpu/drm/i915/i915_gem.c

+75 −188

Original line number	Diff line number	Diff line
		@@ -29,7 +29,6 @@
		#include <drm/drm_vma_manager.h>
		#include <drm/i915_drm.h>
		#include "i915_drv.h"
		#include "i915_gem_dmabuf.h"
		#include "i915_vgpu.h"
		#include "i915_trace.h"
		#include "intel_drv.h"
		@@ -447,11 +446,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
		long timeout,
		struct intel_rps_client *rps)
		{
		struct reservation_object *resv;
		struct i915_gem_active *active;
		unsigned long active_mask;
		int idx;

		might_sleep();
		#if IS_ENABLED(CONFIG_LOCKDEP)
		GEM_BUG_ON(debug_locks &&
		@@ -460,31 +454,7 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
		#endif
		GEM_BUG_ON(timeout < 0);

		if (flags & I915_WAIT_ALL) {
		active = obj->last_read;
		active_mask = i915_gem_object_get_active(obj);
		} else {
		active_mask = 1;
		active = &obj->last_write;
		}

		for_each_active(active_mask, idx) {
		struct drm_i915_gem_request *request;

		request = i915_gem_active_get_unlocked(&active[idx]);
		if (request) {
		timeout = i915_gem_object_wait_fence(&request->fence,
		flags, timeout,
		rps);
		i915_gem_request_put(request);
		}
		if (timeout < 0)
		return timeout;
		}

		resv = i915_gem_object_get_dmabuf_resv(obj);
		if (resv)
		timeout = i915_gem_object_wait_reservation(resv,
		timeout = i915_gem_object_wait_reservation(obj->resv,
		flags, timeout,
		rps);
		return timeout < 0 ? timeout : 0;
		@@ -2549,44 +2519,6 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
		goto out_unlock;
		}

		static void
		i915_gem_object_retire__write(struct i915_gem_active *active,
		struct drm_i915_gem_request *request)
		{
		struct drm_i915_gem_object *obj =
		container_of(active, struct drm_i915_gem_object, last_write);

		intel_fb_obj_flush(obj, true, ORIGIN_CS);
		}

		static void
		i915_gem_object_retire__read(struct i915_gem_active *active,
		struct drm_i915_gem_request *request)
		{
		int idx = request->engine->id;
		struct drm_i915_gem_object *obj =
		container_of(active, struct drm_i915_gem_object, last_read[idx]);

		GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));

		i915_gem_object_clear_active(obj, idx);
		if (i915_gem_object_is_active(obj))
		return;

		/* Bump our place on the bound list to keep it roughly in LRU order
		* so that we don't steal from recently used but inactive objects
		* (unless we are forced to ofc!)
		*/
		if (obj->bind_count)
		list_move_tail(&obj->global_list,
		&request->i915->mm.bound_list);

		if (i915_gem_object_has_active_reference(obj)) {
		i915_gem_object_clear_active_reference(obj);
		i915_gem_object_put(obj);
		}
		}

		static bool i915_context_is_banned(const struct i915_gem_context *ctx)
		{
		unsigned long elapsed;
		@@ -2966,6 +2898,13 @@ int i915_vma_unbind(struct i915_vma *vma)
		* In order to prevent it from being recursively closed,
		* take a pin on the vma so that the second unbind is
		* aborted.
		*
		* Even more scary is that the retire callback may free
		* the object (last active vma). To prevent the explosion
		* we defer the actual object free to a worker that can
		* only proceed once it acquires the struct_mutex (which
		* we currently hold, therefore it cannot free this object
		* before we are finished).
		*/
		__i915_vma_pin(vma);

		@@ -4010,83 +3949,42 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
		}

		static __always_inline unsigned int
		__busy_set_if_active(const struct i915_gem_active *active,
		__busy_set_if_active(const struct dma_fence *fence,
		unsigned int (*flag)(unsigned int id))
		{
		struct drm_i915_gem_request *request;

		request = rcu_dereference(active->request);
		if (!request || i915_gem_request_completed(request))
		return 0;
		struct drm_i915_gem_request *rq;

		/* This is racy. See __i915_gem_active_get_rcu() for an in detail
		* discussion of how to handle the race correctly, but for reporting
		* the busy state we err on the side of potentially reporting the
		* wrong engine as being busy (but we guarantee that the result
		* is at least self-consistent).
		*
		* As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
		* whilst we are inspecting it, even under the RCU read lock as we are.
		* This means that there is a small window for the engine and/or the
		* seqno to have been overwritten. The seqno will always be in the
		* future compared to the intended, and so we know that if that
		* seqno is idle (on whatever engine) our request is idle and the
		* return 0 above is correct.
		*
		* The issue is that if the engine is switched, it is just as likely
		* to report that it is busy (but since the switch happened, we know
		* the request should be idle). So there is a small chance that a busy
		* result is actually the wrong engine.
		*
		* So why don't we care?
		*
		* For starters, the busy ioctl is a heuristic that is by definition
		* racy. Even with perfect serialisation in the driver, the hardware
		* state is constantly advancing - the state we report to the user
		* is stale.
		*
		* The critical information for the busy-ioctl is whether the object
		* is idle as userspace relies on that to detect whether its next
		* access will stall, or if it has missed submitting commands to
		* the hardware allowing the GPU to stall. We never generate a
		* false-positive for idleness, thus busy-ioctl is reliable at the
		* most fundamental level, and we maintain the guarantee that a
		* busy object left to itself will eventually become idle (and stay
		* idle!).
		*
		* We allow ourselves the leeway of potentially misreporting the busy
		* state because that is an optimisation heuristic that is constantly
		* in flux. Being quickly able to detect the busy/idle state is much
		* more important than accurate logging of exactly which engines were
		* busy.
		*
		* For accuracy in reporting the engine, we could use
		*
		* result = 0;
		* request = __i915_gem_active_get_rcu(active);
		* if (request) {
		* if (!i915_gem_request_completed(request))
		* result = flag(request->engine->exec_id);
		* i915_gem_request_put(request);
		* }
		/* We have to check the current hw status of the fence as the uABI
		* guarantees forward progress. We could rely on the idle worker
		* to eventually flush us, but to minimise latency just ask the
		* hardware.
		*
		* but that still remains susceptible to both hardware and userspace
		* races. So we accept making the result of that race slightly worse,
		* given the rarity of the race and its low impact on the result.
		* Note we only report on the status of native fences.
		*/
		return flag(READ_ONCE(request->engine->exec_id));
		if (!dma_fence_is_i915(fence))
		return 0;

		/* opencode to_request() in order to avoid const warnings */
		rq = container_of(fence, struct drm_i915_gem_request, fence);
		if (i915_gem_request_completed(rq))
		return 0;

		return flag(rq->engine->exec_id);
		}

		static __always_inline unsigned int
		busy_check_reader(const struct i915_gem_active *active)
		busy_check_reader(const struct dma_fence *fence)
		{
		return __busy_set_if_active(active, __busy_read_flag);
		return __busy_set_if_active(fence, __busy_read_flag);
		}

		static __always_inline unsigned int
		busy_check_writer(const struct i915_gem_active *active)
		busy_check_writer(const struct dma_fence *fence)
		{
		return __busy_set_if_active(active, __busy_write_id);
		if (!fence)
		return 0;

		return __busy_set_if_active(fence, __busy_write_id);
		}

		int
		@@ -4095,63 +3993,55 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		{
		struct drm_i915_gem_busy *args = data;
		struct drm_i915_gem_object *obj;
		unsigned long active;
		struct reservation_object_list *list;
		unsigned int seq;
		int err;

		err = -ENOENT;
		rcu_read_lock();
		obj = i915_gem_object_lookup_rcu(file, args->handle);
		if (!obj) {
		err = -ENOENT;
		if (!obj)
		goto out;
		}

		args->busy = 0;
		active = __I915_BO_ACTIVE(obj);
		if (active) {
		int idx;

		/* Yes, the lookups are intentionally racy.
		*
		* First, we cannot simply rely on __I915_BO_ACTIVE. We have
		* to regard the value as stale and as our ABI guarantees
		* forward progress, we confirm the status of each active
		* request with the hardware.
		/* A discrepancy here is that we do not report the status of
		* non-i915 fences, i.e. even though we may report the object as idle,
		* a call to set-domain may still stall waiting for foreign rendering.
		* This also means that wait-ioctl may report an object as busy,
		* where busy-ioctl considers it idle.
		*
		* Even though we guard the pointer lookup by RCU, that only
		* guarantees that the pointer and its contents remain
		* dereferencable and does not mean that the request we
		* have is the same as the one being tracked by the object.
		* We trade the ability to warn of foreign fences to report on which
		* i915 engines are active for the object.
		*
		* Consider that we lookup the request just as it is being
		* retired and freed. We take a local copy of the pointer,
		* but before we add its engine into the busy set, the other
		* thread reallocates it and assigns it to a task on another
		* engine with a fresh and incomplete seqno. Guarding against
		* that requires careful serialisation and reference counting,
		* i.e. using __i915_gem_active_get_request_rcu(). We don't,
		* instead we expect that if the result is busy, which engines
		* are busy is not completely reliable - we only guarantee
		* that the object was busy.
		*/

		for_each_active(active, idx)
		args->busy |= busy_check_reader(&obj->last_read[idx]);

		/* For ABI sanity, we only care that the write engine is in
		* the set of read engines. This should be ensured by the
		* ordering of setting last_read/last_write in
		* i915_vma_move_to_active(), and then in reverse in retire.
		* However, for good measure, we always report the last_write
		* request as a busy read as well as being a busy write.
		* Alternatively, we can trade that extra information on read/write
		* activity with
		* args->busy =
		* !reservation_object_test_signaled_rcu(obj->resv, true);
		* to report the overall busyness. This is what the wait-ioctl does.
		*
		* We don't care that the set of active read/write engines
		* may change during construction of the result, as it is
		* equally liable to change before userspace can inspect
		* the result.
		*/
		args->busy |= busy_check_writer(&obj->last_write);
		retry:
		seq = raw_read_seqcount(&obj->resv->seq);

		/* Translate the exclusive fence to the READ and WRITE engine */
		args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));

		/* Translate shared fences to READ set of engines */
		list = rcu_dereference(obj->resv->fence);
		if (list) {
		unsigned int shared_count = list->shared_count, i;

		for (i = 0; i < shared_count; ++i) {
		struct dma_fence *fence =
		rcu_dereference(list->shared[i]);

		args->busy |= busy_check_reader(fence);
		}
		}

		if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
		goto retry;

		err = 0;
		out:
		rcu_read_unlock();
		return err;
		@@ -4216,23 +4106,19 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		void i915_gem_object_init(struct drm_i915_gem_object *obj,
		const struct drm_i915_gem_object_ops *ops)
		{
		int i;

		mutex_init(&obj->mm.lock);

		INIT_LIST_HEAD(&obj->global_list);
		INIT_LIST_HEAD(&obj->userfault_link);
		for (i = 0; i < I915_NUM_ENGINES; i++)
		init_request_active(&obj->last_read[i],
		i915_gem_object_retire__read);
		init_request_active(&obj->last_write,
		i915_gem_object_retire__write);
		INIT_LIST_HEAD(&obj->obj_exec_link);
		INIT_LIST_HEAD(&obj->vma_list);
		INIT_LIST_HEAD(&obj->batch_pool_link);

		obj->ops = ops;

		reservation_object_init(&obj->__builtin_resv);
		obj->resv = &obj->__builtin_resv;

		obj->frontbuffer_ggtt_origin = ORIGIN_GTT;

		obj->mm.madv = I915_MADV_WILLNEED;
		@@ -4385,6 +4271,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
		if (obj->base.import_attach)
		drm_prime_gem_destroy(&obj->base, NULL);

		reservation_object_fini(&obj->__builtin_resv);
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(i915, obj->base.size);

drivers/gpu/drm/i915/i915_gem_batch_pool.c

+9 −2

Original line number	Diff line number	Diff line
		@@ -114,11 +114,18 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,

		list_for_each_entry(tmp, list, batch_pool_link) {
		/* The batches are strictly LRU ordered */
		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
		&tmp->base.dev->struct_mutex))
		if (i915_gem_object_is_active(tmp))
		break;

		GEM_BUG_ON(!reservation_object_test_signaled_rcu(tmp->resv,
		true));

		if (tmp->base.size >= size) {
		/* Clear the set of shared fences early */
		ww_mutex_lock(&tmp->resv->lock, NULL);
		reservation_object_add_excl_fence(tmp->resv, NULL);
		ww_mutex_unlock(&tmp->resv->lock);

		obj = tmp;
		break;
		}

drivers/gpu/drm/i915/i915_gem_dmabuf.c

+3 −50

Original line number	Diff line number	Diff line
		@@ -211,60 +211,17 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
		.end_cpu_access = i915_gem_end_cpu_access,
		};

		static void export_fences(struct drm_i915_gem_object *obj,
		struct dma_buf *dma_buf)
		{
		struct reservation_object *resv = dma_buf->resv;
		struct drm_i915_gem_request *req;
		unsigned long active;
		int idx;

		active = __I915_BO_ACTIVE(obj);
		if (!active)
		return;

		/* Serialise with execbuf to prevent concurrent fence-loops */
		mutex_lock(&obj->base.dev->struct_mutex);

		/* Mark the object for future fences before racily adding old fences */
		obj->base.dma_buf = dma_buf;

		ww_mutex_lock(&resv->lock, NULL);

		for_each_active(active, idx) {
		req = i915_gem_active_get(&obj->last_read[idx],
		&obj->base.dev->struct_mutex);
		if (!req)
		continue;

		if (reservation_object_reserve_shared(resv) == 0)
		reservation_object_add_shared_fence(resv, &req->fence);

		i915_gem_request_put(req);
		}

		req = i915_gem_active_get(&obj->last_write,
		&obj->base.dev->struct_mutex);
		if (req) {
		reservation_object_add_excl_fence(resv, &req->fence);
		i915_gem_request_put(req);
		}

		ww_mutex_unlock(&resv->lock);
		mutex_unlock(&obj->base.dev->struct_mutex);
		}

		struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
		struct drm_gem_object *gem_obj, int flags)
		{
		struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
		DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
		struct dma_buf *dma_buf;

		exp_info.ops = &i915_dmabuf_ops;
		exp_info.size = gem_obj->size;
		exp_info.flags = flags;
		exp_info.priv = gem_obj;
		exp_info.resv = obj->resv;

		if (obj->ops->dmabuf_export) {
		int ret = obj->ops->dmabuf_export(obj);
		@@ -272,12 +229,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
		return ERR_PTR(ret);
		}

		dma_buf = drm_gem_dmabuf_export(dev, &exp_info);
		if (IS_ERR(dma_buf))
		return dma_buf;

		export_fences(obj, dma_buf);
		return dma_buf;
		return drm_gem_dmabuf_export(dev, &exp_info);
		}

		static struct sg_table *
		@@ -335,6 +287,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
		drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
		i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
		obj->base.import_attach = attach;
		obj->resv = dma_buf->resv;

		/* We use GTT as shorthand for a coherent domain, one that is
		* neither in the GPU cache nor in the CPU cache, where all