Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c0e0d5d3 authored by Liam Mark's avatar Liam Mark
Browse files

dma-buf: Add support for partial cache maintenance



In order to improve performance allow dma-buf clients the ability
to apply cache maintenance to only a subset of a dma-buf.

Kernel clients will be able to use dma_buf_begin_cpu_access_partial
and dma_buf_end_cpu_access_partial functions to only apply cache
maintenance to a range within the dma-buf.

Userspace clients will be able to restrict cache maintenance to only
the subset of the dma-buf which is mmap(ed) by setting the
DMA_BUF_SYNC_USER_MAPPED flag when calling the DMA_BUF_IOCTL_SYNC IOCTL.

Change-Id: Id02e240ec43de636bb801929016c8d351b2ae96d
Signed-off-by: default avatarLiam Mark <lmark@codeaurora.org>
parent 156f49e6
Loading
Loading
Loading
Loading
+96 −7
Original line number Diff line number Diff line
@@ -276,12 +276,19 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll)
	return events;
}

static int dma_buf_begin_cpu_access_umapped(struct dma_buf *dmabuf,
					    enum dma_data_direction direction);


static int dma_buf_end_cpu_access_umapped(struct dma_buf *dmabuf,
					  enum dma_data_direction direction);

static long dma_buf_ioctl(struct file *file,
			  unsigned int cmd, unsigned long arg)
{
	struct dma_buf *dmabuf;
	struct dma_buf_sync sync;
	enum dma_data_direction direction;
	enum dma_data_direction dir;
	int ret;

	dmabuf = file->private_data;
@@ -296,22 +303,30 @@ static long dma_buf_ioctl(struct file *file,

		switch (sync.flags & DMA_BUF_SYNC_RW) {
		case DMA_BUF_SYNC_READ:
			direction = DMA_FROM_DEVICE;
			dir = DMA_FROM_DEVICE;
			break;
		case DMA_BUF_SYNC_WRITE:
			direction = DMA_TO_DEVICE;
			dir = DMA_TO_DEVICE;
			break;
		case DMA_BUF_SYNC_RW:
			direction = DMA_BIDIRECTIONAL;
			dir = DMA_BIDIRECTIONAL;
			break;
		default:
			return -EINVAL;
		}

		if (sync.flags & DMA_BUF_SYNC_END)
			ret = dma_buf_end_cpu_access(dmabuf, direction);
			if (sync.flags & DMA_BUF_SYNC_USER_MAPPED)
				ret = dma_buf_end_cpu_access_umapped(dmabuf,
								     dir);
			else
				ret = dma_buf_end_cpu_access(dmabuf, dir);
		else
			ret = dma_buf_begin_cpu_access(dmabuf, direction);
			if (sync.flags & DMA_BUF_SYNC_USER_MAPPED)
				ret = dma_buf_begin_cpu_access_umapped(dmabuf,
								       dir);
			else
				ret = dma_buf_begin_cpu_access(dmabuf, dir);

		return ret;
	default:
@@ -746,7 +761,8 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
 *     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
 *       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
 *       want (with the new data being consumed by say the GPU or the scanout
 *       device)
 *       device). Optionally SYNC_USER_MAPPED can be set to restrict cache
 *       maintenance to only the parts of the buffer which are mmap(ed).
 *     - munmap once you don't need the buffer any more
 *
 *    For correctness and optimal performance, it is always required to use
@@ -833,6 +849,50 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
}
EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);

/*
 * dma_buf_begin_cpu_access_umapped - prepare CPU access restricted to the
 * mmap(ed) portions of @dmabuf.  Invoked for DMA_BUF_IOCTL_SYNC when both
 * DMA_BUF_SYNC_START and DMA_BUF_SYNC_USER_MAPPED are set.
 *
 * Returns 0 on success or a negative error code from the exporter.
 */
static int dma_buf_begin_cpu_access_umapped(struct dma_buf *dmabuf,
			     enum dma_data_direction direction)
{
	int ret;

	if (WARN_ON(!dmabuf))
		return -EINVAL;

	/* The @begin_cpu_access_umapped callback is optional. */
	ret = dmabuf->ops->begin_cpu_access_umapped ?
		dmabuf->ops->begin_cpu_access_umapped(dmabuf, direction) : 0;

	/*
	 * Ensure that all fences are waited upon - but we first allow
	 * the native handler the chance to do so more efficiently if it
	 * chooses. A double invocation here will be a reasonably cheap no-op.
	 */
	if (!ret)
		ret = __dma_buf_begin_cpu_access(dmabuf, direction);

	return ret;
}
/*
 * dma_buf_begin_cpu_access_partial - prepare CPU access to a sub-range of
 * @dmabuf for kernel clients.
 * @dmabuf:	[in]	buffer to prepare cpu access for.
 * @direction:	[in]	direction of the upcoming access.
 * @offset:	[in]	start of the range, in bytes, within the buffer.
 * @len:	[in]	length of the range in bytes.
 *
 * Returns 0 on success or a negative error code from the exporter.
 */
int dma_buf_begin_cpu_access_partial(struct dma_buf *dmabuf,
				     enum dma_data_direction direction,
				     unsigned int offset, unsigned int len)
{
	int ret;

	if (WARN_ON(!dmabuf))
		return -EINVAL;

	/* The @begin_cpu_access_partial callback is optional. */
	ret = dmabuf->ops->begin_cpu_access_partial ?
		dmabuf->ops->begin_cpu_access_partial(dmabuf, direction,
						      offset, len) : 0;

	/*
	 * Ensure that all fences are waited upon - but we first allow
	 * the native handler the chance to do so more efficiently if it
	 * chooses. A double invocation here will be a reasonably cheap no-op.
	 */
	if (!ret)
		ret = __dma_buf_begin_cpu_access(dmabuf, direction);

	return ret;
}
EXPORT_SYMBOL(dma_buf_begin_cpu_access_partial);

/**
 * dma_buf_end_cpu_access - Must be called after accessing a dma_buf from the
 * cpu in the kernel context. Calls end_cpu_access to allow exporter-specific
@@ -859,6 +919,35 @@ int dma_buf_end_cpu_access(struct dma_buf *dmabuf,
}
EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);

int dma_buf_end_cpu_access_umapped(struct dma_buf *dmabuf,
			   enum dma_data_direction direction)
{
	int ret = 0;

	WARN_ON(!dmabuf);

	if (dmabuf->ops->end_cpu_access_umapped)
		ret = dmabuf->ops->end_cpu_access_umapped(dmabuf, direction);

	return ret;
}

int dma_buf_end_cpu_access_partial(struct dma_buf *dmabuf,
				   enum dma_data_direction direction,
				   unsigned int offset, unsigned int len)
{
	int ret = 0;

	WARN_ON(!dmabuf);

	if (dmabuf->ops->end_cpu_access_partial)
		ret = dmabuf->ops->end_cpu_access_partial(dmabuf, direction,
							  offset, len);

	return ret;
}
EXPORT_SYMBOL(dma_buf_end_cpu_access_partial);

/**
 * dma_buf_kmap_atomic - Map a page of the buffer object into kernel address
 * space. The same restrictions as for kmap_atomic and friends apply.
+113 −0
Original line number Diff line number Diff line
@@ -188,6 +188,68 @@ struct dma_buf_ops {
	 */
	int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction);

	/**
	 * @begin_cpu_access_umapped:
	 *
	 * This is called as a result of the DMA_BUF_IOCTL_SYNC IOCTL being
	 * called with the DMA_BUF_SYNC_START and DMA_BUF_SYNC_USER_MAPPED flags
	 * set. It allows the exporter to ensure that the mmap(ed) portions of
	 * the buffer are available for cpu access - the exporter might need to
	 * allocate or swap-in and pin the backing storage.
	 * The exporter also needs to ensure that cpu access is
	 * coherent for the access direction. The direction can be used by the
	 * exporter to optimize the cache flushing, i.e. access with a different
	 * direction (read instead of write) might return stale or even bogus
	 * data (e.g. when the exporter needs to copy the data to temporary
	 * storage).
	 *
	 * This callback is optional.
	 *
	 * Returns:
	 *
	 * 0 on success or a negative error code on failure. This can for
	 * example fail when the backing storage can't be allocated. Can also
	 * return -ERESTARTSYS or -EINTR when the call has been interrupted and
	 * needs to be restarted.
	 */
	int (*begin_cpu_access_umapped)(struct dma_buf *,
					enum dma_data_direction);

	/**
	 * @begin_cpu_access_partial:
	 *
	 * This is called from dma_buf_begin_cpu_access_partial() and allows the
	 * exporter to ensure that the memory specified in the range is
	 * available for cpu access - the exporter might need to allocate or
	 * swap-in and pin the backing storage.
	 * The exporter also needs to ensure that cpu access is
	 * coherent for the access direction. The direction can be used by the
	 * exporter to optimize the cache flushing, i.e. access with a different
	 * direction (read instead of write) might return stale or even bogus
	 * data (e.g. when the exporter needs to copy the data to temporary
	 * storage).
	 *
	 * This callback is optional.
	 *
	 * FIXME: This is both called through the DMA_BUF_IOCTL_SYNC command
	 * from userspace (where storage shouldn't be pinned to avoid handing
	 * de-facto mlock rights to userspace) and for the kernel-internal
	 * users of the various kmap interfaces, where the backing storage must
	 * be pinned to guarantee that the atomic kmap calls can succeed. Since
	 * there's no in-kernel users of the kmap interfaces yet this isn't a
	 * real problem.
	 *
	 * Returns:
	 *
	 * 0 on success or a negative error code on failure. This can for
	 * example fail when the backing storage can't be allocated. Can also
	 * return -ERESTARTSYS or -EINTR when the call has been interrupted and
	 * needs to be restarted.
	 */
	int (*begin_cpu_access_partial)(struct dma_buf *,
					enum dma_data_direction,
					unsigned int, unsigned int);

	/**
	 * @end_cpu_access:
	 *
@@ -206,6 +268,50 @@ struct dma_buf_ops {
	 * to be restarted.
	 */
	int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction);

	/**
	 * @end_cpu_access_umapped:
	 *
	 * This is called as a result of the DMA_BUF_IOCTL_SYNC IOCTL being
	 * called with the DMA_BUF_SYNC_END and DMA_BUF_SYNC_USER_MAPPED flags
	 * set. The exporter can use it to limit cache flushing to only those
	 * parts of the buffer which are mmap(ed) and to unpin any resources
	 * pinned in @begin_cpu_access_umapped.
	 * The result of any dma_buf kmap calls after end_cpu_access_umapped is
	 * undefined.
	 *
	 * This callback is optional.
	 *
	 * Returns:
	 *
	 * 0 on success or a negative error code on failure. Can return
	 * -ERESTARTSYS or -EINTR when the call has been interrupted and needs
	 * to be restarted.
	 */
	int (*end_cpu_access_umapped)(struct dma_buf *,
				      enum dma_data_direction);

	/**
	 * @end_cpu_access_partial:
	 *
	 * This is called from dma_buf_end_cpu_access_partial() when the
	 * importer is done accessing the CPU. The exporter can use it to
	 * limit cache flushing to only the range specified and to unpin any
	 * resources pinned in @begin_cpu_access_partial.
	 * The result of any dma_buf kmap calls after end_cpu_access_partial is
	 * undefined.
	 *
	 * This callback is optional.
	 *
	 * Returns:
	 *
	 * 0 on success or a negative error code on failure. Can return
	 * -ERESTARTSYS or -EINTR when the call has been interrupted and needs
	 * to be restarted.
	 */
	int (*end_cpu_access_partial)(struct dma_buf *, enum dma_data_direction,
				      unsigned int, unsigned int);

	void *(*map_atomic)(struct dma_buf *, unsigned long);
	void (*unmap_atomic)(struct dma_buf *, unsigned long, void *);
	void *(*map)(struct dma_buf *, unsigned long);
@@ -408,8 +514,15 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
				enum dma_data_direction);
int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
			     enum dma_data_direction dir);
int dma_buf_begin_cpu_access_partial(struct dma_buf *dma_buf,
				     enum dma_data_direction dir,
				     unsigned int offset,
				     unsigned int len);
int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
			   enum dma_data_direction dir);
int dma_buf_end_cpu_access_partial(struct dma_buf *dma_buf,
				   enum dma_data_direction dir,
				   unsigned int offset, unsigned int len);
void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
void *dma_buf_kmap(struct dma_buf *, unsigned long);
+3 −1
Original line number Diff line number Diff line
@@ -32,8 +32,10 @@ struct dma_buf_sync {
#define DMA_BUF_SYNC_RW        (DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE)
#define DMA_BUF_SYNC_START     (0 << 2)
#define DMA_BUF_SYNC_END       (1 << 2)
#define DMA_BUF_SYNC_USER_MAPPED       (1 << 3)

#define DMA_BUF_SYNC_VALID_FLAGS_MASK \
	(DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END)
	(DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END | DMA_BUF_SYNC_USER_MAPPED)

#define DMA_BUF_BASE		'b'
#define DMA_BUF_IOCTL_SYNC	_IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)