
Commit 956eb6cb authored by Linus Torvalds

Merge tag 'for-4.21/aio-20181221' of git://git.kernel.dk/linux-block

Pull aio updates from Jens Axboe:
 "Flushing out pre-patches for the buffered/polled aio series. Some
  fixes in here, but also optimizations"

* tag 'for-4.21/aio-20181221' of git://git.kernel.dk/linux-block:
  aio: abstract out io_event filler helper
  aio: split out iocb copy from io_submit_one()
  aio: use iocb_put() instead of open coding it
  aio: only use blk plugs for > 2 depth submissions
  aio: don't zero entire aio_kiocb aio_get_req()
  aio: separate out ring reservation from req allocation
  aio: use assigned completion handler
parents 0e9da3fb 875736bb
fs/aio.c  +88 −56
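For orientation, a minimal userspace sketch (not part of this commit) of the submission path the diff below touches: several iocbs are batched into a single io_submit() call, which is the case the new AIO_PLUG_THRESHOLD is tuned for. It uses the raw syscalls and <linux/aio_abi.h> types rather than libaio; the file name and buffer sizes are arbitrary.

/* sketch: batched io_submit() via raw syscalls (not part of this commit) */
#include <fcntl.h>
#include <linux/aio_abi.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#define NR_REQS	4	/* > AIO_PLUG_THRESHOLD, so the kernel may plug */

int main(void)
{
	aio_context_t ctx = 0;
	struct iocb cbs[NR_REQS], *cbp[NR_REQS];
	struct io_event events[NR_REQS];
	int fd, i;

	fd = open("testfile", O_RDONLY);
	if (fd < 0 || syscall(SYS_io_setup, 128, &ctx))
		return 1;

	for (i = 0; i < NR_REQS; i++) {
		memset(&cbs[i], 0, sizeof(cbs[i]));
		cbs[i].aio_lio_opcode = IOCB_CMD_PREAD;
		cbs[i].aio_fildes = fd;
		cbs[i].aio_buf = (__u64)(unsigned long)malloc(4096);
		cbs[i].aio_nbytes = 4096;
		cbs[i].aio_offset = 4096 * i;
		cbp[i] = &cbs[i];
	}

	/* one syscall submits the whole batch; each iocb passes through
	 * io_submit_one() / __io_submit_one() in fs/aio.c below */
	if (syscall(SYS_io_submit, ctx, NR_REQS, cbp) != NR_REQS)
		return 1;
	if (syscall(SYS_io_getevents, ctx, NR_REQS, NR_REQS, events, NULL) < 0)
		return 1;

	syscall(SYS_io_destroy, ctx);
	return 0;
}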
@@ -70,6 +70,12 @@ struct aio_ring {
 	struct io_event		io_events[0];
 }; /* 128 bytes + ring size */
 
+/*
+ * Plugging is meant to work with larger batches of IOs. If we don't
+ * have more than the below, then don't bother setting up a plug.
+ */
+#define AIO_PLUG_THRESHOLD	2
+
 #define AIO_RING_PAGES	8
 
 struct kioctx_table {
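The threshold is applied in the io_submit() syscall paths at the end of this diff; condensed from those hunks, the plug pattern it gates looks like this (a sketch of the merged result, not additional new code):

	struct blk_plug plug;

	if (nr > AIO_PLUG_THRESHOLD)
		blk_start_plug(&plug);	/* queue requests in a per-task list */
	for (i = 0; i < nr; i++) {
		/* io_submit_one() for each iocb */
	}
	if (nr > AIO_PLUG_THRESHOLD)
		blk_finish_plug(&plug);	/* dispatch the whole batch at once */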
@@ -902,7 +908,7 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
 	local_irq_restore(flags);
 }
 
-static bool get_reqs_available(struct kioctx *ctx)
+static bool __get_reqs_available(struct kioctx *ctx)
 {
 	struct kioctx_cpu *kcpu;
 	bool ret = false;
@@ -994,6 +1000,14 @@ static void user_refill_reqs_available(struct kioctx *ctx)
 	spin_unlock_irq(&ctx->completion_lock);
 }
 
+static bool get_reqs_available(struct kioctx *ctx)
+{
+	if (__get_reqs_available(ctx))
+		return true;
+	user_refill_reqs_available(ctx);
+	return __get_reqs_available(ctx);
+}
+
 /* aio_get_req
 *	Allocate a slot for an aio request.
 * Returns NULL if no requests are free.
@@ -1002,24 +1016,16 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 {
 	struct aio_kiocb *req;
 
-	if (!get_reqs_available(ctx)) {
-		user_refill_reqs_available(ctx);
-		if (!get_reqs_available(ctx))
-			return NULL;
-	}
-
-	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
+	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
 	if (unlikely(!req))
-		goto out_put;
+		return NULL;
 
 	percpu_ref_get(&ctx->reqs);
+	req->ki_ctx = ctx;
 	INIT_LIST_HEAD(&req->ki_list);
 	refcount_set(&req->ki_refcnt, 0);
-	req->ki_ctx = ctx;
+	req->ki_eventfd = NULL;
 	return req;
-out_put:
-	put_reqs_available(ctx, 1);
-	return NULL;
 }
 
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
@@ -1059,6 +1065,15 @@ static inline void iocb_put(struct aio_kiocb *iocb)
 	}
 }
 
+static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
+			   long res, long res2)
+{
+	ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
+	ev->data = iocb->ki_user_data;
+	ev->res = res;
+	ev->res2 = res2;
+}
+
 /* aio_complete
 *	Called when the io request on the given iocb is complete.
 */
@@ -1086,10 +1101,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
 	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-	event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
-	event->data = iocb->ki_user_data;
-	event->res = res;
-	event->res2 = res2;
+	aio_fill_event(event, iocb, res, res2);
 
 	kunmap_atomic(ev_page);
 	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
@@ -1416,7 +1428,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
 	aio_complete(iocb, res, res2);
 }
 
-static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
+static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 {
 	int ret;
 
@@ -1457,7 +1469,7 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
 	return ret;
 }
 
-static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
+static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
 		bool vectored, bool compat, struct iov_iter *iter)
 {
 	void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
@@ -1492,12 +1504,12 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
 		ret = -EINTR;
 		/*FALLTHRU*/
 	default:
-		aio_complete_rw(req, ret, 0);
+		req->ki_complete(req, ret, 0);
 	}
 }
 
-static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
-		bool compat)
+static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
+			bool vectored, bool compat)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iov_iter iter;
@@ -1529,8 +1541,8 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
 	return ret;
 }
 
-static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
-		bool compat)
+static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
+			 bool vectored, bool compat)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iov_iter iter;
@@ -1585,7 +1597,8 @@ static void aio_fsync_work(struct work_struct *work)
 	aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
 }
 
-static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
+static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
+		     bool datasync)
 {
 	if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
 			iocb->aio_rw_flags))
@@ -1713,7 +1726,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 	add_wait_queue(head, &pt->iocb->poll.wait);
 }
 
-static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 {
 	struct kioctx *ctx = aiocb->ki_ctx;
 	struct poll_iocb *req = &aiocb->poll;
@@ -1733,6 +1746,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
 	if (unlikely(!req->file))
 		return -EBADF;
 
+	req->head = NULL;
+	req->woken = false;
+	req->cancelled = false;
+
 	apt.pt._qproc = aio_poll_queue_proc;
 	apt.pt._key = req->events;
 	apt.iocb = aiocb;
@@ -1781,44 +1798,44 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
 	return 0;
 }
 
-static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 bool compat)
+static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
+			   struct iocb __user *user_iocb, bool compat)
 {
 	struct aio_kiocb *req;
-	struct iocb iocb;
 	ssize_t ret;
 
-	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
-		return -EFAULT;
-
 	/* enforce forwards compatibility on users */
-	if (unlikely(iocb.aio_reserved2)) {
+	if (unlikely(iocb->aio_reserved2)) {
 		pr_debug("EINVAL: reserve field set\n");
 		return -EINVAL;
 	}
 
 	/* prevent overflows */
 	if (unlikely(
-	    (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
-	    (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
-	    ((ssize_t)iocb.aio_nbytes < 0)
+	    (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
+	    (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
+	    ((ssize_t)iocb->aio_nbytes < 0)
 	   )) {
 		pr_debug("EINVAL: overflow check\n");
 		return -EINVAL;
 	}
 
+	if (!get_reqs_available(ctx))
+		return -EAGAIN;
+
+	ret = -EAGAIN;
 	req = aio_get_req(ctx);
 	if (unlikely(!req))
-		return -EAGAIN;
+		goto out_put_reqs_available;
 
-	if (iocb.aio_flags & IOCB_FLAG_RESFD) {
+	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
 		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
 		 * instance of the file* now. The file descriptor must be
 		 * an eventfd() fd, and will be signaled for each completed
 		 * event using the eventfd_signal() function.
 		 */
-		req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd);
+		req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
 		if (IS_ERR(req->ki_eventfd)) {
 			ret = PTR_ERR(req->ki_eventfd);
 			req->ki_eventfd = NULL;
@@ -1833,32 +1850,32 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	}
 
 	req->ki_user_iocb = user_iocb;
-	req->ki_user_data = iocb.aio_data;
+	req->ki_user_data = iocb->aio_data;
 
-	switch (iocb.aio_lio_opcode) {
+	switch (iocb->aio_lio_opcode) {
 	case IOCB_CMD_PREAD:
-		ret = aio_read(&req->rw, &iocb, false, compat);
+		ret = aio_read(&req->rw, iocb, false, compat);
 		break;
 	case IOCB_CMD_PWRITE:
-		ret = aio_write(&req->rw, &iocb, false, compat);
+		ret = aio_write(&req->rw, iocb, false, compat);
 		break;
 	case IOCB_CMD_PREADV:
-		ret = aio_read(&req->rw, &iocb, true, compat);
+		ret = aio_read(&req->rw, iocb, true, compat);
 		break;
 	case IOCB_CMD_PWRITEV:
-		ret = aio_write(&req->rw, &iocb, true, compat);
+		ret = aio_write(&req->rw, iocb, true, compat);
 		break;
 	case IOCB_CMD_FSYNC:
-		ret = aio_fsync(&req->fsync, &iocb, false);
+		ret = aio_fsync(&req->fsync, iocb, false);
 		break;
 	case IOCB_CMD_FDSYNC:
-		ret = aio_fsync(&req->fsync, &iocb, true);
+		ret = aio_fsync(&req->fsync, iocb, true);
 		break;
 	case IOCB_CMD_POLL:
-		ret = aio_poll(req, &iocb);
+		ret = aio_poll(req, iocb);
 		break;
 	default:
-		pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
+		pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
 		ret = -EINVAL;
 		break;
 	}
@@ -1872,14 +1889,25 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 	return 0;
 out_put_req:
-	put_reqs_available(ctx, 1);
-	percpu_ref_put(&ctx->reqs);
 	if (req->ki_eventfd)
 		eventfd_ctx_put(req->ki_eventfd);
-	kmem_cache_free(kiocb_cachep, req);
+	iocb_put(req);
+out_put_reqs_available:
+	put_reqs_available(ctx, 1);
 	return ret;
 }
 
+static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
+			 bool compat)
+{
+	struct iocb iocb;
+
+	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+		return -EFAULT;
+
+	return __io_submit_one(ctx, &iocb, user_iocb, compat);
+}
+
 /* sys_io_submit:
 *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
 *	the number of iocbs queued.  May return -EINVAL if the aio_context
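The copy/submit split above means the parsing and dispatch logic now takes a kernel-space iocb. A hypothetical caller that already holds one (illustrative only; no such caller exists in this commit) could then skip the user copy entirely:

/* illustrative only: not part of this commit */
static int submit_kernel_iocb(struct kioctx *ctx, const struct iocb *iocb,
			      struct iocb __user *user_iocb)
{
	/* iocb is already in kernel memory, so no copy_from_user() step */
	return __io_submit_one(ctx, iocb, user_iocb, false);
}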
@@ -1912,6 +1940,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 	if (nr > ctx->nr_events)
 		nr = ctx->nr_events;
 
-	blk_start_plug(&plug);
+	if (nr > AIO_PLUG_THRESHOLD)
+		blk_start_plug(&plug);
 	for (i = 0; i < nr; i++) {
 		struct iocb __user *user_iocb;
@@ -1925,6 +1954,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 		if (ret)
 			break;
 	}
-	blk_finish_plug(&plug);
+	if (nr > AIO_PLUG_THRESHOLD)
+		blk_finish_plug(&plug);
 
 	percpu_ref_put(&ctx->users);
@@ -1952,6 +1982,7 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
 	if (nr > ctx->nr_events)
 		nr = ctx->nr_events;
 
-	blk_start_plug(&plug);
+	if (nr > AIO_PLUG_THRESHOLD)
+		blk_start_plug(&plug);
 	for (i = 0; i < nr; i++) {
 		compat_uptr_t user_iocb;
@@ -1965,6 +1996,7 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
 		if (ret)
 			break;
 	}
-	blk_finish_plug(&plug);
+	if (nr > AIO_PLUG_THRESHOLD)
+		blk_finish_plug(&plug);
 
 	percpu_ref_put(&ctx->users);