
Commit 1bdd3dbf authored by Linus Torvalds

Merge tag 'io_uring-20190323' of git://git.kernel.dk/linux-block

Pull io_uring fixes and improvements from Jens Axboe:
 "The first five in this series are heavily inspired by the work Al did
  on the aio side to fix the races there.

  The last two re-introduce a feature that was in io_uring before it got
  merged, but which I pulled since we didn't have a good way to have
  BVEC iters that already have a stable reference. These aren't
  necessarily related to block, it's just how io_uring pins fixed
  buffers"

* tag 'io_uring-20190323' of git://git.kernel.dk/linux-block:
  block: add BIO_NO_PAGE_REF flag
  iov_iter: add ITER_BVEC_FLAG_NO_REF flag
  io_uring: mark me as the maintainer
  io_uring: retry bulk slab allocs as single allocs
  io_uring: fix poll races
  io_uring: fix fget/fput handling
  io_uring: add prepped flag
  io_uring: make io_read/write return an integer
  io_uring: use regular request ref counts
parents 2335cbe6 399254aa
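
The last two patches in the set are the interesting ones conceptually: a fixed buffer is pinned once when it is registered with io_uring, so the BVEC iterator built over it already owns stable page references and the block layer can skip its usual get_page()/put_page() cycle. Below is a condensed sketch of that plumbing, stitched together from the hunks that follow; the function itself is invented for illustration (struct io_mapped_ubuf is io_uring's registered-buffer descriptor):

static void example_mark_no_ref(struct bio *bio, struct iov_iter *iter,
				struct io_mapped_ubuf *imu, size_t len)
{
	/* fixed buffers were pinned at io_uring_register() time ... */
	iov_iter_bvec(iter, READ, imu->bvec, imu->nr_bvecs, len);

	/* ... so the iter advertises that its pages are already referenced */
	iter->type |= ITER_BVEC_FLAG_NO_REF;

	/* bio_iov_iter_get_pages() then tags the bio instead of taking refs */
	if (iov_iter_is_bvec(iter) && iov_iter_bvec_no_ref(iter))
		bio_set_flag(bio, BIO_NO_PAGE_REF);
}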
MAINTAINERS +10 −0
@@ -8096,6 +8096,16 @@ F: include/linux/iommu.h
 F:	include/linux/of_iommu.h
 F:	include/linux/iova.h
 
+IO_URING
+M:	Jens Axboe <axboe@kernel.dk>
+L:	linux-block@vger.kernel.org
+L:	linux-fsdevel@vger.kernel.org
+T:	git git://git.kernel.dk/linux-block
+T:	git git://git.kernel.dk/liburing
+S:	Maintained
+F:	fs/io_uring.c
+F:	include/uapi/linux/io_uring.h
+
 IP MASQUERADING
 M:	Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
 S:	Maintained
block/bio.c +24 −19
@@ -849,20 +849,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
 	size = bio_add_page(bio, bv->bv_page, len,
 				bv->bv_offset + iter->iov_offset);
 	if (size == len) {
-		struct page *page;
-		int i;
-
-		/*
-		 * For the normal O_DIRECT case, we could skip grabbing this
-		 * reference and then not have to put them again when IO
-		 * completes. But this breaks some in-kernel users, like
-		 * splicing to/from a loop device, where we release the pipe
-		 * pages unconditionally. If we can fix that case, we can
-		 * get rid of the get here and the need to call
-		 * bio_release_pages() at IO completion time.
-		 */
-		mp_bvec_for_each_page(page, bv, i)
-			get_page(page);
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+			struct page *page;
+			int i;
+
+			mp_bvec_for_each_page(page, bv, i)
+				get_page(page);
+		}
+
 		iov_iter_advance(iter, size);
 		return 0;
 	}
@@ -925,10 +919,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
  * This takes either an iterator pointing to user memory, or one pointing to
  * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
  * map them into the kernel. On IO completion, the caller should put those
- * pages. For now, when adding kernel pages, we still grab a reference to the
- * page. This isn't strictly needed for the common case, but some call paths
- * end up releasing pages from eg a pipe and we can't easily control these.
- * See comment in __bio_iov_bvec_add_pages().
+ * pages. If we're adding kernel pages, and the caller told us it's safe to
+ * do so, we just have to add the pages to the bio directly. We don't grab an
+ * extra reference to those pages (the user should already have that), and we
+ * don't put the page on IO completion. The caller needs to check if the bio is
+ * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
+ * released.
  *
  * The function tries, but does not guarantee, to pin as many pages as
  * fit into the bio, or are requested in *iter, whatever is smaller. If
@@ -940,6 +936,13 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	const bool is_bvec = iov_iter_is_bvec(iter);
 	unsigned short orig_vcnt = bio->bi_vcnt;
 
+	/*
+	 * If this is a BVEC iter, then the pages are kernel pages. Don't
+	 * release them on IO completion, if the caller asked us to.
+	 */
+	if (is_bvec && iov_iter_bvec_no_ref(iter))
+		bio_set_flag(bio, BIO_NO_PAGE_REF);
+
 	do {
 		int ret;
 
@@ -1696,6 +1699,7 @@ static void bio_dirty_fn(struct work_struct *work)
 		next = bio->bi_private;
 
 		bio_set_pages_dirty(bio);
-		bio_release_pages(bio);
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+			bio_release_pages(bio);
 		bio_put(bio);
 	}
@@ -1713,6 +1717,7 @@ void bio_check_pages_dirty(struct bio *bio)
 			goto defer;
 	}
 
-	bio_release_pages(bio);
+	if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+		bio_release_pages(bio);
 	bio_put(bio);
 	return;
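
The flag puts a matching obligation on completion paths: pages may only be released when the bio actually took references to them. As an illustration of that contract (an invented end_io handler, following the same pattern as the blkdev and iomap hunks further down):

static void example_dio_end_io(struct bio *bio)
{
	/* only drop page references the bio actually acquired */
	if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
		struct bvec_iter_all iter_all;
		struct bio_vec *bvec;
		int i;

		bio_for_each_segment_all(bvec, bio, i, iter_all)
			put_page(bvec->bv_page);
	}
	bio_put(bio);
}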
fs/block_dev.c +7 −5
@@ -336,12 +336,14 @@ static void blkdev_bio_end_io(struct bio *bio)
 	if (should_dirty) {
 		bio_check_pages_dirty(bio);
 	} else {
-		struct bio_vec *bvec;
-		int i;
-		struct bvec_iter_all iter_all;
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+			struct bvec_iter_all iter_all;
+			struct bio_vec *bvec;
+			int i;
 
-		bio_for_each_segment_all(bvec, bio, i, iter_all)
-			put_page(bvec->bv_page);
+			bio_for_each_segment_all(bvec, bio, i, iter_all)
+				put_page(bvec->bv_page);
+		}
 		bio_put(bio);
 	}
 }
fs/io_uring.c +216 −223
@@ -189,17 +189,28 @@ struct sqe_submit {
 	bool				needs_fixed_file;
 };
 
+/*
+ * First field must be the file pointer in all the
+ * iocb unions! See also 'struct kiocb' in <linux/fs.h>
+ */
 struct io_poll_iocb {
 	struct file			*file;
 	struct wait_queue_head		*head;
 	__poll_t			events;
-	bool				woken;
+	bool				done;
 	bool				canceled;
 	struct wait_queue_entry		wait;
 };
 
+/*
+ * NOTE! Each of the iocb union members has the file pointer
+ * as the first entry in their struct definition. So you can
+ * access the file pointer through any of the sub-structs,
+ * or directly as just 'ki_filp' in this struct.
+ */
 struct io_kiocb {
 	union {
+		struct file		*file;
 		struct kiocb		rw;
 		struct io_poll_iocb	poll;
 	};
@@ -214,6 +225,7 @@ struct io_kiocb {
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 #define REQ_F_FIXED_FILE	4	/* ctx owns file */
 #define REQ_F_SEQ_PREV		8	/* sequential with previous */
+#define REQ_F_PREPPED		16	/* prep already done */
 	u64			user_data;
 	u64			error;
 
@@ -355,20 +367,25 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
 	}
 }
 
-static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+{
+	if (waitqueue_active(&ctx->wait))
+		wake_up(&ctx->wait);
+	if (waitqueue_active(&ctx->sqo_wait))
+		wake_up(&ctx->sqo_wait);
+}
+
+static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
 				long res, unsigned ev_flags)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	io_cqring_fill_event(ctx, ki_user_data, res, ev_flags);
+	io_cqring_fill_event(ctx, user_data, res, ev_flags);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
-	if (waitqueue_active(&ctx->wait))
-		wake_up(&ctx->wait);
-	if (waitqueue_active(&ctx->sqo_wait))
-		wake_up(&ctx->sqo_wait);
+	io_cqring_ev_posted(ctx);
 }
 
 static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
@@ -382,13 +399,14 @@ static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
 static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 				   struct io_submit_state *state)
 {
+	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 	struct io_kiocb *req;
 
 	if (!percpu_ref_tryget(&ctx->refs))
 		return NULL;
 
 	if (!state) {
-		req = kmem_cache_alloc(req_cachep, __GFP_NOWARN);
+		req = kmem_cache_alloc(req_cachep, gfp);
 		if (unlikely(!req))
 			goto out;
 	} else if (!state->free_reqs) {
@@ -396,10 +414,18 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		int ret;
 
 		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
-		ret = kmem_cache_alloc_bulk(req_cachep, __GFP_NOWARN, sz,
-						state->reqs);
-		if (unlikely(ret <= 0))
-			goto out;
+		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
+
+		/*
+		 * Bulk alloc is all-or-nothing. If we fail to get a batch,
+		 * retry single alloc to be on the safe side.
+		 */
+		if (unlikely(ret <= 0)) {
+			state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
+			if (!state->reqs[0])
+				goto out;
+			ret = 1;
+		}
 		state->free_reqs = ret - 1;
 		state->cur_req = 1;
 		req = state->reqs[0];
@@ -411,7 +437,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,

 	req->ctx = ctx;
 	req->flags = 0;
-	refcount_set(&req->refs, 0);
+	/* one is dropped after submission, the other at completion */
+	refcount_set(&req->refs, 2);
 	return req;
 out:
 	io_ring_drop_ctx_refs(ctx, 1);
@@ -429,10 +456,16 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)

 static void io_free_req(struct io_kiocb *req)
 {
-	if (!refcount_read(&req->refs) || refcount_dec_and_test(&req->refs)) {
-		io_ring_drop_ctx_refs(req->ctx, 1);
-		kmem_cache_free(req_cachep, req);
-	}
+	if (req->file && !(req->flags & REQ_F_FIXED_FILE))
+		fput(req->file);
+	io_ring_drop_ctx_refs(req->ctx, 1);
+	kmem_cache_free(req_cachep, req);
+}
+
+static void io_put_req(struct io_kiocb *req)
+{
+	if (refcount_dec_and_test(&req->refs))
+		io_free_req(req);
 }
 
 /*
@@ -442,44 +475,34 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			       struct list_head *done)
 {
 	void *reqs[IO_IOPOLL_BATCH];
-	int file_count, to_free;
-	struct file *file = NULL;
 	struct io_kiocb *req;
+	int to_free;
 
-	file_count = to_free = 0;
+	to_free = 0;
 	while (!list_empty(done)) {
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
 
 		io_cqring_fill_event(ctx, req->user_data, req->error, 0);
-
-		reqs[to_free++] = req;
 		(*nr_events)++;
 
-		/*
-		 * Batched puts of the same file, to avoid dirtying the
-		 * file usage count multiple times, if avoidable.
-		 */
-		if (!(req->flags & REQ_F_FIXED_FILE)) {
-			if (!file) {
-				file = req->rw.ki_filp;
-				file_count = 1;
-			} else if (file == req->rw.ki_filp) {
-				file_count++;
-			} else {
-				fput_many(file, file_count);
-				file = req->rw.ki_filp;
-				file_count = 1;
+		if (refcount_dec_and_test(&req->refs)) {
+			/* If we're not using fixed files, we have to pair the
+			 * completion part with the file put. Use regular
+			 * completions for those, only batch free for fixed
+			 * file.
+			 */
+			if (req->flags & REQ_F_FIXED_FILE) {
+				reqs[to_free++] = req;
+				if (to_free == ARRAY_SIZE(reqs))
+					io_free_req_many(ctx, reqs, &to_free);
+			} else {
+				io_free_req(req);
 			}
 		}
-
-		if (to_free == ARRAY_SIZE(reqs))
-			io_free_req_many(ctx, reqs, &to_free);
 	}
-	io_commit_cqring(ctx);
 
-	if (file)
-		fput_many(file, file_count);
+	io_commit_cqring(ctx);
 	io_free_req_many(ctx, reqs, &to_free);
 }

@@ -602,21 +625,14 @@ static void kiocb_end_write(struct kiocb *kiocb)
 	}
 }
 
-static void io_fput(struct io_kiocb *req)
-{
-	if (!(req->flags & REQ_F_FIXED_FILE))
-		fput(req->rw.ki_filp);
-}
-
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
 	kiocb_end_write(kiocb);
 
-	io_fput(req);
 	io_cqring_add_event(req->ctx, req->user_data, res, 0);
-	io_free_req(req);
+	io_put_req(req);
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -731,31 +747,18 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 	const struct io_uring_sqe *sqe = s->sqe;
 	struct io_ring_ctx *ctx = req->ctx;
 	struct kiocb *kiocb = &req->rw;
-	unsigned ioprio, flags;
-	int fd, ret;
+	unsigned ioprio;
+	int ret;
 
+	if (!req->file)
+		return -EBADF;
 	/* For -EAGAIN retry, everything is already prepped */
-	if (kiocb->ki_filp)
+	if (req->flags & REQ_F_PREPPED)
 		return 0;
 
-	flags = READ_ONCE(sqe->flags);
-	fd = READ_ONCE(sqe->fd);
-
-	if (flags & IOSQE_FIXED_FILE) {
-		if (unlikely(!ctx->user_files ||
-		    (unsigned) fd >= ctx->nr_user_files))
-			return -EBADF;
-		kiocb->ki_filp = ctx->user_files[fd];
-		req->flags |= REQ_F_FIXED_FILE;
-	} else {
-		if (s->needs_fixed_file)
-			return -EBADF;
-		kiocb->ki_filp = io_file_get(state, fd);
-		if (unlikely(!kiocb->ki_filp))
-			return -EBADF;
-		if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
-			force_nonblock = false;
-	}
+	if (force_nonblock && !io_file_supports_async(req->file))
+		force_nonblock = false;
 
 	kiocb->ki_pos = READ_ONCE(sqe->off);
 	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
 	kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
@@ -764,7 +767,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 	if (ioprio) {
 		ret = ioprio_check_cap(ioprio);
 		if (ret)
-			goto out_fput;
+			return ret;
 
 		kiocb->ki_ioprio = ioprio;
 	} else
@@ -772,38 +775,26 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,

 	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
 	if (unlikely(ret))
-		goto out_fput;
+		return ret;
 	if (force_nonblock) {
 		kiocb->ki_flags |= IOCB_NOWAIT;
 		req->flags |= REQ_F_FORCE_NONBLOCK;
 	}
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
-		ret = -EOPNOTSUPP;
 		if (!(kiocb->ki_flags & IOCB_DIRECT) ||
 		    !kiocb->ki_filp->f_op->iopoll)
-			goto out_fput;
+			return -EOPNOTSUPP;
 
 		req->error = 0;
 		kiocb->ki_flags |= IOCB_HIPRI;
 		kiocb->ki_complete = io_complete_rw_iopoll;
 	} else {
-		if (kiocb->ki_flags & IOCB_HIPRI) {
-			ret = -EINVAL;
-			goto out_fput;
-		}
+		if (kiocb->ki_flags & IOCB_HIPRI)
+			return -EINVAL;
 		kiocb->ki_complete = io_complete_rw;
 	}
+	req->flags |= REQ_F_PREPPED;
 	return 0;
-out_fput:
-	if (!(flags & IOSQE_FIXED_FILE)) {
-		/*
-		 * in case of error, we didn't use this file reference. drop it.
-		 */
-		if (state)
-			state->used_refs--;
-		io_file_put(state, kiocb->ki_filp);
-	}
-	return ret;
 }
 
 static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
@@ -864,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
 	if (offset)
 		iov_iter_advance(iter, offset);
+
+	/* don't drop a reference to these pages */
+	iter->type |= ITER_BVEC_FLAG_NO_REF;
 	return 0;
 }
 
@@ -887,7 +881,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
 	opcode = READ_ONCE(sqe->opcode);
 	if (opcode == IORING_OP_READ_FIXED ||
 	    opcode == IORING_OP_WRITE_FIXED) {
-		ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
+		int ret = io_import_fixed(ctx, rw, sqe, iter);
 		*iovec = NULL;
 		return ret;
 	}
@@ -945,7 +939,7 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
 	async_list->io_end = io_end;
 }
 
-static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
+static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 		   bool force_nonblock, struct io_submit_state *state)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -953,23 +947,21 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
 	struct iov_iter iter;
 	struct file *file;
 	size_t iov_count;
-	ssize_t ret;
+	int ret;
 
 	ret = io_prep_rw(req, s, force_nonblock, state);
 	if (ret)
 		return ret;
 	file = kiocb->ki_filp;
 
-	ret = -EBADF;
 	if (unlikely(!(file->f_mode & FMODE_READ)))
-		goto out_fput;
-	ret = -EINVAL;
+		return -EBADF;
 	if (unlikely(!file->f_op->read_iter))
-		goto out_fput;
+		return -EINVAL;
 
 	ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
 	if (ret)
-		goto out_fput;
+		return ret;
 
 	iov_count = iov_iter_count(&iter);
 	ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
@@ -991,14 +983,10 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
 		}
 	}
 	kfree(iovec);
-out_fput:
-	/* Hold on to the file for -EAGAIN */
-	if (unlikely(ret && ret != -EAGAIN))
-		io_fput(req);
 	return ret;
 }
 
-static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
+static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
 		    bool force_nonblock, struct io_submit_state *state)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -1006,23 +994,21 @@ static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
 	struct iov_iter iter;
 	struct file *file;
 	size_t iov_count;
-	ssize_t ret;
+	int ret;
 
 	ret = io_prep_rw(req, s, force_nonblock, state);
 	if (ret)
 		return ret;
 
-	ret = -EBADF;
 	file = kiocb->ki_filp;
 	if (unlikely(!(file->f_mode & FMODE_WRITE)))
-		goto out_fput;
-	ret = -EINVAL;
+		return -EBADF;
 	if (unlikely(!file->f_op->write_iter))
-		goto out_fput;
+		return -EINVAL;
 
 	ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
 	if (ret)
-		goto out_fput;
+		return ret;
 
 	iov_count = iov_iter_count(&iter);
 
@@ -1054,10 +1040,6 @@ static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
 	}
 out_free:
 	kfree(iovec);
-out_fput:
-	/* Hold on to the file for -EAGAIN */
-	if (unlikely(ret && ret != -EAGAIN))
-		io_fput(req);
 	return ret;
 }
 
@@ -1072,29 +1054,19 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
-	/*
-	 * Twilight zone - it's possible that someone issued an opcode that
-	 * has a file attached, then got -EAGAIN on submission, and changed
-	 * the sqe before we retried it from async context. Avoid dropping
-	 * a file reference for this malicious case, and flag the error.
-	 */
-	if (req->rw.ki_filp) {
-		err = -EBADF;
-		io_fput(req);
-	}
 	io_cqring_add_event(ctx, user_data, err, 0);
-	io_free_req(req);
+	io_put_req(req);
 	return 0;
 }
 
 static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	unsigned flags;
-	int fd;
 
-	/* Prep already done */
-	if (req->rw.ki_filp)
+	if (!req->file)
+		return -EBADF;
+	/* Prep already done (EAGAIN retry) */
+	if (req->flags & REQ_F_PREPPED)
 		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
@@ -1102,20 +1074,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	fd = READ_ONCE(sqe->fd);
-	flags = READ_ONCE(sqe->flags);
-
-	if (flags & IOSQE_FIXED_FILE) {
-		if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
-			return -EBADF;
-		req->rw.ki_filp = ctx->user_files[fd];
-		req->flags |= REQ_F_FIXED_FILE;
-	} else {
-		req->rw.ki_filp = fget(fd);
-		if (unlikely(!req->rw.ki_filp))
-			return -EBADF;
-	}
-
+	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
@@ -1144,9 +1103,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);
 
-	io_fput(req);
 	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
-	io_free_req(req);
+	io_put_req(req);
 	return 0;
 }
 
@@ -1204,15 +1162,16 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	spin_unlock_irq(&ctx->completion_lock);
 
 	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
-	io_free_req(req);
+	io_put_req(req);
 	return 0;
 }
 
-static void io_poll_complete(struct io_kiocb *req, __poll_t mask)
+static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
+			     __poll_t mask)
 {
-	io_cqring_add_event(req->ctx, req->user_data, mangle_poll(mask), 0);
-	io_fput(req);
-	io_free_req(req);
+	req->poll.done = true;
+	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+	io_commit_cqring(ctx);
 }
 
 static void io_poll_complete_work(struct work_struct *work)
@@ -1240,9 +1199,11 @@ static void io_poll_complete_work(struct work_struct *work)
 		return;
 	}
 	list_del_init(&req->list);
+	io_poll_complete(ctx, req, mask);
 	spin_unlock_irq(&ctx->completion_lock);
 
-	io_poll_complete(req, mask);
+	io_cqring_ev_posted(ctx);
+	io_put_req(req);
 }
 
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -1253,29 +1214,25 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
 	struct io_ring_ctx *ctx = req->ctx;
 	__poll_t mask = key_to_poll(key);
-
-	poll->woken = true;
+	unsigned long flags;
 
 	/* for instances that support it check for an event match first: */
-	if (mask) {
-		unsigned long flags;
-
-		if (!(mask & poll->events))
-			return 0;
-
-		/* try to complete the iocb inline if we can: */
-		if (spin_trylock_irqsave(&ctx->completion_lock, flags)) {
-			list_del(&req->list);
-			spin_unlock_irqrestore(&ctx->completion_lock, flags);
-
-			list_del_init(&poll->wait.entry);
-			io_poll_complete(req, mask);
-			return 1;
-		}
-	}
+	if (mask && !(mask & poll->events))
+		return 0;
 
 	list_del_init(&poll->wait.entry);
-	queue_work(ctx->sqo_wq, &req->work);
+
+	if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+		list_del(&req->list);
+		io_poll_complete(ctx, req, mask);
+		spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+		io_cqring_ev_posted(ctx);
+		io_put_req(req);
+	} else {
+		queue_work(ctx->sqo_wq, &req->work);
+	}
+
 	return 1;
 }
 
@@ -1305,36 +1262,23 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_poll_iocb *poll = &req->poll;
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_poll_table ipt;
-	unsigned flags;
+	bool cancel = false;
 	__poll_t mask;
 	u16 events;
-	int fd;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
 		return -EINVAL;
+	if (!poll->file)
+		return -EBADF;
 
 	INIT_WORK(&req->work, io_poll_complete_work);
 	events = READ_ONCE(sqe->poll_events);
 	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
 
-	flags = READ_ONCE(sqe->flags);
-	fd = READ_ONCE(sqe->fd);
-
-	if (flags & IOSQE_FIXED_FILE) {
-		if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
-			return -EBADF;
-		poll->file = ctx->user_files[fd];
-		req->flags |= REQ_F_FIXED_FILE;
-	} else {
-		poll->file = fget(fd);
-	}
-	if (unlikely(!poll->file))
-		return -EBADF;
-
 	poll->head = NULL;
-	poll->woken = false;
+	poll->done = false;
 	poll->canceled = false;
 
 	ipt.pt._qproc = io_poll_queue_proc;
@@ -1346,56 +1290,44 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	INIT_LIST_HEAD(&poll->wait.entry);
 	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
 
-	/* one for removal from waitqueue, one for this function */
-	refcount_set(&req->refs, 2);
-
 	mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
-	if (unlikely(!poll->head)) {
-		/* we did not manage to set up a waitqueue, done */
-		goto out;
-	}
 
 	spin_lock_irq(&ctx->completion_lock);
-	spin_lock(&poll->head->lock);
-	if (poll->woken) {
-		/* wake_up context handles the rest */
-		mask = 0;
-		ipt.error = 0;
-	} else if (mask || ipt.error) {
-		/* if we get an error or a mask we are done */
-		WARN_ON_ONCE(list_empty(&poll->wait.entry));
-		list_del_init(&poll->wait.entry);
-	} else {
-		/* actually waiting for an event */
-		list_add_tail(&req->list, &ctx->cancel_list);
-	}
-	spin_unlock(&poll->head->lock);
+	if (likely(poll->head)) {
+		spin_lock(&poll->head->lock);
+		if (unlikely(list_empty(&poll->wait.entry))) {
+			if (ipt.error)
+				cancel = true;
+			ipt.error = 0;
+			mask = 0;
+		}
+		if (mask || ipt.error)
+			list_del_init(&poll->wait.entry);
+		else if (cancel)
+			WRITE_ONCE(poll->canceled, true);
+		else if (!poll->done) /* actually waiting for an event */
+			list_add_tail(&req->list, &ctx->cancel_list);
+		spin_unlock(&poll->head->lock);
+	}
+	if (mask) { /* no async, we'd stolen it */
+		req->error = mangle_poll(mask);
+		ipt.error = 0;
+		io_poll_complete(ctx, req, mask);
+	}
 	spin_unlock_irq(&ctx->completion_lock);
 
-out:
-	if (unlikely(ipt.error)) {
-		if (!(flags & IOSQE_FIXED_FILE))
-			fput(poll->file);
-		/*
-		 * Drop one of our refs to this req, __io_submit_sqe() will
-		 * drop the other one since we're returning an error.
-		 */
-		io_free_req(req);
-		return ipt.error;
+	if (mask) {
+		io_cqring_ev_posted(ctx);
+		io_put_req(req);
 	}
-
-	if (mask)
-		io_poll_complete(req, mask);
-	io_free_req(req);
-	return 0;
+	return ipt.error;
 }
 
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			   const struct sqe_submit *s, bool force_nonblock,
 			   struct io_submit_state *state)
 {
-	ssize_t ret;
-	int opcode;
+	int ret, opcode;
 
 	if (unlikely(s->index >= ctx->sq_entries))
 		return -EINVAL;
@@ -1524,10 +1456,13 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 					break;
 				cond_resched();
 			} while (1);
+
+			/* drop submission reference */
+			io_put_req(req);
 		}
 		if (ret) {
 			io_cqring_add_event(ctx, sqe->user_data, ret, 0);
-			io_free_req(req);
+			io_put_req(req);
 		}
 
 		/* async context always use a copy of the sqe */
@@ -1614,11 +1549,55 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
 	return ret;
 }
 
+static bool io_op_needs_file(const struct io_uring_sqe *sqe)
+{
+	int op = READ_ONCE(sqe->opcode);
+
+	switch (op) {
+	case IORING_OP_NOP:
+	case IORING_OP_POLL_REMOVE:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
+			   struct io_submit_state *state, struct io_kiocb *req)
+{
+	unsigned flags;
+	int fd;
+
+	flags = READ_ONCE(s->sqe->flags);
+	fd = READ_ONCE(s->sqe->fd);
+
+	if (!io_op_needs_file(s->sqe)) {
+		req->file = NULL;
+		return 0;
+	}
+
+	if (flags & IOSQE_FIXED_FILE) {
+		if (unlikely(!ctx->user_files ||
+		    (unsigned) fd >= ctx->nr_user_files))
+			return -EBADF;
+		req->file = ctx->user_files[fd];
+		req->flags |= REQ_F_FIXED_FILE;
+	} else {
+		if (s->needs_fixed_file)
+			return -EBADF;
+		req->file = io_file_get(state, fd);
+		if (unlikely(!req->file))
+			return -EBADF;
+	}
+
+	return 0;
+}
+
 static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 			 struct io_submit_state *state)
 {
 	struct io_kiocb *req;
-	ssize_t ret;
+	int ret;
 
 	/* enforce forwards compatibility on users */
 	if (unlikely(s->sqe->flags & ~IOSQE_FIXED_FILE))
@@ -1628,7 +1607,9 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 	if (unlikely(!req))
 		return -EAGAIN;
 
-	req->rw.ki_filp = NULL;
+	ret = io_req_set_file(ctx, s, state, req);
+	if (unlikely(ret))
+		goto out;
 
 	ret = __io_submit_sqe(ctx, req, s, true, state);
 	if (ret == -EAGAIN) {
@@ -1649,11 +1630,23 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 				INIT_WORK(&req->work, io_sq_wq_submit_work);
 				queue_work(ctx->sqo_wq, &req->work);
 			}
-			ret = 0;
+
+			/*
+			 * Queued up for async execution, worker will release
+			 * submit reference when the iocb is actually
+			 * submitted.
+			 */
+			return 0;
 		}
 	}
 
+out:
+	/* drop submission reference */
+	io_put_req(req);
+
+	/* and drop final reference, if we failed */
 	if (ret)
-		io_free_req(req);
+		io_put_req(req);
 
 	return ret;
 }
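
One detail from the io_get_req() hunks above is worth spelling out: kmem_cache_alloc_bulk() is all-or-nothing, so a failed batch is retried as a single allocation instead of failing the submission. The same pattern as a standalone sketch (the function name and error convention are illustrative, not from the patch):

static int example_alloc_batch(struct kmem_cache *cachep, void **objs,
			       size_t want)
{
	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
	int got;

	got = kmem_cache_alloc_bulk(cachep, gfp, want, objs);
	if (unlikely(got <= 0)) {
		/* bulk alloc is all-or-nothing; fall back to a single alloc */
		objs[0] = kmem_cache_alloc(cachep, gfp);
		if (!objs[0])
			return -ENOMEM;
		got = 1;
	}
	return got;	/* number of objects handed back in objs[] */
}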
fs/iomap.c +7 −5
@@ -1589,12 +1589,14 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 	if (should_dirty) {
 		bio_check_pages_dirty(bio);
 	} else {
-		struct bio_vec *bvec;
-		int i;
-		struct bvec_iter_all iter_all;
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+			struct bvec_iter_all iter_all;
+			struct bio_vec *bvec;
+			int i;
 
-		bio_for_each_segment_all(bvec, bio, i, iter_all)
-			put_page(bvec->bv_page);
+			bio_for_each_segment_all(bvec, bio, i, iter_all)
+				put_page(bvec->bv_page);
+		}
 		bio_put(bio);
 	}
 }