Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1c4404ef authored by Jens Axboe's avatar Jens Axboe Committed by Greg Kroah-Hartman
Browse files

io_uring: make sure async workqueue is canceled on exit



Track async work items that we queue, so we can safely cancel them
if the ring is closed or the process exits. Newer kernels handle
this automatically with io-wq, but the old workqueue based setup needs
a bit of special help to get there.

There's no upstream variant of this, as that would require backporting
all the io-wq changes from 5.5 and on. Hence I made a one-off that
ensures that we don't leak memory if we have async work items that
need active cancelation (like socket IO).

Reported-by: default avatarAgarwal, Anchal <anchalag@amazon.com>
Tested-by: default avatarAgarwal, Anchal <anchalag@amazon.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
parent e7522089
Loading
Loading
Loading
Loading
+63 −0
Original line number Diff line number Diff line
@@ -267,6 +267,9 @@ struct io_ring_ctx {
#if defined(CONFIG_UNIX)
	struct socket		*ring_sock;
#endif

	struct list_head	task_list;
	spinlock_t		task_lock;
};

struct sqe_submit {
@@ -331,14 +334,18 @@ struct io_kiocb {
#define REQ_F_ISREG		2048	/* regular file */
#define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
#define REQ_F_TIMEOUT_NOSEQ	8192	/* no timeout sequence */
#define REQ_F_CANCEL		16384	/* cancel request */
	unsigned long		fsize;
	u64			user_data;
	u32			result;
	u32			sequence;
	struct task_struct	*task;

	struct fs_struct	*fs;

	struct work_struct	work;
	struct task_struct	*work_task;
	struct list_head	task_list;
};

#define IO_PLUG_THRESHOLD		2
@@ -425,6 +432,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
	INIT_LIST_HEAD(&ctx->cancel_list);
	INIT_LIST_HEAD(&ctx->defer_list);
	INIT_LIST_HEAD(&ctx->timeout_list);
	INIT_LIST_HEAD(&ctx->task_list);
	spin_lock_init(&ctx->task_lock);
	return ctx;
}

@@ -492,6 +501,7 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
static inline void io_queue_async_work(struct io_ring_ctx *ctx,
				       struct io_kiocb *req)
{
	unsigned long flags;
	int rw = 0;

	if (req->submit.sqe) {
@@ -503,6 +513,13 @@ static inline void io_queue_async_work(struct io_ring_ctx *ctx,
		}
	}

	req->task = current;

	spin_lock_irqsave(&ctx->task_lock, flags);
	list_add(&req->task_list, &ctx->task_list);
	req->work_task = NULL;
	spin_unlock_irqrestore(&ctx->task_lock, flags);

	queue_work(ctx->sqo_wq[rw], &req->work);
}

@@ -2201,6 +2218,8 @@ static void io_sq_wq_submit_work(struct work_struct *work)

	old_cred = override_creds(ctx->creds);
	async_list = io_async_list_from_sqe(ctx, req->submit.sqe);

	allow_kernel_signal(SIGINT);
restart:
	do {
		struct sqe_submit *s = &req->submit;
@@ -2232,6 +2251,12 @@ static void io_sq_wq_submit_work(struct work_struct *work)
		}

		if (!ret) {
			req->work_task = current;
			if (req->flags & REQ_F_CANCEL) {
				ret = -ECANCELED;
				goto end_req;
			}

			s->has_user = cur_mm != NULL;
			s->needs_lock = true;
			do {
@@ -2246,6 +2271,12 @@ static void io_sq_wq_submit_work(struct work_struct *work)
					break;
				cond_resched();
			} while (1);
end_req:
			if (!list_empty(&req->task_list)) {
				spin_lock_irq(&ctx->task_lock);
				list_del_init(&req->task_list);
				spin_unlock_irq(&ctx->task_lock);
			}
		}

		/* drop submission reference */
@@ -2311,6 +2342,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
	}

out:
	disallow_signal(SIGINT);
	if (cur_mm) {
		set_fs(old_fs);
		unuse_mm(cur_mm);
@@ -3675,12 +3707,32 @@ static int io_uring_fasync(int fd, struct file *file, int on)
	return fasync_helper(fd, file, on, &ctx->cq_fasync);
}

static void io_cancel_async_work(struct io_ring_ctx *ctx,
				 struct task_struct *task)
{
	if (list_empty(&ctx->task_list))
		return;

	spin_lock_irq(&ctx->task_lock);
	while (!list_empty(&ctx->task_list)) {
		struct io_kiocb *req;

		req = list_first_entry(&ctx->task_list, struct io_kiocb, task_list);
		list_del_init(&req->task_list);
		req->flags |= REQ_F_CANCEL;
		if (req->work_task && (!task || req->task == task))
			send_sig(SIGINT, req->work_task, 1);
	}
	spin_unlock_irq(&ctx->task_lock);
}

static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
{
	mutex_lock(&ctx->uring_lock);
	percpu_ref_kill(&ctx->refs);
	mutex_unlock(&ctx->uring_lock);

	io_cancel_async_work(ctx, NULL);
	io_kill_timeouts(ctx);
	io_poll_remove_all(ctx);
	io_iopoll_reap_events(ctx);
@@ -3688,6 +3740,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
	io_ring_ctx_free(ctx);
}

static int io_uring_flush(struct file *file, void *data)
{
	struct io_ring_ctx *ctx = file->private_data;

	if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
		io_cancel_async_work(ctx, current);

	return 0;
}

static int io_uring_release(struct inode *inode, struct file *file)
{
	struct io_ring_ctx *ctx = file->private_data;
@@ -3792,6 +3854,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,

static const struct file_operations io_uring_fops = {
	.release	= io_uring_release,
	.flush		= io_uring_flush,
	.mmap		= io_uring_mmap,
	.poll		= io_uring_poll,
	.fasync		= io_uring_fasync,