Loading fs/aio.c +106 −31 Original line number Diff line number Diff line Loading @@ -1617,6 +1617,51 @@ static void aio_poll_put_work(struct work_struct *work) iocb_put(iocb); } /* * Safely lock the waitqueue which the request is on, synchronizing with the * case where the ->poll() provider decides to free its waitqueue early. * * Returns true on success, meaning that req->head->lock was locked, req->wait * is on req->head, and an RCU read lock was taken. Returns false if the * request was already removed from its waitqueue (which might no longer exist). */ static bool poll_iocb_lock_wq(struct poll_iocb *req) { wait_queue_head_t *head; /* * While we hold the waitqueue lock and the waitqueue is nonempty, * wake_up_pollfree() will wait for us. However, taking the waitqueue * lock in the first place can race with the waitqueue being freed. * * We solve this as eventpoll does: by taking advantage of the fact that * all users of wake_up_pollfree() will RCU-delay the actual free. If * we enter rcu_read_lock() and see that the pointer to the queue is * non-NULL, we can then lock it without the memory being freed out from * under us, then check whether the request is still on the queue. * * Keep holding rcu_read_lock() as long as we hold the queue lock, in * case the caller deletes the entry from the queue, leaving it empty. * In that case, only RCU prevents the queue memory from being freed. */ rcu_read_lock(); head = smp_load_acquire(&req->head); if (head) { spin_lock(&head->lock); if (!list_empty(&req->wait.entry)) return true; spin_unlock(&head->lock); } rcu_read_unlock(); return false; } static void poll_iocb_unlock_wq(struct poll_iocb *req) { spin_unlock(&req->head->lock); rcu_read_unlock(); } static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); Loading @@ -1636,7 +1681,7 @@ static void aio_poll_complete_work(struct work_struct *work) * avoid further branches in the fast path. */ spin_lock_irq(&ctx->ctx_lock); spin_lock(&req->head->lock); if (poll_iocb_lock_wq(req)) { if (!mask && !READ_ONCE(req->cancelled)) { /* * The request isn't actually ready to be completed yet. Loading @@ -1648,12 +1693,13 @@ static void aio_poll_complete_work(struct work_struct *work) } else { req->work_scheduled = false; } spin_unlock(&req->head->lock); poll_iocb_unlock_wq(req); spin_unlock_irq(&ctx->ctx_lock); return; } list_del_init(&req->wait.entry); spin_unlock(&req->head->lock); poll_iocb_unlock_wq(req); } /* else, POLLFREE has freed the waitqueue, so we must complete */ list_del_init(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); spin_unlock_irq(&ctx->ctx_lock); Loading @@ -1667,13 +1713,14 @@ static int aio_poll_cancel(struct kiocb *iocb) struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); struct poll_iocb *req = &aiocb->poll; spin_lock(&req->head->lock); if (poll_iocb_lock_wq(req)) { WRITE_ONCE(req->cancelled, true); if (!req->work_scheduled) { schedule_work(&aiocb->poll.work); req->work_scheduled = true; } spin_unlock(&req->head->lock); poll_iocb_unlock_wq(req); } /* else, the request was force-cancelled by POLLFREE already */ return 0; } Loading Loading @@ -1725,7 +1772,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, * * Don't remove the request from the waitqueue here, as it might * not actually be complete yet (we won't know until vfs_poll() * is called), and we must not miss any wakeups. * is called), and we must not miss any wakeups. POLLFREE is an * exception to this; see below. */ if (req->work_scheduled) { req->work_need_resched = true; Loading @@ -1733,6 +1781,28 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, schedule_work(&req->work); req->work_scheduled = true; } /* * If the waitqueue is being freed early but we can't complete * the request inline, we have to tear down the request as best * we can. That means immediately removing the request from its * waitqueue and preventing all further accesses to the * waitqueue via the request. We also need to schedule the * completion work (done above). Also mark the request as * cancelled, to potentially skip an unneeded call to ->poll(). */ if (mask & POLLFREE) { WRITE_ONCE(req->cancelled, true); list_del_init(&req->wait.entry); /* * Careful: this *must* be the last step, since as soon * as req->head is NULL'ed out, the request can be * completed and freed, since aio_poll_complete_work() * will no longer need to take the waitqueue lock. */ smp_store_release(&req->head, NULL); } } return 1; } Loading @@ -1740,6 +1810,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct aio_poll_table { struct poll_table_struct pt; struct aio_kiocb *iocb; bool queued; int error; }; Loading @@ -1750,11 +1821,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); /* multiple wait queues per file are not supported */ if (unlikely(pt->iocb->poll.head)) { if (unlikely(pt->queued)) { pt->error = -EINVAL; return; } pt->queued = true; pt->error = 0; pt->iocb->poll.head = head; add_wait_queue(head, &pt->iocb->poll.wait); Loading Loading @@ -1786,6 +1858,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) apt.pt._qproc = aio_poll_queue_proc; apt.pt._key = req->events; apt.iocb = aiocb; apt.queued = false; apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ /* initialized the list so that we can do list_empty checks */ Loading @@ -1794,9 +1867,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) mask = vfs_poll(req->file, &apt.pt) & req->events; spin_lock_irq(&ctx->ctx_lock); if (likely(req->head)) { spin_lock(&req->head->lock); if (list_empty(&req->wait.entry) || req->work_scheduled) { if (likely(apt.queued)) { bool on_queue = poll_iocb_lock_wq(req); if (!on_queue || req->work_scheduled) { /* * aio_poll_wake() already either scheduled the async * completion work, or completed the request inline. Loading @@ -1812,7 +1886,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) } else if (cancel) { /* Cancel if possible (may be too late though). */ WRITE_ONCE(req->cancelled, true); } else if (!list_empty(&req->wait.entry)) { } else if (on_queue) { /* * Actually waiting for an event, so add the request to * active_reqs so that it can be cancelled if needed. Loading @@ -1820,7 +1894,8 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) list_add_tail(&aiocb->ki_list, &ctx->active_reqs); aiocb->ki_cancel = aio_poll_cancel; } spin_unlock(&req->head->lock); if (on_queue) poll_iocb_unlock_wq(req); } if (mask) { /* no async, we'd stolen it */ aiocb->ki_res.res = mangle_poll(mask); Loading include/uapi/asm-generic/poll.h +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ #define POLLRDHUP 0x2000 #endif #define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ #define POLLFREE (__force __poll_t)0x4000 #define POLL_BUSY_LOOP (__force __poll_t)0x8000 Loading Loading
fs/aio.c +106 −31 Original line number Diff line number Diff line Loading @@ -1617,6 +1617,51 @@ static void aio_poll_put_work(struct work_struct *work) iocb_put(iocb); } /* * Safely lock the waitqueue which the request is on, synchronizing with the * case where the ->poll() provider decides to free its waitqueue early. * * Returns true on success, meaning that req->head->lock was locked, req->wait * is on req->head, and an RCU read lock was taken. Returns false if the * request was already removed from its waitqueue (which might no longer exist). */ static bool poll_iocb_lock_wq(struct poll_iocb *req) { wait_queue_head_t *head; /* * While we hold the waitqueue lock and the waitqueue is nonempty, * wake_up_pollfree() will wait for us. However, taking the waitqueue * lock in the first place can race with the waitqueue being freed. * * We solve this as eventpoll does: by taking advantage of the fact that * all users of wake_up_pollfree() will RCU-delay the actual free. If * we enter rcu_read_lock() and see that the pointer to the queue is * non-NULL, we can then lock it without the memory being freed out from * under us, then check whether the request is still on the queue. * * Keep holding rcu_read_lock() as long as we hold the queue lock, in * case the caller deletes the entry from the queue, leaving it empty. * In that case, only RCU prevents the queue memory from being freed. */ rcu_read_lock(); head = smp_load_acquire(&req->head); if (head) { spin_lock(&head->lock); if (!list_empty(&req->wait.entry)) return true; spin_unlock(&head->lock); } rcu_read_unlock(); return false; } static void poll_iocb_unlock_wq(struct poll_iocb *req) { spin_unlock(&req->head->lock); rcu_read_unlock(); } static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); Loading @@ -1636,7 +1681,7 @@ static void aio_poll_complete_work(struct work_struct *work) * avoid further branches in the fast path. */ spin_lock_irq(&ctx->ctx_lock); spin_lock(&req->head->lock); if (poll_iocb_lock_wq(req)) { if (!mask && !READ_ONCE(req->cancelled)) { /* * The request isn't actually ready to be completed yet. Loading @@ -1648,12 +1693,13 @@ static void aio_poll_complete_work(struct work_struct *work) } else { req->work_scheduled = false; } spin_unlock(&req->head->lock); poll_iocb_unlock_wq(req); spin_unlock_irq(&ctx->ctx_lock); return; } list_del_init(&req->wait.entry); spin_unlock(&req->head->lock); poll_iocb_unlock_wq(req); } /* else, POLLFREE has freed the waitqueue, so we must complete */ list_del_init(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); spin_unlock_irq(&ctx->ctx_lock); Loading @@ -1667,13 +1713,14 @@ static int aio_poll_cancel(struct kiocb *iocb) struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); struct poll_iocb *req = &aiocb->poll; spin_lock(&req->head->lock); if (poll_iocb_lock_wq(req)) { WRITE_ONCE(req->cancelled, true); if (!req->work_scheduled) { schedule_work(&aiocb->poll.work); req->work_scheduled = true; } spin_unlock(&req->head->lock); poll_iocb_unlock_wq(req); } /* else, the request was force-cancelled by POLLFREE already */ return 0; } Loading Loading @@ -1725,7 +1772,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, * * Don't remove the request from the waitqueue here, as it might * not actually be complete yet (we won't know until vfs_poll() * is called), and we must not miss any wakeups. * is called), and we must not miss any wakeups. POLLFREE is an * exception to this; see below. */ if (req->work_scheduled) { req->work_need_resched = true; Loading @@ -1733,6 +1781,28 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, schedule_work(&req->work); req->work_scheduled = true; } /* * If the waitqueue is being freed early but we can't complete * the request inline, we have to tear down the request as best * we can. That means immediately removing the request from its * waitqueue and preventing all further accesses to the * waitqueue via the request. We also need to schedule the * completion work (done above). Also mark the request as * cancelled, to potentially skip an unneeded call to ->poll(). */ if (mask & POLLFREE) { WRITE_ONCE(req->cancelled, true); list_del_init(&req->wait.entry); /* * Careful: this *must* be the last step, since as soon * as req->head is NULL'ed out, the request can be * completed and freed, since aio_poll_complete_work() * will no longer need to take the waitqueue lock. */ smp_store_release(&req->head, NULL); } } return 1; } Loading @@ -1740,6 +1810,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct aio_poll_table { struct poll_table_struct pt; struct aio_kiocb *iocb; bool queued; int error; }; Loading @@ -1750,11 +1821,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); /* multiple wait queues per file are not supported */ if (unlikely(pt->iocb->poll.head)) { if (unlikely(pt->queued)) { pt->error = -EINVAL; return; } pt->queued = true; pt->error = 0; pt->iocb->poll.head = head; add_wait_queue(head, &pt->iocb->poll.wait); Loading Loading @@ -1786,6 +1858,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) apt.pt._qproc = aio_poll_queue_proc; apt.pt._key = req->events; apt.iocb = aiocb; apt.queued = false; apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ /* initialized the list so that we can do list_empty checks */ Loading @@ -1794,9 +1867,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) mask = vfs_poll(req->file, &apt.pt) & req->events; spin_lock_irq(&ctx->ctx_lock); if (likely(req->head)) { spin_lock(&req->head->lock); if (list_empty(&req->wait.entry) || req->work_scheduled) { if (likely(apt.queued)) { bool on_queue = poll_iocb_lock_wq(req); if (!on_queue || req->work_scheduled) { /* * aio_poll_wake() already either scheduled the async * completion work, or completed the request inline. Loading @@ -1812,7 +1886,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) } else if (cancel) { /* Cancel if possible (may be too late though). */ WRITE_ONCE(req->cancelled, true); } else if (!list_empty(&req->wait.entry)) { } else if (on_queue) { /* * Actually waiting for an event, so add the request to * active_reqs so that it can be cancelled if needed. Loading @@ -1820,7 +1894,8 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) list_add_tail(&aiocb->ki_list, &ctx->active_reqs); aiocb->ki_cancel = aio_poll_cancel; } spin_unlock(&req->head->lock); if (on_queue) poll_iocb_unlock_wq(req); } if (mask) { /* no async, we'd stolen it */ aiocb->ki_res.res = mangle_poll(mask); Loading
include/uapi/asm-generic/poll.h +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ #define POLLRDHUP 0x2000 #endif #define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ #define POLLFREE (__force __poll_t)0x4000 #define POLL_BUSY_LOOP (__force __poll_t)0x8000 Loading