From: Greg Kroah-Hartman Date: Sun, 12 Dec 2021 13:44:46 +0000 (+0100) Subject: 5.15-stable patches X-Git-Tag: v4.4.295~25 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5e3fd8d79fecaa87a63695841878e6bb54561cf5;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: aio-fix-use-after-free-due-to-missing-pollfree-handling.patch aio-keep-poll-requests-on-waitqueue-until-completed.patch binder-use-wake_up_pollfree.patch signalfd-use-wake_up_pollfree.patch wait-add-wake_up_pollfree.patch --- diff --git a/queue-5.15/aio-fix-use-after-free-due-to-missing-pollfree-handling.patch b/queue-5.15/aio-fix-use-after-free-due-to-missing-pollfree-handling.patch new file mode 100644 index 00000000000..0635d41a262 --- /dev/null +++ b/queue-5.15/aio-fix-use-after-free-due-to-missing-pollfree-handling.patch @@ -0,0 +1,279 @@ +From 50252e4b5e989ce64555c7aef7516bdefc2fea72 Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Wed, 8 Dec 2021 17:04:55 -0800 +Subject: aio: fix use-after-free due to missing POLLFREE handling + +From: Eric Biggers + +commit 50252e4b5e989ce64555c7aef7516bdefc2fea72 upstream. + +signalfd_poll() and binder_poll() are special in that they use a +waitqueue whose lifetime is the current task, rather than the struct +file as is normally the case. This is okay for blocking polls, since a +blocking poll occurs within one task; however, non-blocking polls +require another solution. This solution is for the queue to be cleared +before it is freed, by sending a POLLFREE notification to all waiters. + +Unfortunately, only eventpoll handles POLLFREE. A second type of +non-blocking poll, aio poll, was added in kernel v4.18, and it doesn't +handle POLLFREE. This allows a use-after-free to occur if a signalfd or +binder fd is polled with aio poll, and the waitqueue gets freed. + +Fix this by making aio poll handle POLLFREE. + +A patch by Ramji Jiyani +(https://lore.kernel.org/r/20211027011834.2497484-1-ramjiyani@google.com) +tried to do this by making aio_poll_wake() always complete the request +inline if POLLFREE is seen. However, that solution had two bugs. +First, it introduced a deadlock, as it unconditionally locked the aio +context while holding the waitqueue lock, which inverts the normal +locking order. Second, it didn't consider that POLLFREE notifications +are missed while the request has been temporarily de-queued. + +The second problem was solved by my previous patch. This patch then +properly fixes the use-after-free by handling POLLFREE in a +deadlock-free way. It does this by taking advantage of the fact that +freeing of the waitqueue is RCU-delayed, similar to what eventpoll does. + +Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL") +Cc: # v4.18+ +Link: https://lore.kernel.org/r/20211209010455.42744-6-ebiggers@kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + fs/aio.c | 137 ++++++++++++++++++++++++++++++---------- + include/uapi/asm-generic/poll.h | 2 + 2 files changed, 107 insertions(+), 32 deletions(-) + +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -1621,6 +1621,51 @@ static void aio_poll_put_work(struct wor + iocb_put(iocb); + } + ++/* ++ * Safely lock the waitqueue which the request is on, synchronizing with the ++ * case where the ->poll() provider decides to free its waitqueue early. ++ * ++ * Returns true on success, meaning that req->head->lock was locked, req->wait ++ * is on req->head, and an RCU read lock was taken. Returns false if the ++ * request was already removed from its waitqueue (which might no longer exist). 
++ */ ++static bool poll_iocb_lock_wq(struct poll_iocb *req) ++{ ++ wait_queue_head_t *head; ++ ++ /* ++ * While we hold the waitqueue lock and the waitqueue is nonempty, ++ * wake_up_pollfree() will wait for us. However, taking the waitqueue ++ * lock in the first place can race with the waitqueue being freed. ++ * ++ * We solve this as eventpoll does: by taking advantage of the fact that ++ * all users of wake_up_pollfree() will RCU-delay the actual free. If ++ * we enter rcu_read_lock() and see that the pointer to the queue is ++ * non-NULL, we can then lock it without the memory being freed out from ++ * under us, then check whether the request is still on the queue. ++ * ++ * Keep holding rcu_read_lock() as long as we hold the queue lock, in ++ * case the caller deletes the entry from the queue, leaving it empty. ++ * In that case, only RCU prevents the queue memory from being freed. ++ */ ++ rcu_read_lock(); ++ head = smp_load_acquire(&req->head); ++ if (head) { ++ spin_lock(&head->lock); ++ if (!list_empty(&req->wait.entry)) ++ return true; ++ spin_unlock(&head->lock); ++ } ++ rcu_read_unlock(); ++ return false; ++} ++ ++static void poll_iocb_unlock_wq(struct poll_iocb *req) ++{ ++ spin_unlock(&req->head->lock); ++ rcu_read_unlock(); ++} ++ + static void aio_poll_complete_work(struct work_struct *work) + { + struct poll_iocb *req = container_of(work, struct poll_iocb, work); +@@ -1640,24 +1685,25 @@ static void aio_poll_complete_work(struc + * avoid further branches in the fast path. + */ + spin_lock_irq(&ctx->ctx_lock); +- spin_lock(&req->head->lock); +- if (!mask && !READ_ONCE(req->cancelled)) { +- /* +- * The request isn't actually ready to be completed yet. +- * Reschedule completion if another wakeup came in. +- */ +- if (req->work_need_resched) { +- schedule_work(&req->work); +- req->work_need_resched = false; +- } else { +- req->work_scheduled = false; ++ if (poll_iocb_lock_wq(req)) { ++ if (!mask && !READ_ONCE(req->cancelled)) { ++ /* ++ * The request isn't actually ready to be completed yet. ++ * Reschedule completion if another wakeup came in. 
++ */ ++ if (req->work_need_resched) { ++ schedule_work(&req->work); ++ req->work_need_resched = false; ++ } else { ++ req->work_scheduled = false; ++ } ++ poll_iocb_unlock_wq(req); ++ spin_unlock_irq(&ctx->ctx_lock); ++ return; + } +- spin_unlock(&req->head->lock); +- spin_unlock_irq(&ctx->ctx_lock); +- return; +- } +- list_del_init(&req->wait.entry); +- spin_unlock(&req->head->lock); ++ list_del_init(&req->wait.entry); ++ poll_iocb_unlock_wq(req); ++ } /* else, POLLFREE has freed the waitqueue, so we must complete */ + list_del_init(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); + spin_unlock_irq(&ctx->ctx_lock); +@@ -1671,13 +1717,14 @@ static int aio_poll_cancel(struct kiocb + struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); + struct poll_iocb *req = &aiocb->poll; + +- spin_lock(&req->head->lock); +- WRITE_ONCE(req->cancelled, true); +- if (!req->work_scheduled) { +- schedule_work(&aiocb->poll.work); +- req->work_scheduled = true; +- } +- spin_unlock(&req->head->lock); ++ if (poll_iocb_lock_wq(req)) { ++ WRITE_ONCE(req->cancelled, true); ++ if (!req->work_scheduled) { ++ schedule_work(&aiocb->poll.work); ++ req->work_scheduled = true; ++ } ++ poll_iocb_unlock_wq(req); ++ } /* else, the request was force-cancelled by POLLFREE already */ + + return 0; + } +@@ -1729,7 +1776,8 @@ static int aio_poll_wake(struct wait_que + * + * Don't remove the request from the waitqueue here, as it might + * not actually be complete yet (we won't know until vfs_poll() +- * is called), and we must not miss any wakeups. ++ * is called), and we must not miss any wakeups. POLLFREE is an ++ * exception to this; see below. + */ + if (req->work_scheduled) { + req->work_need_resched = true; +@@ -1737,6 +1785,28 @@ static int aio_poll_wake(struct wait_que + schedule_work(&req->work); + req->work_scheduled = true; + } ++ ++ /* ++ * If the waitqueue is being freed early but we can't complete ++ * the request inline, we have to tear down the request as best ++ * we can. That means immediately removing the request from its ++ * waitqueue and preventing all further accesses to the ++ * waitqueue via the request. We also need to schedule the ++ * completion work (done above). Also mark the request as ++ * cancelled, to potentially skip an unneeded call to ->poll(). ++ */ ++ if (mask & POLLFREE) { ++ WRITE_ONCE(req->cancelled, true); ++ list_del_init(&req->wait.entry); ++ ++ /* ++ * Careful: this *must* be the last step, since as soon ++ * as req->head is NULL'ed out, the request can be ++ * completed and freed, since aio_poll_complete_work() ++ * will no longer need to take the waitqueue lock. 
++ */ ++ smp_store_release(&req->head, NULL); ++ } + } + return 1; + } +@@ -1744,6 +1814,7 @@ static int aio_poll_wake(struct wait_que + struct aio_poll_table { + struct poll_table_struct pt; + struct aio_kiocb *iocb; ++ bool queued; + int error; + }; + +@@ -1754,11 +1825,12 @@ aio_poll_queue_proc(struct file *file, s + struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); + + /* multiple wait queues per file are not supported */ +- if (unlikely(pt->iocb->poll.head)) { ++ if (unlikely(pt->queued)) { + pt->error = -EINVAL; + return; + } + ++ pt->queued = true; + pt->error = 0; + pt->iocb->poll.head = head; + add_wait_queue(head, &pt->iocb->poll.wait); +@@ -1790,6 +1862,7 @@ static int aio_poll(struct aio_kiocb *ai + apt.pt._qproc = aio_poll_queue_proc; + apt.pt._key = req->events; + apt.iocb = aiocb; ++ apt.queued = false; + apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ + + /* initialized the list so that we can do list_empty checks */ +@@ -1798,9 +1871,10 @@ static int aio_poll(struct aio_kiocb *ai + + mask = vfs_poll(req->file, &apt.pt) & req->events; + spin_lock_irq(&ctx->ctx_lock); +- if (likely(req->head)) { +- spin_lock(&req->head->lock); +- if (list_empty(&req->wait.entry) || req->work_scheduled) { ++ if (likely(apt.queued)) { ++ bool on_queue = poll_iocb_lock_wq(req); ++ ++ if (!on_queue || req->work_scheduled) { + /* + * aio_poll_wake() already either scheduled the async + * completion work, or completed the request inline. +@@ -1816,7 +1890,7 @@ static int aio_poll(struct aio_kiocb *ai + } else if (cancel) { + /* Cancel if possible (may be too late though). */ + WRITE_ONCE(req->cancelled, true); +- } else if (!list_empty(&req->wait.entry)) { ++ } else if (on_queue) { + /* + * Actually waiting for an event, so add the request to + * active_reqs so that it can be cancelled if needed. +@@ -1824,7 +1898,8 @@ static int aio_poll(struct aio_kiocb *ai + list_add_tail(&aiocb->ki_list, &ctx->active_reqs); + aiocb->ki_cancel = aio_poll_cancel; + } +- spin_unlock(&req->head->lock); ++ if (on_queue) ++ poll_iocb_unlock_wq(req); + } + if (mask) { /* no async, we'd stolen it */ + aiocb->ki_res.res = mangle_poll(mask); +--- a/include/uapi/asm-generic/poll.h ++++ b/include/uapi/asm-generic/poll.h +@@ -29,7 +29,7 @@ + #define POLLRDHUP 0x2000 + #endif + +-#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ ++#define POLLFREE (__force __poll_t)0x4000 + + #define POLL_BUSY_LOOP (__force __poll_t)0x8000 + diff --git a/queue-5.15/aio-keep-poll-requests-on-waitqueue-until-completed.patch b/queue-5.15/aio-keep-poll-requests-on-waitqueue-until-completed.patch new file mode 100644 index 00000000000..216c8e5c3f8 --- /dev/null +++ b/queue-5.15/aio-keep-poll-requests-on-waitqueue-until-completed.patch @@ -0,0 +1,199 @@ +From 363bee27e25804d8981dd1c025b4ad49dc39c530 Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Wed, 8 Dec 2021 17:04:54 -0800 +Subject: aio: keep poll requests on waitqueue until completed + +From: Eric Biggers + +commit 363bee27e25804d8981dd1c025b4ad49dc39c530 upstream. + +Currently, aio_poll_wake() will always remove the poll request from the +waitqueue. Then, if aio_poll_complete_work() sees that none of the +polled events are ready and the request isn't cancelled, it re-adds the +request to the waitqueue. (This can easily happen when polling a file +that doesn't pass an event mask when waking up its waitqueue.) + +This is fundamentally broken for two reasons: + + 1. 
If a wakeup occurs between vfs_poll() and the request being + re-added to the waitqueue, it will be missed because the request + wasn't on the waitqueue at the time. Therefore, IOCB_CMD_POLL + might never complete even if the polled file is ready. + + 2. When the request isn't on the waitqueue, there is no way to be + notified that the waitqueue is being freed (which happens when its + lifetime is shorter than the struct file's). This is supposed to + happen via the waitqueue entries being woken up with POLLFREE. + +Therefore, leave the requests on the waitqueue until they are actually +completed (or cancelled). To keep track of when aio_poll_complete_work +needs to be scheduled, use new fields in struct poll_iocb. Remove the +'done' field which is now redundant. + +Note that this is consistent with how sys_poll() and eventpoll work; +their wakeup functions do *not* remove the waitqueue entries. + +Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL") +Cc: # v4.18+ +Link: https://lore.kernel.org/r/20211209010455.42744-5-ebiggers@kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + fs/aio.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 63 insertions(+), 20 deletions(-) + +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -181,8 +181,9 @@ struct poll_iocb { + struct file *file; + struct wait_queue_head *head; + __poll_t events; +- bool done; + bool cancelled; ++ bool work_scheduled; ++ bool work_need_resched; + struct wait_queue_entry wait; + struct work_struct work; + }; +@@ -1639,14 +1640,26 @@ static void aio_poll_complete_work(struc + * avoid further branches in the fast path. + */ + spin_lock_irq(&ctx->ctx_lock); ++ spin_lock(&req->head->lock); + if (!mask && !READ_ONCE(req->cancelled)) { +- add_wait_queue(req->head, &req->wait); ++ /* ++ * The request isn't actually ready to be completed yet. ++ * Reschedule completion if another wakeup came in. ++ */ ++ if (req->work_need_resched) { ++ schedule_work(&req->work); ++ req->work_need_resched = false; ++ } else { ++ req->work_scheduled = false; ++ } ++ spin_unlock(&req->head->lock); + spin_unlock_irq(&ctx->ctx_lock); + return; + } ++ list_del_init(&req->wait.entry); ++ spin_unlock(&req->head->lock); + list_del_init(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); +- req->done = true; + spin_unlock_irq(&ctx->ctx_lock); + + iocb_put(iocb); +@@ -1660,9 +1673,9 @@ static int aio_poll_cancel(struct kiocb + + spin_lock(&req->head->lock); + WRITE_ONCE(req->cancelled, true); +- if (!list_empty(&req->wait.entry)) { +- list_del_init(&req->wait.entry); ++ if (!req->work_scheduled) { + schedule_work(&aiocb->poll.work); ++ req->work_scheduled = true; + } + spin_unlock(&req->head->lock); + +@@ -1681,20 +1694,26 @@ static int aio_poll_wake(struct wait_que + if (mask && !(mask & req->events)) + return 0; + +- list_del_init(&req->wait.entry); +- +- if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { ++ /* ++ * Complete the request inline if possible. This requires that three ++ * conditions be met: ++ * 1. An event mask must have been passed. If a plain wakeup was done ++ * instead, then mask == 0 and we have to call vfs_poll() to get ++ * the events, so inline completion isn't possible. ++ * 2. The completion work must not have already been scheduled. ++ * 3. ctx_lock must not be busy. We have to use trylock because we ++ * already hold the waitqueue lock, so this inverts the normal ++ * locking order. Use irqsave/irqrestore because not all ++ * filesystems (e.g. 
fuse) call this function with IRQs disabled, ++ * yet IRQs have to be disabled before ctx_lock is obtained. ++ */ ++ if (mask && !req->work_scheduled && ++ spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { + struct kioctx *ctx = iocb->ki_ctx; + +- /* +- * Try to complete the iocb inline if we can. Use +- * irqsave/irqrestore because not all filesystems (e.g. fuse) +- * call this function with IRQs disabled and because IRQs +- * have to be disabled before ctx_lock is obtained. +- */ ++ list_del_init(&req->wait.entry); + list_del(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); +- req->done = true; + if (iocb->ki_eventfd && eventfd_signal_allowed()) { + iocb = NULL; + INIT_WORK(&req->work, aio_poll_put_work); +@@ -1704,7 +1723,20 @@ static int aio_poll_wake(struct wait_que + if (iocb) + iocb_put(iocb); + } else { +- schedule_work(&req->work); ++ /* ++ * Schedule the completion work if needed. If it was already ++ * scheduled, record that another wakeup came in. ++ * ++ * Don't remove the request from the waitqueue here, as it might ++ * not actually be complete yet (we won't know until vfs_poll() ++ * is called), and we must not miss any wakeups. ++ */ ++ if (req->work_scheduled) { ++ req->work_need_resched = true; ++ } else { ++ schedule_work(&req->work); ++ req->work_scheduled = true; ++ } + } + return 1; + } +@@ -1751,8 +1783,9 @@ static int aio_poll(struct aio_kiocb *ai + req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; + + req->head = NULL; +- req->done = false; + req->cancelled = false; ++ req->work_scheduled = false; ++ req->work_need_resched = false; + + apt.pt._qproc = aio_poll_queue_proc; + apt.pt._key = req->events; +@@ -1767,17 +1800,27 @@ static int aio_poll(struct aio_kiocb *ai + spin_lock_irq(&ctx->ctx_lock); + if (likely(req->head)) { + spin_lock(&req->head->lock); +- if (unlikely(list_empty(&req->wait.entry))) { +- if (apt.error) ++ if (list_empty(&req->wait.entry) || req->work_scheduled) { ++ /* ++ * aio_poll_wake() already either scheduled the async ++ * completion work, or completed the request inline. ++ */ ++ if (apt.error) /* unsupported case: multiple queues */ + cancel = true; + apt.error = 0; + mask = 0; + } + if (mask || apt.error) { ++ /* Steal to complete synchronously. */ + list_del_init(&req->wait.entry); + } else if (cancel) { ++ /* Cancel if possible (may be too late though). */ + WRITE_ONCE(req->cancelled, true); +- } else if (!req->done) { /* actually waiting for an event */ ++ } else if (!list_empty(&req->wait.entry)) { ++ /* ++ * Actually waiting for an event, so add the request to ++ * active_reqs so that it can be cancelled if needed. ++ */ + list_add_tail(&aiocb->ki_list, &ctx->active_reqs); + aiocb->ki_cancel = aio_poll_cancel; + } diff --git a/queue-5.15/binder-use-wake_up_pollfree.patch b/queue-5.15/binder-use-wake_up_pollfree.patch new file mode 100644 index 00000000000..26740576cba --- /dev/null +++ b/queue-5.15/binder-use-wake_up_pollfree.patch @@ -0,0 +1,61 @@ +From a880b28a71e39013e357fd3adccd1d8a31bc69a8 Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Wed, 8 Dec 2021 17:04:52 -0800 +Subject: binder: use wake_up_pollfree() + +From: Eric Biggers + +commit a880b28a71e39013e357fd3adccd1d8a31bc69a8 upstream. + +wake_up_poll() uses nr_exclusive=1, so it's not guaranteed to wake up +all exclusive waiters. Yet, POLLFREE *must* wake up all waiters. epoll +and aio poll are fortunately not affected by this, but it's very +fragile. Thus, the new function wake_up_pollfree() has been introduced. 
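For reference while reading this conversion, the core of the new helper (added by wait-add-wake_up_pollfree.patch in this same series, which carries the full version with kerneldoc) is a non-exclusive wakeup, so every waiter, exclusive or not, sees EPOLLHUP | POLLFREE rather than only the first exclusive waiter:

    void __wake_up_pollfree(struct wait_queue_head *wq_head)
    {
            /* nr_exclusive == 0: wake all waiters, not just one exclusive waiter */
            __wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE));
            /* POLLFREE must have cleared the queue. */
            WARN_ON_ONCE(waitqueue_active(wq_head));
    }

The inline wake_up_pollfree() wrapper around this keeps a lockless waitqueue_active() fast path, which is why callers must RCU-delay freeing the waitqueue; binder retains its synchronize_rcu() below for exactly this reason.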
+ +Convert binder to use wake_up_pollfree(). + +Reported-by: Linus Torvalds +Fixes: f5cb779ba163 ("ANDROID: binder: remove waitqueue when thread exits.") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20211209010455.42744-3-ebiggers@kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + drivers/android/binder.c | 21 +++++++++------------ + 1 file changed, 9 insertions(+), 12 deletions(-) + +--- a/drivers/android/binder.c ++++ b/drivers/android/binder.c +@@ -4422,23 +4422,20 @@ static int binder_thread_release(struct + __release(&t->lock); + + /* +- * If this thread used poll, make sure we remove the waitqueue +- * from any epoll data structures holding it with POLLFREE. +- * waitqueue_active() is safe to use here because we're holding +- * the inner lock. ++ * If this thread used poll, make sure we remove the waitqueue from any ++ * poll data structures holding it. + */ +- if ((thread->looper & BINDER_LOOPER_STATE_POLL) && +- waitqueue_active(&thread->wait)) { +- wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE); +- } ++ if (thread->looper & BINDER_LOOPER_STATE_POLL) ++ wake_up_pollfree(&thread->wait); + + binder_inner_proc_unlock(thread->proc); + + /* +- * This is needed to avoid races between wake_up_poll() above and +- * and ep_remove_waitqueue() called for other reasons (eg the epoll file +- * descriptor being closed); ep_remove_waitqueue() holds an RCU read +- * lock, so we can be sure it's done after calling synchronize_rcu(). ++ * This is needed to avoid races between wake_up_pollfree() above and ++ * someone else removing the last entry from the queue for other reasons ++ * (e.g. ep_remove_wait_queue() being called due to an epoll file ++ * descriptor being closed). Such other users hold an RCU read lock, so ++ * we can be sure they're done after we call synchronize_rcu(). + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL) + synchronize_rcu(); diff --git a/queue-5.15/series b/queue-5.15/series index 2de3ebd1f24..25e0fb29fe2 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -92,3 +92,8 @@ thermal-int340x-fix-vcoreflow-mmio-bit-offset-for-tgl.patch drm-syncobj-deal-with-signalled-fences-in-drm_syncobj_find_fence.patch libata-add-horkage-for-asmedia-1092.patch io_uring-ensure-task_work-gets-run-as-part-of-cancelations.patch +wait-add-wake_up_pollfree.patch +binder-use-wake_up_pollfree.patch +signalfd-use-wake_up_pollfree.patch +aio-keep-poll-requests-on-waitqueue-until-completed.patch +aio-fix-use-after-free-due-to-missing-pollfree-handling.patch diff --git a/queue-5.15/signalfd-use-wake_up_pollfree.patch b/queue-5.15/signalfd-use-wake_up_pollfree.patch new file mode 100644 index 00000000000..029050aff53 --- /dev/null +++ b/queue-5.15/signalfd-use-wake_up_pollfree.patch @@ -0,0 +1,47 @@ +From 9537bae0da1f8d1e2361ab6d0479e8af7824e160 Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Wed, 8 Dec 2021 17:04:53 -0800 +Subject: signalfd: use wake_up_pollfree() + +From: Eric Biggers + +commit 9537bae0da1f8d1e2361ab6d0479e8af7824e160 upstream. + +wake_up_poll() uses nr_exclusive=1, so it's not guaranteed to wake up +all exclusive waiters. Yet, POLLFREE *must* wake up all waiters. epoll +and aio poll are fortunately not affected by this, but it's very +fragile. Thus, the new function wake_up_pollfree() has been introduced. + +Convert signalfd to use wake_up_pollfree(). 
+ +Reported-by: Linus Torvalds +Fixes: d80e731ecab4 ("epoll: introduce POLLFREE to flush ->signalfd_wqh before kfree()") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20211209010455.42744-4-ebiggers@kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + fs/signalfd.c | 12 +----------- + 1 file changed, 1 insertion(+), 11 deletions(-) + +--- a/fs/signalfd.c ++++ b/fs/signalfd.c +@@ -35,17 +35,7 @@ + + void signalfd_cleanup(struct sighand_struct *sighand) + { +- wait_queue_head_t *wqh = &sighand->signalfd_wqh; +- /* +- * The lockless check can race with remove_wait_queue() in progress, +- * but in this case its caller should run under rcu_read_lock() and +- * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. +- */ +- if (likely(!waitqueue_active(wqh))) +- return; +- +- /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */ +- wake_up_poll(wqh, EPOLLHUP | POLLFREE); ++ wake_up_pollfree(&sighand->signalfd_wqh); + } + + struct signalfd_ctx { diff --git a/queue-5.15/wait-add-wake_up_pollfree.patch b/queue-5.15/wait-add-wake_up_pollfree.patch new file mode 100644 index 00000000000..a5f399ef09f --- /dev/null +++ b/queue-5.15/wait-add-wake_up_pollfree.patch @@ -0,0 +1,105 @@ +From 42288cb44c4b5fff7653bc392b583a2b8bd6a8c0 Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Wed, 8 Dec 2021 17:04:51 -0800 +Subject: wait: add wake_up_pollfree() + +From: Eric Biggers + +commit 42288cb44c4b5fff7653bc392b583a2b8bd6a8c0 upstream. + +Several ->poll() implementations are special in that they use a +waitqueue whose lifetime is the current task, rather than the struct +file as is normally the case. This is okay for blocking polls, since a +blocking poll occurs within one task; however, non-blocking polls +require another solution. This solution is for the queue to be cleared +before it is freed, using 'wake_up_poll(wq, EPOLLHUP | POLLFREE);'. + +However, that has a bug: wake_up_poll() calls __wake_up() with +nr_exclusive=1. Therefore, if there are multiple "exclusive" waiters, +and the wakeup function for the first one returns a positive value, only +that one will be called. That's *not* what's needed for POLLFREE; +POLLFREE is special in that it really needs to wake up everyone. + +Considering the three non-blocking poll systems: + +- io_uring poll doesn't handle POLLFREE at all, so it is broken anyway. + +- aio poll is unaffected, since it doesn't support exclusive waits. + However, that's fragile, as someone could add this feature later. + +- epoll doesn't appear to be broken by this, since its wakeup function + returns 0 when it sees POLLFREE. But this is fragile. + +Although there is a workaround (see epoll), it's better to define a +function which always sends POLLFREE to all waiters. Add such a +function. Also make it verify that the queue really becomes empty after +all waiters have been woken up. 
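As an illustration only, and not part of the queued patch: a minimal, hypothetical sketch of the calling pattern this helper enables for a ->poll() provider whose waitqueue has task (or context) lifetime, modeled on the binder and signalfd conversions queued in this series. The foo_* names and the private_data layout are invented for the example; the real conversions are in binder-use-wake_up_pollfree.patch and signalfd-use-wake_up_pollfree.patch.

    /*
     * Hypothetical ->poll() provider whose waitqueue lives in a
     * task-lifetime object rather than in the struct file.
     */
    struct foo_ctx {
            wait_queue_head_t wait;         /* init_waitqueue_head() at creation */
            struct rcu_head rcu;
            /* ... */
    };

    static __poll_t foo_poll(struct file *file, poll_table *pt)
    {
            struct foo_ctx *ctx = file->private_data;

            poll_wait(file, &ctx->wait, pt);
            return 0;                       /* event computation omitted in this sketch */
    }

    static void foo_ctx_release(struct foo_ctx *ctx)
    {
            /*
             * The waitqueue is about to go away while epoll or aio waiters
             * may still be queued on it: wake them all with POLLFREE so
             * they detach before the memory is reused.
             */
            wake_up_pollfree(&ctx->wait);

            /*
             * wake_up_pollfree() may race with a waiter removing the last
             * entry under rcu_read_lock(), so the free must be RCU-delayed.
             */
            kfree_rcu(ctx, rcu);
    }

The binder conversion earlier in this commit follows the same shape, except that it uses an explicit synchronize_rcu() before the thread structure is freed, and signalfd relies on sighand_cachep being SLAB_TYPESAFE_BY_RCU.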
+ +Reported-by: Linus Torvalds +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20211209010455.42744-2-ebiggers@kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/wait.h | 26 ++++++++++++++++++++++++++ + kernel/sched/wait.c | 7 +++++++ + 2 files changed, 33 insertions(+) + +--- a/include/linux/wait.h ++++ b/include/linux/wait.h +@@ -217,6 +217,7 @@ void __wake_up_sync_key(struct wait_queu + void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); + void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); + void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); ++void __wake_up_pollfree(struct wait_queue_head *wq_head); + + #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) + #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) +@@ -245,6 +246,31 @@ void __wake_up_sync(struct wait_queue_he + #define wake_up_interruptible_sync_poll_locked(x, m) \ + __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) + ++/** ++ * wake_up_pollfree - signal that a polled waitqueue is going away ++ * @wq_head: the wait queue head ++ * ++ * In the very rare cases where a ->poll() implementation uses a waitqueue whose ++ * lifetime is tied to a task rather than to the 'struct file' being polled, ++ * this function must be called before the waitqueue is freed so that ++ * non-blocking polls (e.g. epoll) are notified that the queue is going away. ++ * ++ * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via ++ * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. ++ */ ++static inline void wake_up_pollfree(struct wait_queue_head *wq_head) ++{ ++ /* ++ * For performance reasons, we don't always take the queue lock here. ++ * Therefore, we might race with someone removing the last entry from ++ * the queue, and proceed while they still hold the queue lock. ++ * However, rcu_read_lock() is required to be held in such cases, so we ++ * can safely proceed with an RCU-delayed free. ++ */ ++ if (waitqueue_active(wq_head)) ++ __wake_up_pollfree(wq_head); ++} ++ + #define ___wait_cond_timeout(condition) \ + ({ \ + bool __cond = (condition); \ +--- a/kernel/sched/wait.c ++++ b/kernel/sched/wait.c +@@ -238,6 +238,13 @@ void __wake_up_sync(struct wait_queue_he + } + EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ + ++void __wake_up_pollfree(struct wait_queue_head *wq_head) ++{ ++ __wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE)); ++ /* POLLFREE must have cleared the queue. */ ++ WARN_ON_ONCE(waitqueue_active(wq_head)); ++} ++ + /* + * Note: we use "set_current_state()" _after_ the wait-queue add, + * because we need a memory barrier there on SMP, so that any