--- /dev/null
+From foo@baz Sun Dec 12 02:37:09 PM CET 2021
+From: Eric Biggers <ebiggers@kernel.org>
+Date: Fri, 10 Dec 2021 15:50:54 -0800
+Subject: aio: fix use-after-free due to missing POLLFREE handling
+To: stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Message-ID: <20211210235054.40103-6-ebiggers@kernel.org>
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 50252e4b5e989ce64555c7aef7516bdefc2fea72 upstream.
+
+signalfd_poll() and binder_poll() are special in that they use a
+waitqueue whose lifetime is the current task, rather than the struct
+file as is normally the case. This is okay for blocking polls, since a
+blocking poll occurs within one task; however, non-blocking polls
+require another solution. This solution is for the queue to be cleared
+before it is freed, by sending a POLLFREE notification to all waiters.
+
+Unfortunately, only eventpoll handles POLLFREE. A second type of
+non-blocking poll, aio poll, was added in kernel v4.18, and it doesn't
+handle POLLFREE. This allows a use-after-free to occur if a signalfd or
+binder fd is polled with aio poll, and the waitqueue gets freed.
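+
+As a rough illustration of the affected operation (a sketch, not a full
+reproducer; the descriptor setup is illustrative), submitting an
+IOCB_CMD_POLL request against a signalfd parks the aio poll request on the
+polling task's sighand waitqueue; the use-after-free then requires that
+waitqueue to be freed while the request is still pending:
+
+	#include <linux/aio_abi.h>
+	#include <sys/signalfd.h>
+	#include <sys/syscall.h>
+	#include <poll.h>
+	#include <signal.h>
+	#include <string.h>
+	#include <unistd.h>
+
+	int main(void)
+	{
+		sigset_t mask;
+		aio_context_t ctx = 0;
+		struct iocb cb;
+		struct iocb *cbs[1] = { &cb };
+		int sfd;
+
+		sigemptyset(&mask);
+		sigaddset(&mask, SIGUSR1);
+		sigprocmask(SIG_BLOCK, &mask, NULL);
+		sfd = signalfd(-1, &mask, 0);
+
+		syscall(SYS_io_setup, 1, &ctx);
+
+		memset(&cb, 0, sizeof(cb));
+		cb.aio_lio_opcode = IOCB_CMD_POLL;	/* aio poll (v4.18+) */
+		cb.aio_fildes = sfd;
+		cb.aio_buf = POLLIN;			/* requested events */
+
+		/* Parks the request on current->sighand->signalfd_wqh. */
+		syscall(SYS_io_submit, ctx, 1, cbs);
+		return 0;
+	}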
+
+Fix this by making aio poll handle POLLFREE.
+
+A patch by Ramji Jiyani <ramjiyani@google.com>
+(https://lore.kernel.org/r/20211027011834.2497484-1-ramjiyani@google.com)
+tried to do this by making aio_poll_wake() always complete the request
+inline if POLLFREE is seen. However, that solution had two bugs.
+First, it introduced a deadlock, as it unconditionally locked the aio
+context while holding the waitqueue lock, which inverts the normal
+locking order. Second, it didn't consider that POLLFREE notifications
+are missed while the request has been temporarily de-queued.
+
+The second problem was solved by my previous patch. This patch then
+properly fixes the use-after-free by handling POLLFREE in a
+deadlock-free way. It does this by taking advantage of the fact that
+freeing of the waitqueue is RCU-delayed, similar to what eventpoll does.
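+
+Condensed from the hunks below (a sketch, not the literal patch), the fix
+is a load-acquire/store-release handshake on req->head, taken under
+rcu_read_lock():
+
+	/* Waiter side: lock the waitqueue only if it hasn't been freed. */
+	rcu_read_lock();
+	head = smp_load_acquire(&req->head);	/* pairs with the release below */
+	if (head) {
+		/* Safe: wake_up_pollfree() callers RCU-delay the actual free. */
+		spin_lock(&head->lock);
+		/* ... operate on the request ... */
+		spin_unlock(&head->lock);
+	}
+	rcu_read_unlock();
+
+	/* Wakeup side, on POLLFREE: detach and publish that the queue is gone. */
+	WRITE_ONCE(req->cancelled, true);
+	list_del_init(&req->wait.entry);
+	smp_store_release(&req->head, NULL);	/* must be the last step */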
+
+Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL")
+Cc: <stable@vger.kernel.org> # v4.18+
+Link: https://lore.kernel.org/r/20211209010455.42744-6-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/aio.c | 137 ++++++++++++++++++++++++++++++----------
+ include/uapi/asm-generic/poll.h | 2
+ 2 files changed, 107 insertions(+), 32 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -1627,6 +1627,51 @@ static void aio_poll_put_work(struct wor
+ iocb_put(iocb);
+ }
+
++/*
++ * Safely lock the waitqueue which the request is on, synchronizing with the
++ * case where the ->poll() provider decides to free its waitqueue early.
++ *
++ * Returns true on success, meaning that req->head->lock was locked, req->wait
++ * is on req->head, and an RCU read lock was taken. Returns false if the
++ * request was already removed from its waitqueue (which might no longer exist).
++ */
++static bool poll_iocb_lock_wq(struct poll_iocb *req)
++{
++ wait_queue_head_t *head;
++
++ /*
++ * While we hold the waitqueue lock and the waitqueue is nonempty,
++ * wake_up_pollfree() will wait for us. However, taking the waitqueue
++ * lock in the first place can race with the waitqueue being freed.
++ *
++ * We solve this as eventpoll does: by taking advantage of the fact that
++ * all users of wake_up_pollfree() will RCU-delay the actual free. If
++ * we enter rcu_read_lock() and see that the pointer to the queue is
++ * non-NULL, we can then lock it without the memory being freed out from
++ * under us, then check whether the request is still on the queue.
++ *
++ * Keep holding rcu_read_lock() as long as we hold the queue lock, in
++ * case the caller deletes the entry from the queue, leaving it empty.
++ * In that case, only RCU prevents the queue memory from being freed.
++ */
++ rcu_read_lock();
++ head = smp_load_acquire(&req->head);
++ if (head) {
++ spin_lock(&head->lock);
++ if (!list_empty(&req->wait.entry))
++ return true;
++ spin_unlock(&head->lock);
++ }
++ rcu_read_unlock();
++ return false;
++}
++
++static void poll_iocb_unlock_wq(struct poll_iocb *req)
++{
++ spin_unlock(&req->head->lock);
++ rcu_read_unlock();
++}
++
+ static void aio_poll_complete_work(struct work_struct *work)
+ {
+ struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+@@ -1646,24 +1691,25 @@ static void aio_poll_complete_work(struc
+ * avoid further branches in the fast path.
+ */
+ spin_lock_irq(&ctx->ctx_lock);
+- spin_lock(&req->head->lock);
+- if (!mask && !READ_ONCE(req->cancelled)) {
+- /*
+- * The request isn't actually ready to be completed yet.
+- * Reschedule completion if another wakeup came in.
+- */
+- if (req->work_need_resched) {
+- schedule_work(&req->work);
+- req->work_need_resched = false;
+- } else {
+- req->work_scheduled = false;
++ if (poll_iocb_lock_wq(req)) {
++ if (!mask && !READ_ONCE(req->cancelled)) {
++ /*
++ * The request isn't actually ready to be completed yet.
++ * Reschedule completion if another wakeup came in.
++ */
++ if (req->work_need_resched) {
++ schedule_work(&req->work);
++ req->work_need_resched = false;
++ } else {
++ req->work_scheduled = false;
++ }
++ poll_iocb_unlock_wq(req);
++ spin_unlock_irq(&ctx->ctx_lock);
++ return;
+ }
+- spin_unlock(&req->head->lock);
+- spin_unlock_irq(&ctx->ctx_lock);
+- return;
+- }
+- list_del_init(&req->wait.entry);
+- spin_unlock(&req->head->lock);
++ list_del_init(&req->wait.entry);
++ poll_iocb_unlock_wq(req);
++ } /* else, POLLFREE has freed the waitqueue, so we must complete */
+ list_del_init(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+ spin_unlock_irq(&ctx->ctx_lock);
+@@ -1677,13 +1723,14 @@ static int aio_poll_cancel(struct kiocb
+ struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+ struct poll_iocb *req = &aiocb->poll;
+
+- spin_lock(&req->head->lock);
+- WRITE_ONCE(req->cancelled, true);
+- if (!req->work_scheduled) {
+- schedule_work(&aiocb->poll.work);
+- req->work_scheduled = true;
+- }
+- spin_unlock(&req->head->lock);
++ if (poll_iocb_lock_wq(req)) {
++ WRITE_ONCE(req->cancelled, true);
++ if (!req->work_scheduled) {
++ schedule_work(&aiocb->poll.work);
++ req->work_scheduled = true;
++ }
++ poll_iocb_unlock_wq(req);
++ } /* else, the request was force-cancelled by POLLFREE already */
+
+ return 0;
+ }
+@@ -1735,7 +1782,8 @@ static int aio_poll_wake(struct wait_que
+ *
+ * Don't remove the request from the waitqueue here, as it might
+ * not actually be complete yet (we won't know until vfs_poll()
+- * is called), and we must not miss any wakeups.
++ * is called), and we must not miss any wakeups. POLLFREE is an
++ * exception to this; see below.
+ */
+ if (req->work_scheduled) {
+ req->work_need_resched = true;
+@@ -1743,6 +1791,28 @@ static int aio_poll_wake(struct wait_que
+ schedule_work(&req->work);
+ req->work_scheduled = true;
+ }
++
++ /*
++ * If the waitqueue is being freed early but we can't complete
++ * the request inline, we have to tear down the request as best
++ * we can. That means immediately removing the request from its
++ * waitqueue and preventing all further accesses to the
++ * waitqueue via the request. We also need to schedule the
++ * completion work (done above). Also mark the request as
++ * cancelled, to potentially skip an unneeded call to ->poll().
++ */
++ if (mask & POLLFREE) {
++ WRITE_ONCE(req->cancelled, true);
++ list_del_init(&req->wait.entry);
++
++ /*
++ * Careful: this *must* be the last step, since as soon
++ * as req->head is NULL'ed out, the request can be
++ * completed and freed, since aio_poll_complete_work()
++ * will no longer need to take the waitqueue lock.
++ */
++ smp_store_release(&req->head, NULL);
++ }
+ }
+ return 1;
+ }
+@@ -1750,6 +1820,7 @@ static int aio_poll_wake(struct wait_que
+ struct aio_poll_table {
+ struct poll_table_struct pt;
+ struct aio_kiocb *iocb;
++ bool queued;
+ int error;
+ };
+
+@@ -1760,11 +1831,12 @@ aio_poll_queue_proc(struct file *file, s
+ struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
+
+ /* multiple wait queues per file are not supported */
+- if (unlikely(pt->iocb->poll.head)) {
++ if (unlikely(pt->queued)) {
+ pt->error = -EINVAL;
+ return;
+ }
+
++ pt->queued = true;
+ pt->error = 0;
+ pt->iocb->poll.head = head;
+ add_wait_queue(head, &pt->iocb->poll.wait);
+@@ -1796,6 +1868,7 @@ static int aio_poll(struct aio_kiocb *ai
+ apt.pt._qproc = aio_poll_queue_proc;
+ apt.pt._key = req->events;
+ apt.iocb = aiocb;
++ apt.queued = false;
+ apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
+
+ /* initialized the list so that we can do list_empty checks */
+@@ -1804,9 +1877,10 @@ static int aio_poll(struct aio_kiocb *ai
+
+ mask = vfs_poll(req->file, &apt.pt) & req->events;
+ spin_lock_irq(&ctx->ctx_lock);
+- if (likely(req->head)) {
+- spin_lock(&req->head->lock);
+- if (list_empty(&req->wait.entry) || req->work_scheduled) {
++ if (likely(apt.queued)) {
++ bool on_queue = poll_iocb_lock_wq(req);
++
++ if (!on_queue || req->work_scheduled) {
+ /*
+ * aio_poll_wake() already either scheduled the async
+ * completion work, or completed the request inline.
+@@ -1822,7 +1896,7 @@ static int aio_poll(struct aio_kiocb *ai
+ } else if (cancel) {
+ /* Cancel if possible (may be too late though). */
+ WRITE_ONCE(req->cancelled, true);
+- } else if (!list_empty(&req->wait.entry)) {
++ } else if (on_queue) {
+ /*
+ * Actually waiting for an event, so add the request to
+ * active_reqs so that it can be cancelled if needed.
+@@ -1830,7 +1904,8 @@ static int aio_poll(struct aio_kiocb *ai
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ aiocb->ki_cancel = aio_poll_cancel;
+ }
+- spin_unlock(&req->head->lock);
++ if (on_queue)
++ poll_iocb_unlock_wq(req);
+ }
+ if (mask) { /* no async, we'd stolen it */
+ aiocb->ki_res.res = mangle_poll(mask);
+--- a/include/uapi/asm-generic/poll.h
++++ b/include/uapi/asm-generic/poll.h
+@@ -29,7 +29,7 @@
+ #define POLLRDHUP 0x2000
+ #endif
+
+-#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */
++#define POLLFREE (__force __poll_t)0x4000
+
+ #define POLL_BUSY_LOOP (__force __poll_t)0x8000
+
--- /dev/null
+From foo@baz Sun Dec 12 02:37:09 PM CET 2021
+From: Eric Biggers <ebiggers@kernel.org>
+Date: Fri, 10 Dec 2021 15:50:53 -0800
+Subject: aio: keep poll requests on waitqueue until completed
+To: stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Message-ID: <20211210235054.40103-5-ebiggers@kernel.org>
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 363bee27e25804d8981dd1c025b4ad49dc39c530 upstream.
+
+Currently, aio_poll_wake() will always remove the poll request from the
+waitqueue. Then, if aio_poll_complete_work() sees that none of the
+polled events are ready and the request isn't cancelled, it re-adds the
+request to the waitqueue. (This can easily happen when polling a file
+that doesn't pass an event mask when waking up its waitqueue.)
+
+This is fundamentally broken for two reasons:
+
+ 1. If a wakeup occurs between vfs_poll() and the request being
+ re-added to the waitqueue, it will be missed because the request
+ wasn't on the waitqueue at the time. Therefore, IOCB_CMD_POLL
+ might never complete even if the polled file is ready.
+
+ 2. When the request isn't on the waitqueue, there is no way to be
+ notified that the waitqueue is being freed (which happens when its
+ lifetime is shorter than the struct file's). This is supposed to
+ happen via the waitqueue entries being woken up with POLLFREE.
+
+Therefore, leave the requests on the waitqueue until they are actually
+completed (or cancelled). To keep track of when aio_poll_complete_work
+needs to be scheduled, use new fields in struct poll_iocb. Remove the
+'done' field which is now redundant.
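+
+Condensed from the hunks below, the two new flags implement the following
+protocol, always under req->head->lock:
+
+	/* aio_poll_wake(), when it cannot complete the request inline: */
+	if (req->work_scheduled) {
+		req->work_need_resched = true;	/* another wakeup arrived */
+	} else {
+		schedule_work(&req->work);
+		req->work_scheduled = true;
+	}
+
+	/* aio_poll_complete_work(), when the request isn't ready yet: */
+	if (req->work_need_resched) {
+		schedule_work(&req->work);	/* re-run for the later wakeup */
+		req->work_need_resched = false;
+	} else {
+		req->work_scheduled = false;
+	}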
+
+Note that this is consistent with how sys_poll() and eventpoll work;
+their wakeup functions do *not* remove the waitqueue entries.
+
+Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL")
+Cc: <stable@vger.kernel.org> # v4.18+
+Link: https://lore.kernel.org/r/20211209010455.42744-5-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/aio.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 63 insertions(+), 20 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -183,8 +183,9 @@ struct poll_iocb {
+ struct file *file;
+ struct wait_queue_head *head;
+ __poll_t events;
+- bool done;
+ bool cancelled;
++ bool work_scheduled;
++ bool work_need_resched;
+ struct wait_queue_entry wait;
+ struct work_struct work;
+ };
+@@ -1645,14 +1646,26 @@ static void aio_poll_complete_work(struc
+ * avoid further branches in the fast path.
+ */
+ spin_lock_irq(&ctx->ctx_lock);
++ spin_lock(&req->head->lock);
+ if (!mask && !READ_ONCE(req->cancelled)) {
+- add_wait_queue(req->head, &req->wait);
++ /*
++ * The request isn't actually ready to be completed yet.
++ * Reschedule completion if another wakeup came in.
++ */
++ if (req->work_need_resched) {
++ schedule_work(&req->work);
++ req->work_need_resched = false;
++ } else {
++ req->work_scheduled = false;
++ }
++ spin_unlock(&req->head->lock);
+ spin_unlock_irq(&ctx->ctx_lock);
+ return;
+ }
++ list_del_init(&req->wait.entry);
++ spin_unlock(&req->head->lock);
+ list_del_init(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+- req->done = true;
+ spin_unlock_irq(&ctx->ctx_lock);
+
+ iocb_put(iocb);
+@@ -1666,9 +1679,9 @@ static int aio_poll_cancel(struct kiocb
+
+ spin_lock(&req->head->lock);
+ WRITE_ONCE(req->cancelled, true);
+- if (!list_empty(&req->wait.entry)) {
+- list_del_init(&req->wait.entry);
++ if (!req->work_scheduled) {
+ schedule_work(&aiocb->poll.work);
++ req->work_scheduled = true;
+ }
+ spin_unlock(&req->head->lock);
+
+@@ -1687,20 +1700,26 @@ static int aio_poll_wake(struct wait_que
+ if (mask && !(mask & req->events))
+ return 0;
+
+- list_del_init(&req->wait.entry);
+-
+- if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
++ /*
++ * Complete the request inline if possible. This requires that three
++ * conditions be met:
++ * 1. An event mask must have been passed. If a plain wakeup was done
++ * instead, then mask == 0 and we have to call vfs_poll() to get
++ * the events, so inline completion isn't possible.
++ * 2. The completion work must not have already been scheduled.
++ * 3. ctx_lock must not be busy. We have to use trylock because we
++ * already hold the waitqueue lock, so this inverts the normal
++ * locking order. Use irqsave/irqrestore because not all
++ * filesystems (e.g. fuse) call this function with IRQs disabled,
++ * yet IRQs have to be disabled before ctx_lock is obtained.
++ */
++ if (mask && !req->work_scheduled &&
++ spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+ struct kioctx *ctx = iocb->ki_ctx;
+
+- /*
+- * Try to complete the iocb inline if we can. Use
+- * irqsave/irqrestore because not all filesystems (e.g. fuse)
+- * call this function with IRQs disabled and because IRQs
+- * have to be disabled before ctx_lock is obtained.
+- */
++ list_del_init(&req->wait.entry);
+ list_del(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+- req->done = true;
+ if (iocb->ki_eventfd && eventfd_signal_count()) {
+ iocb = NULL;
+ INIT_WORK(&req->work, aio_poll_put_work);
+@@ -1710,7 +1729,20 @@ static int aio_poll_wake(struct wait_que
+ if (iocb)
+ iocb_put(iocb);
+ } else {
+- schedule_work(&req->work);
++ /*
++ * Schedule the completion work if needed. If it was already
++ * scheduled, record that another wakeup came in.
++ *
++ * Don't remove the request from the waitqueue here, as it might
++ * not actually be complete yet (we won't know until vfs_poll()
++ * is called), and we must not miss any wakeups.
++ */
++ if (req->work_scheduled) {
++ req->work_need_resched = true;
++ } else {
++ schedule_work(&req->work);
++ req->work_scheduled = true;
++ }
+ }
+ return 1;
+ }
+@@ -1757,8 +1789,9 @@ static int aio_poll(struct aio_kiocb *ai
+ req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+
+ req->head = NULL;
+- req->done = false;
+ req->cancelled = false;
++ req->work_scheduled = false;
++ req->work_need_resched = false;
+
+ apt.pt._qproc = aio_poll_queue_proc;
+ apt.pt._key = req->events;
+@@ -1773,17 +1806,27 @@ static int aio_poll(struct aio_kiocb *ai
+ spin_lock_irq(&ctx->ctx_lock);
+ if (likely(req->head)) {
+ spin_lock(&req->head->lock);
+- if (unlikely(list_empty(&req->wait.entry))) {
+- if (apt.error)
++ if (list_empty(&req->wait.entry) || req->work_scheduled) {
++ /*
++ * aio_poll_wake() already either scheduled the async
++ * completion work, or completed the request inline.
++ */
++ if (apt.error) /* unsupported case: multiple queues */
+ cancel = true;
+ apt.error = 0;
+ mask = 0;
+ }
+ if (mask || apt.error) {
++ /* Steal to complete synchronously. */
+ list_del_init(&req->wait.entry);
+ } else if (cancel) {
++ /* Cancel if possible (may be too late though). */
+ WRITE_ONCE(req->cancelled, true);
+- } else if (!req->done) { /* actually waiting for an event */
++ } else if (!list_empty(&req->wait.entry)) {
++ /*
++ * Actually waiting for an event, so add the request to
++ * active_reqs so that it can be cancelled if needed.
++ */
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ aiocb->ki_cancel = aio_poll_cancel;
+ }
--- /dev/null
+From foo@baz Sun Dec 12 02:37:09 PM CET 2021
+From: Eric Biggers <ebiggers@kernel.org>
+Date: Fri, 10 Dec 2021 15:50:51 -0800
+Subject: binder: use wake_up_pollfree()
+To: stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Linus Torvalds <torvalds@linux-foundation.org>
+Message-ID: <20211210235054.40103-3-ebiggers@kernel.org>
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit a880b28a71e39013e357fd3adccd1d8a31bc69a8 upstream.
+
+wake_up_poll() uses nr_exclusive=1, so it's not guaranteed to wake up
+all exclusive waiters. Yet, POLLFREE *must* wake up all waiters. epoll
+and aio poll are fortunately not affected by this, but it's very
+fragile. Thus, the new function wake_up_pollfree() has been introduced.
+
+Convert binder to use wake_up_pollfree().
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Fixes: f5cb779ba163 ("ANDROID: binder: remove waitqueue when thread exits.")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20211209010455.42744-3-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/android/binder.c | 21 +++++++++------------
+ 1 file changed, 9 insertions(+), 12 deletions(-)
+
+--- a/drivers/android/binder.c
++++ b/drivers/android/binder.c
+@@ -4788,23 +4788,20 @@ static int binder_thread_release(struct
+ __release(&t->lock);
+
+ /*
+- * If this thread used poll, make sure we remove the waitqueue
+- * from any epoll data structures holding it with POLLFREE.
+- * waitqueue_active() is safe to use here because we're holding
+- * the inner lock.
++ * If this thread used poll, make sure we remove the waitqueue from any
++ * poll data structures holding it.
+ */
+- if ((thread->looper & BINDER_LOOPER_STATE_POLL) &&
+- waitqueue_active(&thread->wait)) {
+- wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE);
+- }
++ if (thread->looper & BINDER_LOOPER_STATE_POLL)
++ wake_up_pollfree(&thread->wait);
+
+ binder_inner_proc_unlock(thread->proc);
+
+ /*
+- * This is needed to avoid races between wake_up_poll() above and
+- * and ep_remove_waitqueue() called for other reasons (eg the epoll file
+- * descriptor being closed); ep_remove_waitqueue() holds an RCU read
+- * lock, so we can be sure it's done after calling synchronize_rcu().
++ * This is needed to avoid races between wake_up_pollfree() above and
++ * someone else removing the last entry from the queue for other reasons
++ * (e.g. ep_remove_wait_queue() being called due to an epoll file
++ * descriptor being closed). Such other users hold an RCU read lock, so
++ * we can be sure they're done after we call synchronize_rcu().
+ */
+ if (thread->looper & BINDER_LOOPER_STATE_POLL)
+ synchronize_rcu();
 can-m_can-disable-and-ignore-elo-interrupt.patch
 x86-sme-explicitly-map-new-efi-memmap-table-as-encrypted.patch
 libata-add-horkage-for-asmedia-1092.patch
+wait-add-wake_up_pollfree.patch
+binder-use-wake_up_pollfree.patch
+signalfd-use-wake_up_pollfree.patch
+aio-keep-poll-requests-on-waitqueue-until-completed.patch
+aio-fix-use-after-free-due-to-missing-pollfree-handling.patch
--- /dev/null
+From foo@baz Sun Dec 12 02:37:09 PM CET 2021
+From: Eric Biggers <ebiggers@kernel.org>
+Date: Fri, 10 Dec 2021 15:50:52 -0800
+Subject: signalfd: use wake_up_pollfree()
+To: stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Linus Torvalds <torvalds@linux-foundation.org>
+Message-ID: <20211210235054.40103-4-ebiggers@kernel.org>
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 9537bae0da1f8d1e2361ab6d0479e8af7824e160 upstream.
+
+wake_up_poll() uses nr_exclusive=1, so it's not guaranteed to wake up
+all exclusive waiters. Yet, POLLFREE *must* wake up all waiters. epoll
+and aio poll are fortunately not affected by this, but it's very
+fragile. Thus, the new function wake_up_pollfree() has been introduced.
+
+Convert signalfd to use wake_up_pollfree().
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Fixes: d80e731ecab4 ("epoll: introduce POLLFREE to flush ->signalfd_wqh before kfree()")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20211209010455.42744-4-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/signalfd.c | 12 +-----------
+ 1 file changed, 1 insertion(+), 11 deletions(-)
+
+--- a/fs/signalfd.c
++++ b/fs/signalfd.c
+@@ -35,17 +35,7 @@
+
+ void signalfd_cleanup(struct sighand_struct *sighand)
+ {
+- wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+- /*
+- * The lockless check can race with remove_wait_queue() in progress,
+- * but in this case its caller should run under rcu_read_lock() and
+- * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
+- */
+- if (likely(!waitqueue_active(wqh)))
+- return;
+-
+- /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
+- wake_up_poll(wqh, EPOLLHUP | POLLFREE);
++ wake_up_pollfree(&sighand->signalfd_wqh);
+ }
+
+ struct signalfd_ctx {
--- /dev/null
+From foo@baz Sun Dec 12 02:37:09 PM CET 2021
+From: Eric Biggers <ebiggers@kernel.org>
+Date: Fri, 10 Dec 2021 15:50:50 -0800
+Subject: wait: add wake_up_pollfree()
+To: stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Linus Torvalds <torvalds@linux-foundation.org>
+Message-ID: <20211210235054.40103-2-ebiggers@kernel.org>
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 42288cb44c4b5fff7653bc392b583a2b8bd6a8c0 upstream.
+
+Several ->poll() implementations are special in that they use a
+waitqueue whose lifetime is the current task, rather than the struct
+file as is normally the case. This is okay for blocking polls, since a
+blocking poll occurs within one task; however, non-blocking polls
+require another solution. This solution is for the queue to be cleared
+before it is freed, using 'wake_up_poll(wq, EPOLLHUP | POLLFREE);'.
+
+However, that has a bug: wake_up_poll() calls __wake_up() with
+nr_exclusive=1. Therefore, if there are multiple "exclusive" waiters,
+and the wakeup function for the first one returns a positive value, only
+that one will be called. That's *not* what's needed for POLLFREE;
+POLLFREE is special in that it really needs to wake up everyone.
+
+Considering the three non-blocking poll systems:
+
+- io_uring poll doesn't handle POLLFREE at all, so it is broken anyway.
+
+- aio poll is unaffected, since it doesn't support exclusive waits.
+ However, that's fragile, as someone could add this feature later.
+
+- epoll doesn't appear to be broken by this, since its wakeup function
+ returns 0 when it sees POLLFREE. But this is fragile.
+
+Although there is a workaround (see epoll), it's better to define a
+function which always sends POLLFREE to all waiters. Add such a
+function. Also make it verify that the queue really becomes empty after
+all waiters have been woken up.
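+
+A sketch of the intended calling convention (the struct and function names
+here are illustrative, not from this patch): a ->poll() provider whose
+waitqueue lives in a task-lifetime object calls wake_up_pollfree() in its
+teardown path and RCU-delays the free, as the binder and signalfd
+conversions in this series do:
+
+	#include <linux/wait.h>
+	#include <linux/rcupdate.h>
+	#include <linux/slab.h>
+
+	/* Hypothetical object whose lifetime is tied to a task. */
+	struct my_poll_obj {
+		wait_queue_head_t wqh;
+		struct rcu_head rcu;
+	};
+
+	static void my_poll_obj_release(struct my_poll_obj *obj)
+	{
+		/* Force every waiter (epoll, aio poll, ...) off the queue. */
+		wake_up_pollfree(&obj->wqh);
+
+		/*
+		 * RCU-delay the free so that a waker which already saw a
+		 * non-empty queue can still take obj->wqh.lock safely.
+		 */
+		kfree_rcu(obj, rcu);
+	}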
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20211209010455.42744-2-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/wait.h | 26 ++++++++++++++++++++++++++
+ kernel/sched/wait.c | 7 +++++++
+ 2 files changed, 33 insertions(+)
+
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -204,6 +204,7 @@ void __wake_up_locked_key_bookmark(struc
+ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+ void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
+ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
++void __wake_up_pollfree(struct wait_queue_head *wq_head);
+
+ #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
+ #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL)
+@@ -230,6 +231,31 @@ void __wake_up_sync(struct wait_queue_he
+ #define wake_up_interruptible_sync_poll(x, m) \
+ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, poll_to_key(m))
+
++/**
++ * wake_up_pollfree - signal that a polled waitqueue is going away
++ * @wq_head: the wait queue head
++ *
++ * In the very rare cases where a ->poll() implementation uses a waitqueue whose
++ * lifetime is tied to a task rather than to the 'struct file' being polled,
++ * this function must be called before the waitqueue is freed so that
++ * non-blocking polls (e.g. epoll) are notified that the queue is going away.
++ *
++ * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via
++ * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU.
++ */
++static inline void wake_up_pollfree(struct wait_queue_head *wq_head)
++{
++ /*
++ * For performance reasons, we don't always take the queue lock here.
++ * Therefore, we might race with someone removing the last entry from
++ * the queue, and proceed while they still hold the queue lock.
++ * However, rcu_read_lock() is required to be held in such cases, so we
++ * can safely proceed with an RCU-delayed free.
++ */
++ if (waitqueue_active(wq_head))
++ __wake_up_pollfree(wq_head);
++}
++
+ #define ___wait_cond_timeout(condition) \
+ ({ \
+ bool __cond = (condition); \
+--- a/kernel/sched/wait.c
++++ b/kernel/sched/wait.c
+@@ -206,6 +206,13 @@ void __wake_up_sync(struct wait_queue_he
+ }
+ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
+
++void __wake_up_pollfree(struct wait_queue_head *wq_head)
++{
++ __wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE));
++ /* POLLFREE must have cleared the queue. */
++ WARN_ON_ONCE(waitqueue_active(wq_head));
++}
++
+ /*
+ * Note: we use "set_current_state()" _after_ the wait-queue add,
+ * because we need a memory barrier there on SMP, so that any