--- /dev/null
+From foo@baz Sun Dec 12 02:34:36 PM CET 2021
+From: Eric Biggers <ebiggers@kernel.org>
+Date: Fri, 10 Dec 2021 15:48:05 -0800
+Subject: aio: fix use-after-free due to missing POLLFREE handling
+To: stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Message-ID: <20211210234805.39861-6-ebiggers@kernel.org>
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 50252e4b5e989ce64555c7aef7516bdefc2fea72 upstream.
+
+signalfd_poll() and binder_poll() are special in that they use a
+waitqueue whose lifetime is the current task, rather than the struct
+file as is normally the case. This is okay for blocking polls, since a
+blocking poll occurs within one task; however, non-blocking polls
+require another solution. This solution is for the queue to be cleared
+before it is freed, by sending a POLLFREE notification to all waiters.
+
+Unfortunately, only eventpoll handles POLLFREE. A second type of
+non-blocking poll, aio poll, was added in kernel v4.18, and it doesn't
+handle POLLFREE. This allows a use-after-free to occur if a signalfd or
+binder fd is polled with aio poll, and the waitqueue gets freed.
+
+Fix this by making aio poll handle POLLFREE.
+
+A patch by Ramji Jiyani <ramjiyani@google.com>
+(https://lore.kernel.org/r/20211027011834.2497484-1-ramjiyani@google.com)
+tried to do this by making aio_poll_wake() always complete the request
+inline if POLLFREE is seen. However, that solution had two bugs.
+First, it introduced a deadlock, as it unconditionally locked the aio
+context while holding the waitqueue lock, which inverts the normal
+locking order. Second, it didn't consider that POLLFREE notifications
+are missed while the request has been temporarily de-queued.
+
+The second problem was solved by my previous patch. This patch then
+properly fixes the use-after-free by handling POLLFREE in a
+deadlock-free way. It does this by taking advantage of the fact that
+freeing of the waitqueue is RCU-delayed, similar to what eventpoll does.
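+
+For reference, the affected configuration -- a signalfd polled through
+aio poll -- can be set up from userspace roughly as follows.  This is
+an illustrative sketch only (error handling omitted, raw syscalls
+used); actually hitting the use-after-free additionally requires the
+waitqueue owner to go away while the request is still outstanding:
+
+    #include <linux/aio_abi.h>
+    #include <sys/signalfd.h>
+    #include <sys/syscall.h>
+    #include <signal.h>
+    #include <string.h>
+    #include <unistd.h>
+    #include <poll.h>
+
+    int main(void)
+    {
+        sigset_t mask;
+        sigemptyset(&mask);
+        sigaddset(&mask, SIGUSR1);
+        sigprocmask(SIG_BLOCK, &mask, NULL);
+
+        /* The signalfd waitqueue lives in the sighand, not in the file. */
+        int sfd = signalfd(-1, &mask, 0);
+
+        aio_context_t ctx = 0;
+        syscall(SYS_io_setup, 1, &ctx);
+
+        struct iocb cb;
+        memset(&cb, 0, sizeof(cb));
+        cb.aio_lio_opcode = IOCB_CMD_POLL;  /* aio poll, added in v4.18 */
+        cb.aio_fildes = sfd;
+        cb.aio_buf = POLLIN;                /* requested events */
+
+        struct iocb *cbs[1] = { &cb };
+        /* The request is now parked on the signalfd waitqueue. */
+        syscall(SYS_io_submit, ctx, 1, cbs);
+
+        pause();
+        return 0;
+    }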
+
+Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL")
+Cc: <stable@vger.kernel.org> # v4.18+
+Link: https://lore.kernel.org/r/20211209010455.42744-6-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/aio.c | 137 ++++++++++++++++++++++++++++++----------
+ include/uapi/asm-generic/poll.h | 2
+ 2 files changed, 107 insertions(+), 32 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -1622,6 +1622,51 @@ static void aio_poll_put_work(struct wor
+ iocb_put(iocb);
+ }
+
++/*
++ * Safely lock the waitqueue which the request is on, synchronizing with the
++ * case where the ->poll() provider decides to free its waitqueue early.
++ *
++ * Returns true on success, meaning that req->head->lock was locked, req->wait
++ * is on req->head, and an RCU read lock was taken. Returns false if the
++ * request was already removed from its waitqueue (which might no longer exist).
++ */
++static bool poll_iocb_lock_wq(struct poll_iocb *req)
++{
++ wait_queue_head_t *head;
++
++ /*
++ * While we hold the waitqueue lock and the waitqueue is nonempty,
++ * wake_up_pollfree() will wait for us. However, taking the waitqueue
++ * lock in the first place can race with the waitqueue being freed.
++ *
++ * We solve this as eventpoll does: by taking advantage of the fact that
++ * all users of wake_up_pollfree() will RCU-delay the actual free. If
++ * we enter rcu_read_lock() and see that the pointer to the queue is
++ * non-NULL, we can then lock it without the memory being freed out from
++ * under us, then check whether the request is still on the queue.
++ *
++ * Keep holding rcu_read_lock() as long as we hold the queue lock, in
++ * case the caller deletes the entry from the queue, leaving it empty.
++ * In that case, only RCU prevents the queue memory from being freed.
++ */
++ rcu_read_lock();
++ head = smp_load_acquire(&req->head);
++ if (head) {
++ spin_lock(&head->lock);
++ if (!list_empty(&req->wait.entry))
++ return true;
++ spin_unlock(&head->lock);
++ }
++ rcu_read_unlock();
++ return false;
++}
++
++static void poll_iocb_unlock_wq(struct poll_iocb *req)
++{
++ spin_unlock(&req->head->lock);
++ rcu_read_unlock();
++}
++
+ static void aio_poll_complete_work(struct work_struct *work)
+ {
+ struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+@@ -1641,24 +1686,25 @@ static void aio_poll_complete_work(struc
+ * avoid further branches in the fast path.
+ */
+ spin_lock_irq(&ctx->ctx_lock);
+- spin_lock(&req->head->lock);
+- if (!mask && !READ_ONCE(req->cancelled)) {
+- /*
+- * The request isn't actually ready to be completed yet.
+- * Reschedule completion if another wakeup came in.
+- */
+- if (req->work_need_resched) {
+- schedule_work(&req->work);
+- req->work_need_resched = false;
+- } else {
+- req->work_scheduled = false;
++ if (poll_iocb_lock_wq(req)) {
++ if (!mask && !READ_ONCE(req->cancelled)) {
++ /*
++ * The request isn't actually ready to be completed yet.
++ * Reschedule completion if another wakeup came in.
++ */
++ if (req->work_need_resched) {
++ schedule_work(&req->work);
++ req->work_need_resched = false;
++ } else {
++ req->work_scheduled = false;
++ }
++ poll_iocb_unlock_wq(req);
++ spin_unlock_irq(&ctx->ctx_lock);
++ return;
+ }
+- spin_unlock(&req->head->lock);
+- spin_unlock_irq(&ctx->ctx_lock);
+- return;
+- }
+- list_del_init(&req->wait.entry);
+- spin_unlock(&req->head->lock);
++ list_del_init(&req->wait.entry);
++ poll_iocb_unlock_wq(req);
++ } /* else, POLLFREE has freed the waitqueue, so we must complete */
+ list_del_init(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+ spin_unlock_irq(&ctx->ctx_lock);
+@@ -1672,13 +1718,14 @@ static int aio_poll_cancel(struct kiocb
+ struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+ struct poll_iocb *req = &aiocb->poll;
+
+- spin_lock(&req->head->lock);
+- WRITE_ONCE(req->cancelled, true);
+- if (!req->work_scheduled) {
+- schedule_work(&aiocb->poll.work);
+- req->work_scheduled = true;
+- }
+- spin_unlock(&req->head->lock);
++ if (poll_iocb_lock_wq(req)) {
++ WRITE_ONCE(req->cancelled, true);
++ if (!req->work_scheduled) {
++ schedule_work(&aiocb->poll.work);
++ req->work_scheduled = true;
++ }
++ poll_iocb_unlock_wq(req);
++ } /* else, the request was force-cancelled by POLLFREE already */
+
+ return 0;
+ }
+@@ -1730,7 +1777,8 @@ static int aio_poll_wake(struct wait_que
+ *
+ * Don't remove the request from the waitqueue here, as it might
+ * not actually be complete yet (we won't know until vfs_poll()
+- * is called), and we must not miss any wakeups.
++ * is called), and we must not miss any wakeups. POLLFREE is an
++ * exception to this; see below.
+ */
+ if (req->work_scheduled) {
+ req->work_need_resched = true;
+@@ -1738,6 +1786,28 @@ static int aio_poll_wake(struct wait_que
+ schedule_work(&req->work);
+ req->work_scheduled = true;
+ }
++
++ /*
++ * If the waitqueue is being freed early but we can't complete
++ * the request inline, we have to tear down the request as best
++ * we can. That means immediately removing the request from its
++ * waitqueue and preventing all further accesses to the
++ * waitqueue via the request. We also need to schedule the
++ * completion work (done above). Also mark the request as
++ * cancelled, to potentially skip an unneeded call to ->poll().
++ */
++ if (mask & POLLFREE) {
++ WRITE_ONCE(req->cancelled, true);
++ list_del_init(&req->wait.entry);
++
++ /*
++ * Careful: this *must* be the last step, since as soon
++ * as req->head is NULL'ed out, the request can be
++ * completed and freed, since aio_poll_complete_work()
++ * will no longer need to take the waitqueue lock.
++ */
++ smp_store_release(&req->head, NULL);
++ }
+ }
+ return 1;
+ }
+@@ -1745,6 +1815,7 @@ static int aio_poll_wake(struct wait_que
+ struct aio_poll_table {
+ struct poll_table_struct pt;
+ struct aio_kiocb *iocb;
++ bool queued;
+ int error;
+ };
+
+@@ -1755,11 +1826,12 @@ aio_poll_queue_proc(struct file *file, s
+ struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
+
+ /* multiple wait queues per file are not supported */
+- if (unlikely(pt->iocb->poll.head)) {
++ if (unlikely(pt->queued)) {
+ pt->error = -EINVAL;
+ return;
+ }
+
++ pt->queued = true;
+ pt->error = 0;
+ pt->iocb->poll.head = head;
+ add_wait_queue(head, &pt->iocb->poll.wait);
+@@ -1791,6 +1863,7 @@ static int aio_poll(struct aio_kiocb *ai
+ apt.pt._qproc = aio_poll_queue_proc;
+ apt.pt._key = req->events;
+ apt.iocb = aiocb;
++ apt.queued = false;
+ apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
+
+ /* initialized the list so that we can do list_empty checks */
+@@ -1799,9 +1872,10 @@ static int aio_poll(struct aio_kiocb *ai
+
+ mask = vfs_poll(req->file, &apt.pt) & req->events;
+ spin_lock_irq(&ctx->ctx_lock);
+- if (likely(req->head)) {
+- spin_lock(&req->head->lock);
+- if (list_empty(&req->wait.entry) || req->work_scheduled) {
++ if (likely(apt.queued)) {
++ bool on_queue = poll_iocb_lock_wq(req);
++
++ if (!on_queue || req->work_scheduled) {
+ /*
+ * aio_poll_wake() already either scheduled the async
+ * completion work, or completed the request inline.
+@@ -1817,7 +1891,7 @@ static int aio_poll(struct aio_kiocb *ai
+ } else if (cancel) {
+ /* Cancel if possible (may be too late though). */
+ WRITE_ONCE(req->cancelled, true);
+- } else if (!list_empty(&req->wait.entry)) {
++ } else if (on_queue) {
+ /*
+ * Actually waiting for an event, so add the request to
+ * active_reqs so that it can be cancelled if needed.
+@@ -1825,7 +1899,8 @@ static int aio_poll(struct aio_kiocb *ai
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ aiocb->ki_cancel = aio_poll_cancel;
+ }
+- spin_unlock(&req->head->lock);
++ if (on_queue)
++ poll_iocb_unlock_wq(req);
+ }
+ if (mask) { /* no async, we'd stolen it */
+ aiocb->ki_res.res = mangle_poll(mask);
+--- a/include/uapi/asm-generic/poll.h
++++ b/include/uapi/asm-generic/poll.h
+@@ -29,7 +29,7 @@
+ #define POLLRDHUP 0x2000
+ #endif
+
+-#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */
++#define POLLFREE (__force __poll_t)0x4000
+
+ #define POLL_BUSY_LOOP (__force __poll_t)0x8000
+
--- /dev/null
+From foo@baz Sun Dec 12 02:34:36 PM CET 2021
+From: Eric Biggers <ebiggers@kernel.org>
+Date: Fri, 10 Dec 2021 15:48:04 -0800
+Subject: aio: keep poll requests on waitqueue until completed
+To: stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Message-ID: <20211210234805.39861-5-ebiggers@kernel.org>
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 363bee27e25804d8981dd1c025b4ad49dc39c530 upstream.
+
+Currently, aio_poll_wake() will always remove the poll request from the
+waitqueue. Then, if aio_poll_complete_work() sees that none of the
+polled events are ready and the request isn't cancelled, it re-adds the
+request to the waitqueue. (This can easily happen when polling a file
+that doesn't pass an event mask when waking up its waitqueue.)
+
+This is fundamentally broken for two reasons:
+
+ 1. If a wakeup occurs between vfs_poll() and the request being
+ re-added to the waitqueue, it will be missed because the request
+ wasn't on the waitqueue at the time. Therefore, IOCB_CMD_POLL
+ might never complete even if the polled file is ready.
+
+ 2. When the request isn't on the waitqueue, there is no way to be
+ notified that the waitqueue is being freed (which happens when its
+ lifetime is shorter than the struct file's). This is supposed to
+ happen via the waitqueue entries being woken up with POLLFREE.
+
+Therefore, leave the requests on the waitqueue until they are actually
+completed (or cancelled). To keep track of when aio_poll_complete_work
+needs to be scheduled, use new fields in struct poll_iocb. Remove the
+'done' field which is now redundant.
+
+Note that this is consistent with how sys_poll() and eventpoll work;
+their wakeup functions do *not* remove the waitqueue entries.
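+
+For illustration only (not part of this patch), the work_scheduled /
+work_need_resched bookkeeping reduces to the following coalescing
+pattern, sketched here as standalone C with a plain mutex standing in
+for the waitqueue lock:
+
+    #include <pthread.h>
+    #include <stdbool.h>
+
+    struct poll_state {
+        pthread_mutex_t lock;  /* stands in for the waitqueue lock */
+        bool scheduled;        /* completion work queued, not yet run */
+        bool need_resched;     /* another wakeup arrived meanwhile */
+    };
+
+    /* Every wakeup (cf. aio_poll_wake()) does this: */
+    static void on_wakeup(struct poll_state *s, void (*queue_work)(void))
+    {
+        pthread_mutex_lock(&s->lock);
+        if (s->scheduled) {
+            s->need_resched = true;  /* coalesce the extra wakeup */
+        } else {
+            s->scheduled = true;
+            queue_work();            /* first wakeup schedules the work */
+        }
+        pthread_mutex_unlock(&s->lock);
+    }
+
+    /* The work item, when the request is not actually ready yet
+     * (cf. the !mask && !cancelled path in aio_poll_complete_work()): */
+    static void on_not_ready(struct poll_state *s, void (*queue_work)(void))
+    {
+        pthread_mutex_lock(&s->lock);
+        if (s->need_resched) {
+            s->need_resched = false;
+            queue_work();            /* a wakeup came in: run again */
+        } else {
+            s->scheduled = false;    /* wait for the next wakeup */
+        }
+        pthread_mutex_unlock(&s->lock);
+    }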
+
+Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL")
+Cc: <stable@vger.kernel.org> # v4.18+
+Link: https://lore.kernel.org/r/20211209010455.42744-5-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/aio.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 63 insertions(+), 20 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -182,8 +182,9 @@ struct poll_iocb {
+ struct file *file;
+ struct wait_queue_head *head;
+ __poll_t events;
+- bool done;
+ bool cancelled;
++ bool work_scheduled;
++ bool work_need_resched;
+ struct wait_queue_entry wait;
+ struct work_struct work;
+ };
+@@ -1640,14 +1641,26 @@ static void aio_poll_complete_work(struc
+ * avoid further branches in the fast path.
+ */
+ spin_lock_irq(&ctx->ctx_lock);
++ spin_lock(&req->head->lock);
+ if (!mask && !READ_ONCE(req->cancelled)) {
+- add_wait_queue(req->head, &req->wait);
++ /*
++ * The request isn't actually ready to be completed yet.
++ * Reschedule completion if another wakeup came in.
++ */
++ if (req->work_need_resched) {
++ schedule_work(&req->work);
++ req->work_need_resched = false;
++ } else {
++ req->work_scheduled = false;
++ }
++ spin_unlock(&req->head->lock);
+ spin_unlock_irq(&ctx->ctx_lock);
+ return;
+ }
++ list_del_init(&req->wait.entry);
++ spin_unlock(&req->head->lock);
+ list_del_init(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+- req->done = true;
+ spin_unlock_irq(&ctx->ctx_lock);
+
+ iocb_put(iocb);
+@@ -1661,9 +1674,9 @@ static int aio_poll_cancel(struct kiocb
+
+ spin_lock(&req->head->lock);
+ WRITE_ONCE(req->cancelled, true);
+- if (!list_empty(&req->wait.entry)) {
+- list_del_init(&req->wait.entry);
++ if (!req->work_scheduled) {
+ schedule_work(&aiocb->poll.work);
++ req->work_scheduled = true;
+ }
+ spin_unlock(&req->head->lock);
+
+@@ -1682,20 +1695,26 @@ static int aio_poll_wake(struct wait_que
+ if (mask && !(mask & req->events))
+ return 0;
+
+- list_del_init(&req->wait.entry);
+-
+- if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
++ /*
++ * Complete the request inline if possible. This requires that three
++ * conditions be met:
++ * 1. An event mask must have been passed. If a plain wakeup was done
++ * instead, then mask == 0 and we have to call vfs_poll() to get
++ * the events, so inline completion isn't possible.
++ * 2. The completion work must not have already been scheduled.
++ * 3. ctx_lock must not be busy. We have to use trylock because we
++ * already hold the waitqueue lock, so this inverts the normal
++ * locking order. Use irqsave/irqrestore because not all
++ * filesystems (e.g. fuse) call this function with IRQs disabled,
++ * yet IRQs have to be disabled before ctx_lock is obtained.
++ */
++ if (mask && !req->work_scheduled &&
++ spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+ struct kioctx *ctx = iocb->ki_ctx;
+
+- /*
+- * Try to complete the iocb inline if we can. Use
+- * irqsave/irqrestore because not all filesystems (e.g. fuse)
+- * call this function with IRQs disabled and because IRQs
+- * have to be disabled before ctx_lock is obtained.
+- */
++ list_del_init(&req->wait.entry);
+ list_del(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+- req->done = true;
+ if (iocb->ki_eventfd && eventfd_signal_count()) {
+ iocb = NULL;
+ INIT_WORK(&req->work, aio_poll_put_work);
+@@ -1705,7 +1724,20 @@ static int aio_poll_wake(struct wait_que
+ if (iocb)
+ iocb_put(iocb);
+ } else {
+- schedule_work(&req->work);
++ /*
++ * Schedule the completion work if needed. If it was already
++ * scheduled, record that another wakeup came in.
++ *
++ * Don't remove the request from the waitqueue here, as it might
++ * not actually be complete yet (we won't know until vfs_poll()
++ * is called), and we must not miss any wakeups.
++ */
++ if (req->work_scheduled) {
++ req->work_need_resched = true;
++ } else {
++ schedule_work(&req->work);
++ req->work_scheduled = true;
++ }
+ }
+ return 1;
+ }
+@@ -1752,8 +1784,9 @@ static int aio_poll(struct aio_kiocb *ai
+ req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+
+ req->head = NULL;
+- req->done = false;
+ req->cancelled = false;
++ req->work_scheduled = false;
++ req->work_need_resched = false;
+
+ apt.pt._qproc = aio_poll_queue_proc;
+ apt.pt._key = req->events;
+@@ -1768,17 +1801,27 @@ static int aio_poll(struct aio_kiocb *ai
+ spin_lock_irq(&ctx->ctx_lock);
+ if (likely(req->head)) {
+ spin_lock(&req->head->lock);
+- if (unlikely(list_empty(&req->wait.entry))) {
+- if (apt.error)
++ if (list_empty(&req->wait.entry) || req->work_scheduled) {
++ /*
++ * aio_poll_wake() already either scheduled the async
++ * completion work, or completed the request inline.
++ */
++ if (apt.error) /* unsupported case: multiple queues */
+ cancel = true;
+ apt.error = 0;
+ mask = 0;
+ }
+ if (mask || apt.error) {
++ /* Steal to complete synchronously. */
+ list_del_init(&req->wait.entry);
+ } else if (cancel) {
++ /* Cancel if possible (may be too late though). */
+ WRITE_ONCE(req->cancelled, true);
+- } else if (!req->done) { /* actually waiting for an event */
++ } else if (!list_empty(&req->wait.entry)) {
++ /*
++ * Actually waiting for an event, so add the request to
++ * active_reqs so that it can be cancelled if needed.
++ */
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ aiocb->ki_cancel = aio_poll_cancel;
+ }
--- /dev/null
+From a880b28a71e39013e357fd3adccd1d8a31bc69a8 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Wed, 8 Dec 2021 17:04:52 -0800
+Subject: binder: use wake_up_pollfree()
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit a880b28a71e39013e357fd3adccd1d8a31bc69a8 upstream.
+
+wake_up_poll() uses nr_exclusive=1, so it's not guaranteed to wake up
+all exclusive waiters. Yet, POLLFREE *must* wake up all waiters. epoll
+and aio poll are fortunately not affected by this, but it's very
+fragile. Thus, the new function wake_up_pollfree() has been introduced.
+
+Convert binder to use wake_up_pollfree().
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Fixes: f5cb779ba163 ("ANDROID: binder: remove waitqueue when thread exits.")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20211209010455.42744-3-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/android/binder.c | 21 +++++++++------------
+ 1 file changed, 9 insertions(+), 12 deletions(-)
+
+--- a/drivers/android/binder.c
++++ b/drivers/android/binder.c
+@@ -4784,23 +4784,20 @@ static int binder_thread_release(struct
+ __release(&t->lock);
+
+ /*
+- * If this thread used poll, make sure we remove the waitqueue
+- * from any epoll data structures holding it with POLLFREE.
+- * waitqueue_active() is safe to use here because we're holding
+- * the inner lock.
++ * If this thread used poll, make sure we remove the waitqueue from any
++ * poll data structures holding it.
+ */
+- if ((thread->looper & BINDER_LOOPER_STATE_POLL) &&
+- waitqueue_active(&thread->wait)) {
+- wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE);
+- }
++ if (thread->looper & BINDER_LOOPER_STATE_POLL)
++ wake_up_pollfree(&thread->wait);
+
+ binder_inner_proc_unlock(thread->proc);
+
+ /*
+- * This is needed to avoid races between wake_up_poll() above and
+- * and ep_remove_waitqueue() called for other reasons (eg the epoll file
+- * descriptor being closed); ep_remove_waitqueue() holds an RCU read
+- * lock, so we can be sure it's done after calling synchronize_rcu().
++ * This is needed to avoid races between wake_up_pollfree() above and
++ * someone else removing the last entry from the queue for other reasons
++ * (e.g. ep_remove_wait_queue() being called due to an epoll file
++ * descriptor being closed). Such other users hold an RCU read lock, so
++ * we can be sure they're done after we call synchronize_rcu().
+ */
+ if (thread->looper & BINDER_LOOPER_STATE_POLL)
+ synchronize_rcu();
--- /dev/null
+From a50e659b2a1be14784e80f8492aab177e67c53a2 Mon Sep 17 00:00:00 2001
+From: Louis Amas <louis.amas@eho.link>
+Date: Tue, 7 Dec 2021 15:34:22 +0100
+Subject: net: mvpp2: fix XDP rx queues registering
+
+From: Louis Amas <louis.amas@eho.link>
+
+commit a50e659b2a1be14784e80f8492aab177e67c53a2 upstream.
+
+The registration of XDP queue information is incorrect because the
+RX queue id we use is invalid. When port->id == 0 it appears to work
+as expected, yet it's no longer the case when port->id != 0.
+
+The problem arose while using a recent kernel version on the
+MACCHIATOBin. This board has several ports:
+ * eth0 and eth1 are 10Gbps interfaces; both ports have port->id == 0;
+ * eth2 is a 1Gbps interface with port->id != 0.
+
+Code from xdp-tutorial (more specifically advanced03-AF_XDP) was used
+to test packet capture and injection on all these interfaces. The XDP
+kernel program was simplified to:
+
+ SEC("xdp_sock")
+ int xdp_sock_prog(struct xdp_md *ctx)
+ {
+ int index = ctx->rx_queue_index;
+
+ /* A set entry here means that the correspnding queue_id
+ * has an active AF_XDP socket bound to it. */
+ if (bpf_map_lookup_elem(&xsks_map, &index))
+ return bpf_redirect_map(&xsks_map, index, 0);
+
+ return XDP_PASS;
+ }
+
+Starting the program using:
+
+ ./af_xdp_user -d DEV
+
+Gives the following result:
+
+ * eth0 : ok
+ * eth1 : ok
+ * eth2 : no capture, no injection
+
+Investigating the issue shows that XDP rx queues for eth2 are wrong:
+XDP expects their id to be in the range [0..3] but we found them to be
+in the range [32..35].
+
+Trying to force rx queue ids using:
+
+ ./af_xdp_user -d eth2 -Q 32
+
+fails as expected (we shall not have more than 4 queues).
+
+When we register the XDP rx queue information (using
+xdp_rxq_info_reg() in function mvpp2_rxq_init()) we tell it to use
+rxq->id as the queue id. This value is computed as:
+
+ rxq->id = port->id * max_rxq_count + queue_id
+
+where max_rxq_count depends on the device version. In the MACCHIATOBin
+case, this value is 32, meaning that rx queues on eth2 are numbered
+from 32 to 35 - there are four of them.
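+
+For example, with the values seen on the MACCHIATOBin's eth2
+(illustrative only; port->id is 1 here):
+
+    rxq->id        = 1 * 32 + queue_id;  /* 32, 33, 34, 35 */
+    rxq->logic_rxq =          queue_id;  /*  0,  1,  2,  3 */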
+
+Clearly, this is not the per-port queue id that XDP is expecting:
+it wants a value in the range [0..3]. It shall directly use queue_id
+which is stored in rxq->logic_rxq -- so let's use that value instead.
+
+rxq->id is left untouched; its value is indeed valid but it should
+not be used in this context.
+
+This is consistent with the remaining part of the code in
+mvpp2_rxq_init().
+
+With this change, packet capture is working as expected on all the
+MACCHIATOBin ports.
+
+Fixes: b27db2274ba8 ("mvpp2: use page_pool allocator")
+Signed-off-by: Louis Amas <louis.amas@eho.link>
+Signed-off-by: Emmanuel Deloget <emmanuel.deloget@eho.link>
+Reviewed-by: Marcin Wojtas <mw@semihalf.com>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
+Link: https://lore.kernel.org/r/20211207143423.916334-1-louis.amas@eho.link
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+index 6480696c979b..6da8a595026b 100644
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+@@ -2960,11 +2960,11 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
+ mvpp2_rxq_status_update(port, rxq->id, 0, rxq->size);
+
+ if (priv->percpu_pools) {
+- err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->id);
++ err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->logic_rxq);
+ if (err < 0)
+ goto err_free_dma;
+
+- err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->id);
++ err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->logic_rxq);
+ if (err < 0)
+ goto err_unregister_rxq_short;
+
+--
+2.25.1
+
can-pch_can-pch_can_rx_normal-fix-use-after-free.patch
can-m_can-disable-and-ignore-elo-interrupt.patch
libata-add-horkage-for-asmedia-1092.patch
+wait-add-wake_up_pollfree.patch
+binder-use-wake_up_pollfree.patch
+signalfd-use-wake_up_pollfree.patch
+aio-keep-poll-requests-on-waitqueue-until-completed.patch
+aio-fix-use-after-free-due-to-missing-pollfree-handling.patch
+net-mvpp2-fix-xdp-rx-queues-registering.patch
--- /dev/null
+From 9537bae0da1f8d1e2361ab6d0479e8af7824e160 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Wed, 8 Dec 2021 17:04:53 -0800
+Subject: signalfd: use wake_up_pollfree()
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 9537bae0da1f8d1e2361ab6d0479e8af7824e160 upstream.
+
+wake_up_poll() uses nr_exclusive=1, so it's not guaranteed to wake up
+all exclusive waiters. Yet, POLLFREE *must* wake up all waiters. epoll
+and aio poll are fortunately not affected by this, but it's very
+fragile. Thus, the new function wake_up_pollfree() has been introduced.
+
+Convert signalfd to use wake_up_pollfree().
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Fixes: d80e731ecab4 ("epoll: introduce POLLFREE to flush ->signalfd_wqh before kfree()")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20211209010455.42744-4-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/signalfd.c | 12 +-----------
+ 1 file changed, 1 insertion(+), 11 deletions(-)
+
+diff --git a/fs/signalfd.c b/fs/signalfd.c
+index 040e1cf90528..65ce0e72e7b9 100644
+--- a/fs/signalfd.c
++++ b/fs/signalfd.c
+@@ -35,17 +35,7 @@
+
+ void signalfd_cleanup(struct sighand_struct *sighand)
+ {
+- wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+- /*
+- * The lockless check can race with remove_wait_queue() in progress,
+- * but in this case its caller should run under rcu_read_lock() and
+- * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
+- */
+- if (likely(!waitqueue_active(wqh)))
+- return;
+-
+- /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
+- wake_up_poll(wqh, EPOLLHUP | POLLFREE);
++ wake_up_pollfree(&sighand->signalfd_wqh);
+ }
+
+ struct signalfd_ctx {
+--
+2.34.1
+
--- /dev/null
+From 42288cb44c4b5fff7653bc392b583a2b8bd6a8c0 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Wed, 8 Dec 2021 17:04:51 -0800
+Subject: wait: add wake_up_pollfree()
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 42288cb44c4b5fff7653bc392b583a2b8bd6a8c0 upstream.
+
+Several ->poll() implementations are special in that they use a
+waitqueue whose lifetime is the current task, rather than the struct
+file as is normally the case. This is okay for blocking polls, since a
+blocking poll occurs within one task; however, non-blocking polls
+require another solution. This solution is for the queue to be cleared
+before it is freed, using 'wake_up_poll(wq, EPOLLHUP | POLLFREE);'.
+
+However, that has a bug: wake_up_poll() calls __wake_up() with
+nr_exclusive=1. Therefore, if there are multiple "exclusive" waiters,
+and the wakeup function for the first one returns a positive value, only
+that one will be called. That's *not* what's needed for POLLFREE;
+POLLFREE is special in that it really needs to wake up everyone.
+
+Considering the three non-blocking poll systems:
+
+- io_uring poll doesn't handle POLLFREE at all, so it is broken anyway.
+
+- aio poll is unaffected, since it doesn't support exclusive waits.
+ However, that's fragile, as someone could add this feature later.
+
+- epoll doesn't appear to be broken by this, since its wakeup function
+ returns 0 when it sees POLLFREE. But this is fragile.
+
+Although there is a workaround (see epoll), it's better to define a
+function which always sends POLLFREE to all waiters. Add such a
+function. Also make it verify that the queue really becomes empty after
+all waiters have been woken up.
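+
+For reference, a minimal sketch (illustrative only, not taken from an
+in-tree user) of how a provider owning such a task-lifetime waitqueue
+would use the new helper together with the required RCU-delayed free:
+
+    #include <linux/wait.h>      /* wait_queue_head_t, wake_up_pollfree() */
+    #include <linux/rcupdate.h>  /* kfree_rcu() */
+    #include <linux/slab.h>
+
+    struct my_poll_ctx {
+        wait_queue_head_t wqh;   /* lifetime tied to a task, not a file */
+        struct rcu_head rcu;
+    };
+
+    /* Teardown, assuming ctx was kmalloc()ed: */
+    static void my_poll_ctx_destroy(struct my_poll_ctx *ctx)
+    {
+        /* Kick every waiter (epoll, aio poll, ...) off the queue. */
+        wake_up_pollfree(&ctx->wqh);
+
+        /*
+         * Waiters may still be detaching from the now-empty queue under
+         * rcu_read_lock(), so the memory may only be freed after a grace
+         * period (kfree_rcu()/call_rcu()/synchronize_rcu(), or by using
+         * SLAB_TYPESAFE_BY_RCU).
+         */
+        kfree_rcu(ctx, rcu);
+    }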
+
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20211209010455.42744-2-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/wait.h | 26 ++++++++++++++++++++++++++
+ kernel/sched/wait.c | 7 +++++++
+ 2 files changed, 33 insertions(+)
+
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -207,6 +207,7 @@ void __wake_up_sync_key(struct wait_queu
+ void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
+ void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
+ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
++void __wake_up_pollfree(struct wait_queue_head *wq_head);
+
+ #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
+ #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL)
+@@ -235,6 +236,31 @@ void __wake_up_sync(struct wait_queue_he
+ #define wake_up_interruptible_sync_poll_locked(x, m) \
+ __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
+
++/**
++ * wake_up_pollfree - signal that a polled waitqueue is going away
++ * @wq_head: the wait queue head
++ *
++ * In the very rare cases where a ->poll() implementation uses a waitqueue whose
++ * lifetime is tied to a task rather than to the 'struct file' being polled,
++ * this function must be called before the waitqueue is freed so that
++ * non-blocking polls (e.g. epoll) are notified that the queue is going away.
++ *
++ * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via
++ * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU.
++ */
++static inline void wake_up_pollfree(struct wait_queue_head *wq_head)
++{
++ /*
++ * For performance reasons, we don't always take the queue lock here.
++ * Therefore, we might race with someone removing the last entry from
++ * the queue, and proceed while they still hold the queue lock.
++ * However, rcu_read_lock() is required to be held in such cases, so we
++ * can safely proceed with an RCU-delayed free.
++ */
++ if (waitqueue_active(wq_head))
++ __wake_up_pollfree(wq_head);
++}
++
+ #define ___wait_cond_timeout(condition) \
+ ({ \
+ bool __cond = (condition); \
+--- a/kernel/sched/wait.c
++++ b/kernel/sched/wait.c
+@@ -223,6 +223,13 @@ void __wake_up_sync(struct wait_queue_he
+ }
+ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
+
++void __wake_up_pollfree(struct wait_queue_head *wq_head)
++{
++ __wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE));
++ /* POLLFREE must have cleared the queue. */
++ WARN_ON_ONCE(waitqueue_active(wq_head));
++}
++
+ /*
+ * Note: we use "set_current_state()" _after_ the wait-queue add,
+ * because we need a memory barrier there on SMP, so that any