Fixes for 6.1

author Sasha Levin <sashal@kernel.org>

Sat, 31 Dec 2022 20:03:55 +0000 (15:03 -0500)

committer Sasha Levin <sashal@kernel.org>

Sat, 31 Dec 2022 20:03:55 +0000 (15:03 -0500)
author Sasha Levin <sashal@kernel.org>
Sat, 31 Dec 2022 20:03:55 +0000 (15:03 -0500)
committer Sasha Levin <sashal@kernel.org>
Sat, 31 Dec 2022 20:03:55 +0000 (15:03 -0500)
diff --git a/queue-6.1/bpf-resolve-fext-program-type-when-checking-map-comp.patch b/queue-6.1/bpf-resolve-fext-program-type-when-checking-map-comp.patch

new file mode 100644 (file)

index 0000000..d2ac5da
--- /dev/null
+++ b/queue-6.1/bpf-resolve-fext-program-type-when-checking-map-comp.patch
@@ -0,0 +1,73 @@
+From b97e4ecc947c2baab8e19956c2d23c72deba7048 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Dec 2022 00:02:53 +0100
+Subject: bpf: Resolve fext program type when checking map compatibility
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Toke Høiland-Jørgensen <toke@redhat.com>
+
+[ Upstream commit 1c123c567fb138ebd187480b7fc0610fcb0851f5 ]
+
+The bpf_prog_map_compatible() check makes sure that BPF program types are
+not mixed inside BPF map types that can contain programs (tail call maps,
+cpumaps and devmaps). It does this by setting the fields of the map->owner
+struct to the values of the first program being checked against, and
+rejecting any subsequent programs if the values don't match.
+
+One of the values being set in the map owner struct is the program type,
+and since the code did not resolve the prog type for fext programs, the map
+owner type would be set to PROG_TYPE_EXT and subsequent loading of programs
+of the target type into the map would fail.
+
+This bug is seen in particular for XDP programs that are loaded as
+PROG_TYPE_EXT using libxdp; these cannot insert programs into devmaps and
+cpumaps because the check fails as described above.
+
+Fix the bug by resolving the fext program type to its target program type
+as elsewhere in the verifier.
+
+v3:
+- Add Yonghong's ACK
+
+Fixes: f45d5b6ce2e8 ("bpf: generalise tail call map compatibility check")
+Acked-by: Yonghong Song <yhs@fb.com>
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://lore.kernel.org/r/20221214230254.790066-1-toke@redhat.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/core.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
+index 25a54e04560e..17ab3e15ac25 100644
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -2088,6 +2088,7 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
+ bool bpf_prog_map_compatible(struct bpf_map *map,
+                            const struct bpf_prog *fp)
+ {
++      enum bpf_prog_type prog_type = resolve_prog_type(fp);
+       bool ret;
+ 
+       if (fp->kprobe_override)
+@@ -2098,12 +2099,12 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
+               /* There's no owner yet where we could check for
+                * compatibility.
+                */
+-              map->owner.type  = fp->type;
++              map->owner.type  = prog_type;
+               map->owner.jited = fp->jited;
+               map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
+               ret = true;
+       } else {
+-              ret = map->owner.type  == fp->type &&
++              ret = map->owner.type  == prog_type &&
+                     map->owner.jited == fp->jited &&
+                     map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
+       }
+-- 
+2.35.1
+
diff --git a/queue-6.1/eventfd-provide-a-eventfd_signal_mask-helper.patch b/queue-6.1/eventfd-provide-a-eventfd_signal_mask-helper.patch

new file mode 100644 (file)

index 0000000..6274040
--- /dev/null
+++ b/queue-6.1/eventfd-provide-a-eventfd_signal_mask-helper.patch
@@ -0,0 +1,121 @@
+From 4921545396fbdc7e8af48bbf94da171997d95c5e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Nov 2022 10:13:44 -0700
+Subject: eventfd: provide a eventfd_signal_mask() helper
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit 03e02acda8e267a8183e1e0ed289ff1ef9cd7ed8 ]
+
+This is identical to eventfd_signal(), but it allows the caller to pass
+in a mask to be used for the poll wakeup key. The use case is avoiding
+repeated multishot triggers if we have a dependency between eventfd and
+io_uring.
+
+If we setup an eventfd context and register that as the io_uring eventfd,
+and at the same time queue a multishot poll request for the eventfd
+context, then any CQE posted will repeatedly trigger the multishot request
+until it terminates when the CQ ring overflows.
+
+In preparation for io_uring detecting this circular dependency, add the
+mentioned helper so that io_uring can pass in EPOLL_URING as part of the
+poll wakeup key.
+
+Cc: stable@vger.kernel.org # 6.0
+[axboe: fold in !CONFIG_EVENTFD fix from Zhang Qilong]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: 4464853277d0 ("io_uring: pass in EPOLL_URING_WAKE for eventfd signaling and wakeups")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventfd.c            | 37 +++++++++++++++++++++----------------
+ include/linux/eventfd.h |  7 +++++++
+ 2 files changed, 28 insertions(+), 16 deletions(-)
+
+diff --git a/fs/eventfd.c b/fs/eventfd.c
+index c0ffee99ad23..249ca6c0b784 100644
+--- a/fs/eventfd.c
++++ b/fs/eventfd.c
+@@ -43,21 +43,7 @@ struct eventfd_ctx {
+       int id;
+ };
+ 
+-/**
+- * eventfd_signal - Adds @n to the eventfd counter.
+- * @ctx: [in] Pointer to the eventfd context.
+- * @n: [in] Value of the counter to be added to the eventfd internal counter.
+- *          The value cannot be negative.
+- *
+- * This function is supposed to be called by the kernel in paths that do not
+- * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
+- * value, and we signal this as overflow condition by returning a EPOLLERR
+- * to poll(2).
+- *
+- * Returns the amount by which the counter was incremented.  This will be less
+- * than @n if the counter has overflowed.
+- */
+-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
++__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask)
+ {
+       unsigned long flags;
+ 
+@@ -78,12 +64,31 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
+               n = ULLONG_MAX - ctx->count;
+       ctx->count += n;
+       if (waitqueue_active(&ctx->wqh))
+-              wake_up_locked_poll(&ctx->wqh, EPOLLIN);
++              wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask);
+       current->in_eventfd = 0;
+       spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+ 
+       return n;
+ }
++
++/**
++ * eventfd_signal - Adds @n to the eventfd counter.
++ * @ctx: [in] Pointer to the eventfd context.
++ * @n: [in] Value of the counter to be added to the eventfd internal counter.
++ *          The value cannot be negative.
++ *
++ * This function is supposed to be called by the kernel in paths that do not
++ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
++ * value, and we signal this as overflow condition by returning a EPOLLERR
++ * to poll(2).
++ *
++ * Returns the amount by which the counter was incremented.  This will be less
++ * than @n if the counter has overflowed.
++ */
++__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
++{
++      return eventfd_signal_mask(ctx, n, 0);
++}
+ EXPORT_SYMBOL_GPL(eventfd_signal);
+ 
+ static void eventfd_free_ctx(struct eventfd_ctx *ctx)
+diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
+index 3cd202d3eefb..36a486505b08 100644
+--- a/include/linux/eventfd.h
++++ b/include/linux/eventfd.h
+@@ -40,6 +40,7 @@ struct file *eventfd_fget(int fd);
+ struct eventfd_ctx *eventfd_ctx_fdget(int fd);
+ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
+ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
++__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask);
+ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
+                                 __u64 *cnt);
+ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
+@@ -66,6 +67,12 @@ static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
+       return -ENOSYS;
+ }
+ 
++static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n,
++                                    unsigned mask)
++{
++      return -ENOSYS;
++}
++
+ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
+ {
+ 
+-- 
+2.35.1
+
diff --git a/queue-6.1/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch b/queue-6.1/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch

new file mode 100644 (file)

index 0000000..9279c3e
--- /dev/null
+++ b/queue-6.1/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch
@@ -0,0 +1,120 @@
+From 3582fb5f42ec83305c6506be4db4450edc0de53c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Nov 2022 10:10:53 -0700
+Subject: eventpoll: add EPOLL_URING_WAKE poll wakeup flag
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit caf1aeaffc3b09649a56769e559333ae2c4f1802 ]
+
+We can have dependencies between epoll and io_uring. Consider an epoll
+context, identified by the epfd file descriptor, and an io_uring file
+descriptor identified by iofd. If we add iofd to the epfd context, and
+arm a multishot poll request for epfd with iofd, then the multishot
+poll request will repeatedly trigger and generate events until terminated
+by CQ ring overflow. This isn't a desired behavior.
+
+Add EPOLL_URING so that io_uring can pass it in as part of the poll wakeup
+key, and io_uring can check for that to detect a potential recursive
+invocation.
+
+Cc: stable@vger.kernel.org # 6.0
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: 4464853277d0 ("io_uring: pass in EPOLL_URING_WAKE for eventfd signaling and wakeups")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c                 | 18 ++++++++++--------
+ include/uapi/linux/eventpoll.h |  6 ++++++
+ 2 files changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index 52954d4637b5..64659b110973 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -491,7 +491,8 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
+  */
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ 
+-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
++static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
++                           unsigned pollflags)
+ {
+       struct eventpoll *ep_src;
+       unsigned long flags;
+@@ -522,16 +523,17 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
+       }
+       spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests);
+       ep->nests = nests + 1;
+-      wake_up_locked_poll(&ep->poll_wait, EPOLLIN);
++      wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags);
+       ep->nests = 0;
+       spin_unlock_irqrestore(&ep->poll_wait.lock, flags);
+ }
+ 
+ #else
+ 
+-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
++static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
++                           unsigned pollflags)
+ {
+-      wake_up_poll(&ep->poll_wait, EPOLLIN);
++      wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags);
+ }
+ 
+ #endif
+@@ -742,7 +744,7 @@ static void ep_free(struct eventpoll *ep)
+ 
+       /* We need to release all tasks waiting for these file */
+       if (waitqueue_active(&ep->poll_wait))
+-              ep_poll_safewake(ep, NULL);
++              ep_poll_safewake(ep, NULL, 0);
+ 
+       /*
+        * We need to lock this because we could be hit by
+@@ -1208,7 +1210,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
+ 
+       /* We have to call this outside the lock */
+       if (pwake)
+-              ep_poll_safewake(ep, epi);
++              ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE);
+ 
+       if (!(epi->event.events & EPOLLEXCLUSIVE))
+               ewake = 1;
+@@ -1553,7 +1555,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
+ 
+       /* We have to call this outside the lock */
+       if (pwake)
+-              ep_poll_safewake(ep, NULL);
++              ep_poll_safewake(ep, NULL, 0);
+ 
+       return 0;
+ }
+@@ -1629,7 +1631,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
+ 
+       /* We have to call this outside the lock */
+       if (pwake)
+-              ep_poll_safewake(ep, NULL);
++              ep_poll_safewake(ep, NULL, 0);
+ 
+       return 0;
+ }
+diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
+index 8a3432d0f0dc..e687658843b1 100644
+--- a/include/uapi/linux/eventpoll.h
++++ b/include/uapi/linux/eventpoll.h
+@@ -41,6 +41,12 @@
+ #define EPOLLMSG      (__force __poll_t)0x00000400
+ #define EPOLLRDHUP    (__force __poll_t)0x00002000
+ 
++/*
++ * Internal flag - wakeup generated by io_uring, used to detect recursion back
++ * into the io_uring poll handler.
++ */
++#define EPOLL_URING_WAKE      ((__force __poll_t)(1U << 27))
++
+ /* Set exclusive wakeup mode for the target file descriptor */
+ #define EPOLLEXCLUSIVE        ((__force __poll_t)(1U << 28))
+ 
+-- 
+2.35.1
+
diff --git a/queue-6.1/io_uring-pass-in-epoll_uring_wake-for-eventfd-signal.patch b/queue-6.1/io_uring-pass-in-epoll_uring_wake-for-eventfd-signal.patch

new file mode 100644 (file)

index 0000000..36e02cc
--- /dev/null
+++ b/queue-6.1/io_uring-pass-in-epoll_uring_wake-for-eventfd-signal.patch
@@ -0,0 +1,105 @@
+From 2d15b0d0467a3f2a6f468ae6563fe3f93f0ae1a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Nov 2022 10:18:45 -0700
+Subject: io_uring: pass in EPOLL_URING_WAKE for eventfd signaling and wakeups
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit 4464853277d0ccdb9914608dd1332f0fa2f9846f ]
+
+Pass in EPOLL_URING_WAKE when signaling eventfd or doing poll related
+wakups, so that we can check for a circular event dependency between
+eventfd and epoll. If this flag is set when our wakeup handlers are
+called, then we know we have a dependency that needs to terminate
+multishot requests.
+
+eventfd and epoll are the only such possible dependencies.
+
+Cc: stable@vger.kernel.org # 6.0
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c |  4 ++--
+ io_uring/io_uring.h | 15 +++++++++++----
+ io_uring/poll.c     |  8 ++++++++
+ 3 files changed, 21 insertions(+), 6 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 17771cb3c333..71f1cabb9f3d 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -495,7 +495,7 @@ static void io_eventfd_ops(struct rcu_head *rcu)
+       int ops = atomic_xchg(&ev_fd->ops, 0);
+ 
+       if (ops & BIT(IO_EVENTFD_OP_SIGNAL_BIT))
+-              eventfd_signal(ev_fd->cq_ev_fd, 1);
++              eventfd_signal_mask(ev_fd->cq_ev_fd, 1, EPOLL_URING_WAKE);
+ 
+       /* IO_EVENTFD_OP_FREE_BIT may not be set here depending on callback
+        * ordering in a race but if references are 0 we know we have to free
+@@ -531,7 +531,7 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
+               goto out;
+ 
+       if (likely(eventfd_signal_allowed())) {
+-              eventfd_signal(ev_fd->cq_ev_fd, 1);
++              eventfd_signal_mask(ev_fd->cq_ev_fd, 1, EPOLL_URING_WAKE);
+       } else {
+               atomic_inc(&ev_fd->refs);
+               if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
+diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
+index 50bc3af44953..4334cd30c423 100644
+--- a/io_uring/io_uring.h
++++ b/io_uring/io_uring.h
+@@ -4,6 +4,7 @@
+ #include <linux/errno.h>
+ #include <linux/lockdep.h>
+ #include <linux/io_uring_types.h>
++#include <uapi/linux/eventpoll.h>
+ #include "io-wq.h"
+ #include "slist.h"
+ #include "filetable.h"
+@@ -207,12 +208,18 @@ static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+ static inline void __io_cqring_wake(struct io_ring_ctx *ctx)
+ {
+       /*
+-       * wake_up_all() may seem excessive, but io_wake_function() and
+-       * io_should_wake() handle the termination of the loop and only
+-       * wake as many waiters as we need to.
++       * Trigger waitqueue handler on all waiters on our waitqueue. This
++       * won't necessarily wake up all the tasks, io_should_wake() will make
++       * that decision.
++       *
++       * Pass in EPOLLIN|EPOLL_URING_WAKE as the poll wakeup key. The latter
++       * set in the mask so that if we recurse back into our own poll
++       * waitqueue handlers, we know we have a dependency between eventfd or
++       * epoll and should terminate multishot poll at that point.
+        */
+       if (waitqueue_active(&ctx->cq_wait))
+-              wake_up_all(&ctx->cq_wait);
++              __wake_up(&ctx->cq_wait, TASK_NORMAL, 0,
++                              poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
+ }
+ 
+ static inline void io_cqring_wake(struct io_ring_ctx *ctx)
+diff --git a/io_uring/poll.c b/io_uring/poll.c
+index d9bf1767867e..fded1445a803 100644
+--- a/io_uring/poll.c
++++ b/io_uring/poll.c
+@@ -429,6 +429,14 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+               return 0;
+ 
+       if (io_poll_get_ownership(req)) {
++              /*
++               * If we trigger a multishot poll off our own wakeup path,
++               * disable multishot as there is a circular dependency between
++               * CQ posting and triggering the event.
++               */
++              if (mask & EPOLL_URING_WAKE)
++                      poll->events |= EPOLLONESHOT;
++
+               /* optional, saves extra locking for removal in tw handler */
+               if (mask && poll->events & EPOLLONESHOT) {
+                       list_del_init(&poll->wait.entry);
+-- 
+2.35.1
+
diff --git a/queue-6.1/series b/queue-6.1/series

new file mode 100644 (file)

index 0000000..c3de88f
--- /dev/null
+++ b/queue-6.1/series
@@ -0,0 +1,4 @@
+eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch
+eventfd-provide-a-eventfd_signal_mask-helper.patch
+io_uring-pass-in-epoll_uring_wake-for-eventfd-signal.patch
+bpf-resolve-fext-program-type-when-checking-map-comp.patch
author	Sasha Levin <sashal@kernel.org>
	Sat, 31 Dec 2022 20:03:55 +0000 (15:03 -0500)
committer	Sasha Levin <sashal@kernel.org>
	Sat, 31 Dec 2022 20:03:55 +0000 (15:03 -0500)
queue-6.1/bpf-resolve-fext-program-type-when-checking-map-comp.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/eventfd-provide-a-eventfd_signal_mask-helper.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/io_uring-pass-in-epoll_uring_wake-for-eventfd-signal.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/series	[new file with mode: 0644]	patch \| blob