From 6cdac8f1a4c84c0afba6778737c74956286c6f0b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 2 Jan 2023 11:43:08 +0100 Subject: [PATCH] 6.0-stable patches added patches: eventfd-provide-a-eventfd_signal_mask-helper.patch eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch futex-fix-futex_waitv-hrtimer-debug-object-leak-on-kcalloc-error.patch hfsplus-fix-bug-causing-custom-uid-and-gid-being-unable-to-be-assigned-with-mount.patch io_uring-dont-remove-file-from-msg_ring-reqs.patch mm-mempolicy-fix-memory-leak-in-set_mempolicy_home_node-system-call.patch pstore-properly-assign-mem_type-property.patch pstore-zone-use-gfp_atomic-to-allocate-zone-buffer.patch rtmutex-add-acquire-semantics-for-rtmutex-lock-acquisition-slow-path.patch --- ...provide-a-eventfd_signal_mask-helper.patch | 120 ++++++++++ ...dd-epoll_uring_wake-poll-wakeup-flag.patch | 119 ++++++++++ ...r-debug-object-leak-on-kcalloc-error.patch | 55 +++++ ...ing-unable-to-be-assigned-with-mount.patch | 71 ++++++ ...-dont-remove-file-from-msg_ring-reqs.patch | 118 ++++++++++ ...-set_mempolicy_home_node-system-call.patch | 51 ++++ ...re-properly-assign-mem_type-property.patch | 42 ++++ ...e-gfp_atomic-to-allocate-zone-buffer.patch | 36 +++ ...r-rtmutex-lock-acquisition-slow-path.patch | 219 ++++++++++++++++++ queue-6.0/series | 9 + 10 files changed, 840 insertions(+) create mode 100644 queue-6.0/eventfd-provide-a-eventfd_signal_mask-helper.patch create mode 100644 queue-6.0/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch create mode 100644 queue-6.0/futex-fix-futex_waitv-hrtimer-debug-object-leak-on-kcalloc-error.patch create mode 100644 queue-6.0/hfsplus-fix-bug-causing-custom-uid-and-gid-being-unable-to-be-assigned-with-mount.patch create mode 100644 queue-6.0/io_uring-dont-remove-file-from-msg_ring-reqs.patch create mode 100644 queue-6.0/mm-mempolicy-fix-memory-leak-in-set_mempolicy_home_node-system-call.patch create mode 100644 queue-6.0/pstore-properly-assign-mem_type-property.patch create mode 100644 queue-6.0/pstore-zone-use-gfp_atomic-to-allocate-zone-buffer.patch create mode 100644 queue-6.0/rtmutex-add-acquire-semantics-for-rtmutex-lock-acquisition-slow-path.patch diff --git a/queue-6.0/eventfd-provide-a-eventfd_signal_mask-helper.patch b/queue-6.0/eventfd-provide-a-eventfd_signal_mask-helper.patch new file mode 100644 index 00000000000..6e204e824c8 --- /dev/null +++ b/queue-6.0/eventfd-provide-a-eventfd_signal_mask-helper.patch @@ -0,0 +1,120 @@ +From 03e02acda8e267a8183e1e0ed289ff1ef9cd7ed8 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Sun, 20 Nov 2022 10:13:44 -0700 +Subject: eventfd: provide a eventfd_signal_mask() helper + +From: Jens Axboe + +commit 03e02acda8e267a8183e1e0ed289ff1ef9cd7ed8 upstream. + +This is identical to eventfd_signal(), but it allows the caller to pass +in a mask to be used for the poll wakeup key. The use case is avoiding +repeated multishot triggers if we have a dependency between eventfd and +io_uring. + +If we setup an eventfd context and register that as the io_uring eventfd, +and at the same time queue a multishot poll request for the eventfd +context, then any CQE posted will repeatedly trigger the multishot request +until it terminates when the CQ ring overflows. + +In preparation for io_uring detecting this circular dependency, add the +mentioned helper so that io_uring can pass in EPOLL_URING as part of the +poll wakeup key. 
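+
+[editor's illustration, not part of the upstream commit] A minimal sketch of
+how a CQE-posting path might call the new helper, tagging the wakeup with the
+EPOLL_URING_WAKE flag added by the companion eventpoll patch in this series;
+the function name post_cqe_and_signal() is hypothetical:
+
+	#include <linux/eventfd.h>
+	#include <linux/eventpoll.h>
+
+	static void post_cqe_and_signal(struct eventfd_ctx *ev)
+	{
+		/* Tag the wakeup so a poll handler can detect that it
+		 * originated from io_uring and avoid re-triggering a
+		 * multishot poll armed on the same eventfd. */
+		eventfd_signal_mask(ev, 1, EPOLL_URING_WAKE);
+	}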
+ +Cc: stable@vger.kernel.org # 6.0 +[axboe: fold in !CONFIG_EVENTFD fix from Zhang Qilong] +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/eventfd.c | 37 +++++++++++++++++++++---------------- + include/linux/eventfd.h | 7 +++++++ + 2 files changed, 28 insertions(+), 16 deletions(-) + +diff --git a/fs/eventfd.c b/fs/eventfd.c +index c0ffee99ad23..249ca6c0b784 100644 +--- a/fs/eventfd.c ++++ b/fs/eventfd.c +@@ -43,21 +43,7 @@ struct eventfd_ctx { + int id; + }; + +-/** +- * eventfd_signal - Adds @n to the eventfd counter. +- * @ctx: [in] Pointer to the eventfd context. +- * @n: [in] Value of the counter to be added to the eventfd internal counter. +- * The value cannot be negative. +- * +- * This function is supposed to be called by the kernel in paths that do not +- * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX +- * value, and we signal this as overflow condition by returning a EPOLLERR +- * to poll(2). +- * +- * Returns the amount by which the counter was incremented. This will be less +- * than @n if the counter has overflowed. +- */ +-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) ++__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask) + { + unsigned long flags; + +@@ -78,12 +64,31 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) + n = ULLONG_MAX - ctx->count; + ctx->count += n; + if (waitqueue_active(&ctx->wqh)) +- wake_up_locked_poll(&ctx->wqh, EPOLLIN); ++ wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask); + current->in_eventfd = 0; + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return n; + } ++ ++/** ++ * eventfd_signal - Adds @n to the eventfd counter. ++ * @ctx: [in] Pointer to the eventfd context. ++ * @n: [in] Value of the counter to be added to the eventfd internal counter. ++ * The value cannot be negative. ++ * ++ * This function is supposed to be called by the kernel in paths that do not ++ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX ++ * value, and we signal this as overflow condition by returning a EPOLLERR ++ * to poll(2). ++ * ++ * Returns the amount by which the counter was incremented. This will be less ++ * than @n if the counter has overflowed. 
++ */ ++__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) ++{ ++ return eventfd_signal_mask(ctx, n, 0); ++} + EXPORT_SYMBOL_GPL(eventfd_signal); + + static void eventfd_free_ctx(struct eventfd_ctx *ctx) +diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h +index 30eb30d6909b..786824f58d3d 100644 +--- a/include/linux/eventfd.h ++++ b/include/linux/eventfd.h +@@ -40,6 +40,7 @@ struct file *eventfd_fget(int fd); + struct eventfd_ctx *eventfd_ctx_fdget(int fd); + struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); + __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); ++__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask); + int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, + __u64 *cnt); + void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); +@@ -66,6 +67,12 @@ static inline int eventfd_signal(struct eventfd_ctx *ctx, int n) + return -ENOSYS; + } + ++static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, ++ unsigned mask) ++{ ++ return -ENOSYS; ++} ++ + static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) + { + +-- +2.39.0 + diff --git a/queue-6.0/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch b/queue-6.0/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch new file mode 100644 index 00000000000..c84b2dfaf5f --- /dev/null +++ b/queue-6.0/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch @@ -0,0 +1,119 @@ +From caf1aeaffc3b09649a56769e559333ae2c4f1802 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Sun, 20 Nov 2022 10:10:53 -0700 +Subject: eventpoll: add EPOLL_URING_WAKE poll wakeup flag + +From: Jens Axboe + +commit caf1aeaffc3b09649a56769e559333ae2c4f1802 upstream. + +We can have dependencies between epoll and io_uring. Consider an epoll +context, identified by the epfd file descriptor, and an io_uring file +descriptor identified by iofd. If we add iofd to the epfd context, and +arm a multishot poll request for epfd with iofd, then the multishot +poll request will repeatedly trigger and generate events until terminated +by CQ ring overflow. This isn't a desired behavior. + +Add EPOLL_URING so that io_uring can pass it in as part of the poll wakeup +key, and io_uring can check for that to detect a potential recursive +invocation. 
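+
+[editor's illustration, not part of the upstream commit] A minimal sketch of
+how a wakeup consumer might use the flag to break the cycle; the wait-queue
+callback uring_poll_wake() is hypothetical, not taken from the io_uring
+patches:
+
+	#include <linux/poll.h>
+	#include <linux/eventpoll.h>
+
+	static int uring_poll_wake(struct wait_queue_entry *wait,
+				   unsigned int mode, int sync, void *key)
+	{
+		__poll_t flags = key_to_poll(key);
+
+		/* The wakeup was generated by io_uring itself; returning
+		 * early avoids recursing back into its poll handler. */
+		if (flags & EPOLL_URING_WAKE)
+			return 0;
+
+		return 1;	/* process the wakeup as usual */
+	}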
+ +Cc: stable@vger.kernel.org # 6.0 +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/eventpoll.c | 18 ++++++++++-------- + include/uapi/linux/eventpoll.h | 6 ++++++ + 2 files changed, 16 insertions(+), 8 deletions(-) + +diff --git a/fs/eventpoll.c b/fs/eventpoll.c +index 52954d4637b5..64659b110973 100644 +--- a/fs/eventpoll.c ++++ b/fs/eventpoll.c +@@ -491,7 +491,8 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) + */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC + +-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) ++static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, ++ unsigned pollflags) + { + struct eventpoll *ep_src; + unsigned long flags; +@@ -522,16 +523,17 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) + } + spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests); + ep->nests = nests + 1; +- wake_up_locked_poll(&ep->poll_wait, EPOLLIN); ++ wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags); + ep->nests = 0; + spin_unlock_irqrestore(&ep->poll_wait.lock, flags); + } + + #else + +-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) ++static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, ++ unsigned pollflags) + { +- wake_up_poll(&ep->poll_wait, EPOLLIN); ++ wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); + } + + #endif +@@ -742,7 +744,7 @@ static void ep_free(struct eventpoll *ep) + + /* We need to release all tasks waiting for these file */ + if (waitqueue_active(&ep->poll_wait)) +- ep_poll_safewake(ep, NULL); ++ ep_poll_safewake(ep, NULL, 0); + + /* + * We need to lock this because we could be hit by +@@ -1208,7 +1210,7 @@ out_unlock: + + /* We have to call this outside the lock */ + if (pwake) +- ep_poll_safewake(ep, epi); ++ ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE); + + if (!(epi->event.events & EPOLLEXCLUSIVE)) + ewake = 1; +@@ -1553,7 +1555,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, + + /* We have to call this outside the lock */ + if (pwake) +- ep_poll_safewake(ep, NULL); ++ ep_poll_safewake(ep, NULL, 0); + + return 0; + } +@@ -1629,7 +1631,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, + + /* We have to call this outside the lock */ + if (pwake) +- ep_poll_safewake(ep, NULL); ++ ep_poll_safewake(ep, NULL, 0); + + return 0; + } +diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h +index 8a3432d0f0dc..e687658843b1 100644 +--- a/include/uapi/linux/eventpoll.h ++++ b/include/uapi/linux/eventpoll.h +@@ -41,6 +41,12 @@ + #define EPOLLMSG (__force __poll_t)0x00000400 + #define EPOLLRDHUP (__force __poll_t)0x00002000 + ++/* ++ * Internal flag - wakeup generated by io_uring, used to detect recursion back ++ * into the io_uring poll handler. 
++ */ ++#define EPOLL_URING_WAKE ((__force __poll_t)(1U << 27)) ++ + /* Set exclusive wakeup mode for the target file descriptor */ + #define EPOLLEXCLUSIVE ((__force __poll_t)(1U << 28)) + +-- +2.39.0 + diff --git a/queue-6.0/futex-fix-futex_waitv-hrtimer-debug-object-leak-on-kcalloc-error.patch b/queue-6.0/futex-fix-futex_waitv-hrtimer-debug-object-leak-on-kcalloc-error.patch new file mode 100644 index 00000000000..3408145c1e5 --- /dev/null +++ b/queue-6.0/futex-fix-futex_waitv-hrtimer-debug-object-leak-on-kcalloc-error.patch @@ -0,0 +1,55 @@ +From 94cd8fa09f5f1ebdd4e90964b08b7f2cc4b36c43 Mon Sep 17 00:00:00 2001 +From: Mathieu Desnoyers +Date: Wed, 14 Dec 2022 17:20:08 -0500 +Subject: futex: Fix futex_waitv() hrtimer debug object leak on kcalloc error + +From: Mathieu Desnoyers + +commit 94cd8fa09f5f1ebdd4e90964b08b7f2cc4b36c43 upstream. + +In a scenario where kcalloc() fails to allocate memory, the futex_waitv +system call immediately returns -ENOMEM without invoking +destroy_hrtimer_on_stack(). When CONFIG_DEBUG_OBJECTS_TIMERS=y, this +results in leaking a timer debug object. + +Fixes: bf69bad38cf6 ("futex: Implement sys_futex_waitv()") +Signed-off-by: Mathieu Desnoyers +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Davidlohr Bueso +Cc: stable@vger.kernel.org +Cc: stable@vger.kernel.org # v5.16+ +Link: https://lore.kernel.org/r/20221214222008.200393-1-mathieu.desnoyers@efficios.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/futex/syscalls.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/kernel/futex/syscalls.c ++++ b/kernel/futex/syscalls.c +@@ -286,19 +286,22 @@ SYSCALL_DEFINE5(futex_waitv, struct fute + } + + futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); +- if (!futexv) +- return -ENOMEM; ++ if (!futexv) { ++ ret = -ENOMEM; ++ goto destroy_timer; ++ } + + ret = futex_parse_waitv(futexv, waiters, nr_futexes); + if (!ret) + ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL); + ++ kfree(futexv); ++ ++destroy_timer: + if (timeout) { + hrtimer_cancel(&to.timer); + destroy_hrtimer_on_stack(&to.timer); + } +- +- kfree(futexv); + return ret; + } + diff --git a/queue-6.0/hfsplus-fix-bug-causing-custom-uid-and-gid-being-unable-to-be-assigned-with-mount.patch b/queue-6.0/hfsplus-fix-bug-causing-custom-uid-and-gid-being-unable-to-be-assigned-with-mount.patch new file mode 100644 index 00000000000..f5efaa7c268 --- /dev/null +++ b/queue-6.0/hfsplus-fix-bug-causing-custom-uid-and-gid-being-unable-to-be-assigned-with-mount.patch @@ -0,0 +1,71 @@ +From 9f2b5debc07073e6dfdd774e3594d0224b991927 Mon Sep 17 00:00:00 2001 +From: Aditya Garg +Date: Wed, 7 Dec 2022 03:05:40 +0000 +Subject: hfsplus: fix bug causing custom uid and gid being unable to be assigned with mount + +From: Aditya Garg + +commit 9f2b5debc07073e6dfdd774e3594d0224b991927 upstream. + +Despite specifying UID and GID in mount command, the specified UID and GID +were not being assigned. This patch fixes this issue. 
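+
+[editor's illustration, not part of the upstream commit] A minimal userspace
+sketch of the case this fixes, forcing ownership of all files on an HFS+
+volume via mount options; the device path, mount point, and ids are
+placeholders:
+
+	#include <stdio.h>
+	#include <sys/mount.h>
+
+	int main(void)
+	{
+		/* With the fix, every file on the volume is reported as
+		 * uid 1000 / gid 1000, even when the on-disk permissions
+		 * record valid ownership. */
+		if (mount("/dev/sdb2", "/mnt/mac", "hfsplus", 0,
+			  "uid=1000,gid=1000") != 0) {
+			perror("mount");
+			return 1;
+		}
+		return 0;
+	}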
+ +Link: https://lkml.kernel.org/r/C0264BF5-059C-45CF-B8DA-3A3BD2C803A2@live.com +Signed-off-by: Aditya Garg +Reviewed-by: Viacheslav Dubeyko +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/hfsplus/hfsplus_fs.h | 2 ++ + fs/hfsplus/inode.c | 4 ++-- + fs/hfsplus/options.c | 4 ++++ + 3 files changed, 8 insertions(+), 2 deletions(-) + +--- a/fs/hfsplus/hfsplus_fs.h ++++ b/fs/hfsplus/hfsplus_fs.h +@@ -198,6 +198,8 @@ struct hfsplus_sb_info { + #define HFSPLUS_SB_HFSX 3 + #define HFSPLUS_SB_CASEFOLD 4 + #define HFSPLUS_SB_NOBARRIER 5 ++#define HFSPLUS_SB_UID 6 ++#define HFSPLUS_SB_GID 7 + + static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) + { +--- a/fs/hfsplus/inode.c ++++ b/fs/hfsplus/inode.c +@@ -192,11 +192,11 @@ static void hfsplus_get_perms(struct ino + mode = be16_to_cpu(perms->mode); + + i_uid_write(inode, be32_to_cpu(perms->owner)); +- if (!i_uid_read(inode) && !mode) ++ if ((test_bit(HFSPLUS_SB_UID, &sbi->flags)) || (!i_uid_read(inode) && !mode)) + inode->i_uid = sbi->uid; + + i_gid_write(inode, be32_to_cpu(perms->group)); +- if (!i_gid_read(inode) && !mode) ++ if ((test_bit(HFSPLUS_SB_GID, &sbi->flags)) || (!i_gid_read(inode) && !mode)) + inode->i_gid = sbi->gid; + + if (dir) { +--- a/fs/hfsplus/options.c ++++ b/fs/hfsplus/options.c +@@ -140,6 +140,8 @@ int hfsplus_parse_options(char *input, s + if (!uid_valid(sbi->uid)) { + pr_err("invalid uid specified\n"); + return 0; ++ } else { ++ set_bit(HFSPLUS_SB_UID, &sbi->flags); + } + break; + case opt_gid: +@@ -151,6 +153,8 @@ int hfsplus_parse_options(char *input, s + if (!gid_valid(sbi->gid)) { + pr_err("invalid gid specified\n"); + return 0; ++ } else { ++ set_bit(HFSPLUS_SB_GID, &sbi->flags); + } + break; + case opt_part: diff --git a/queue-6.0/io_uring-dont-remove-file-from-msg_ring-reqs.patch b/queue-6.0/io_uring-dont-remove-file-from-msg_ring-reqs.patch new file mode 100644 index 00000000000..6331251bc1b --- /dev/null +++ b/queue-6.0/io_uring-dont-remove-file-from-msg_ring-reqs.patch @@ -0,0 +1,118 @@ +From ef0ec1ad03119b8b46b035dad42bca7d6da7c2e5 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 7 Dec 2022 03:53:26 +0000 +Subject: io_uring: dont remove file from msg_ring reqs + +From: Pavel Begunkov + +commit ef0ec1ad03119b8b46b035dad42bca7d6da7c2e5 upstream. + +We should not be messing with req->file outside of core paths. Clearing +it makes msg_ring non reentrant, i.e. luckily io_msg_send_fd() fails the +request on failed io_double_lock_ctx() but clearly was originally +intended to do retries instead. 
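+
+[editor's illustration, not part of the upstream commit] A minimal userspace
+sketch of the MSG_RING operation this patch touches, assuming liburing
+(>= 2.2) for io_uring_prep_msg_ring(); the user_data value 0x1234 is
+arbitrary:
+
+	#include <errno.h>
+	#include <liburing.h>
+
+	/* Post a CQE carrying user_data 0x1234 into the ring identified
+	 * by target_ring_fd, submitted through the ring src. */
+	static int send_ring_msg(struct io_uring *src, int target_ring_fd)
+	{
+		struct io_uring_sqe *sqe = io_uring_get_sqe(src);
+
+		if (!sqe)
+			return -ENOMEM;
+		io_uring_prep_msg_ring(sqe, target_ring_fd, 0, 0x1234, 0);
+		return io_uring_submit(src);
+	}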
+ +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/e5ac9edadb574fe33f6d727cb8f14ce68262a684.1670384893.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 2 +- + io_uring/msg_ring.c | 4 ---- + io_uring/opdef.c | 7 +++++++ + io_uring/opdef.h | 2 ++ + 4 files changed, 10 insertions(+), 5 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -1607,7 +1607,7 @@ static int io_issue_sqe(struct io_kiocb + return ret; + + /* If the op doesn't have a file, we're not polling for it */ +- if ((req->ctx->flags & IORING_SETUP_IOPOLL) && req->file) ++ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue) + io_iopoll_req_issued(req, issue_flags); + + return 0; +--- a/io_uring/msg_ring.c ++++ b/io_uring/msg_ring.c +@@ -169,9 +169,5 @@ done: + if (ret < 0) + req_set_fail(req); + io_req_set_res(req, ret, 0); +- /* put file to avoid an attempt to IOPOLL the req */ +- if (!(req->flags & REQ_F_FIXED_FILE)) +- io_put_file(req->file); +- req->file = NULL; + return IOU_OK; + } +--- a/io_uring/opdef.c ++++ b/io_uring/opdef.c +@@ -63,6 +63,7 @@ const struct io_op_def io_op_defs[] = { + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, ++ .iopoll_queue = 1, + .async_size = sizeof(struct io_async_rw), + .name = "READV", + .prep = io_prep_rw, +@@ -80,6 +81,7 @@ const struct io_op_def io_op_defs[] = { + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, ++ .iopoll_queue = 1, + .async_size = sizeof(struct io_async_rw), + .name = "WRITEV", + .prep = io_prep_rw, +@@ -103,6 +105,7 @@ const struct io_op_def io_op_defs[] = { + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, ++ .iopoll_queue = 1, + .async_size = sizeof(struct io_async_rw), + .name = "READ_FIXED", + .prep = io_prep_rw, +@@ -118,6 +121,7 @@ const struct io_op_def io_op_defs[] = { + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, ++ .iopoll_queue = 1, + .async_size = sizeof(struct io_async_rw), + .name = "WRITE_FIXED", + .prep = io_prep_rw, +@@ -275,6 +279,7 @@ const struct io_op_def io_op_defs[] = { + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, ++ .iopoll_queue = 1, + .async_size = sizeof(struct io_async_rw), + .name = "READ", + .prep = io_prep_rw, +@@ -290,6 +295,7 @@ const struct io_op_def io_op_defs[] = { + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, ++ .iopoll_queue = 1, + .async_size = sizeof(struct io_async_rw), + .name = "WRITE", + .prep = io_prep_rw, +@@ -475,6 +481,7 @@ const struct io_op_def io_op_defs[] = { + .needs_file = 1, + .plug = 1, + .name = "URING_CMD", ++ .iopoll_queue = 1, + .async_size = uring_cmd_pdu_size(1), + .prep = io_uring_cmd_prep, + .issue = io_uring_cmd, +--- a/io_uring/opdef.h ++++ b/io_uring/opdef.h +@@ -25,6 +25,8 @@ struct io_op_def { + unsigned ioprio : 1; + /* supports iopoll */ + unsigned iopoll : 1; ++ /* have to be put into the iopoll list */ ++ unsigned iopoll_queue : 1; + /* opcode specific path will handle ->async_data allocation if needed */ + unsigned manual_alloc : 1; + /* size of async data needed, if any */ diff --git a/queue-6.0/mm-mempolicy-fix-memory-leak-in-set_mempolicy_home_node-system-call.patch b/queue-6.0/mm-mempolicy-fix-memory-leak-in-set_mempolicy_home_node-system-call.patch new file mode 100644 index 00000000000..e94e2230eca --- /dev/null +++ b/queue-6.0/mm-mempolicy-fix-memory-leak-in-set_mempolicy_home_node-system-call.patch @@ -0,0 +1,51 @@ +From 38ce7c9bdfc228c14d7621ba36d3eebedd9d4f76 Mon Sep 17 00:00:00 2001 +From: Mathieu Desnoyers +Date: Thu, 15 Dec 2022 
14:46:21 -0500 +Subject: mm/mempolicy: fix memory leak in set_mempolicy_home_node system call + +From: Mathieu Desnoyers + +commit 38ce7c9bdfc228c14d7621ba36d3eebedd9d4f76 upstream. + +When encountering any vma in the range with policy other than MPOL_BIND or +MPOL_PREFERRED_MANY, an error is returned without issuing a mpol_put on +the policy just allocated with mpol_dup(). + +This allows arbitrary users to leak kernel memory. + +Link: https://lkml.kernel.org/r/20221215194621.202816-1-mathieu.desnoyers@efficios.com +Fixes: c6018b4b2549 ("mm/mempolicy: add set_mempolicy_home_node syscall") +Signed-off-by: Mathieu Desnoyers +Reviewed-by: Randy Dunlap +Reviewed-by: "Huang, Ying" +Reviewed-by: Aneesh Kumar K.V +Acked-by: Michal Hocko +Cc: Aneesh Kumar K.V +Cc: Dave Hansen +Cc: Feng Tang +Cc: Michal Hocko +Cc: Andrea Arcangeli +Cc: Mel Gorman +Cc: Mike Kravetz +Cc: Randy Dunlap +Cc: Vlastimil Babka +Cc: Andi Kleen +Cc: Dan Williams +Cc: Huang Ying +Cc: [5.17+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/mempolicy.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -1525,6 +1525,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, + * the home node for vmas we already updated before. + */ + if (new->mode != MPOL_BIND && new->mode != MPOL_PREFERRED_MANY) { ++ mpol_put(new); + err = -EOPNOTSUPP; + break; + } diff --git a/queue-6.0/pstore-properly-assign-mem_type-property.patch b/queue-6.0/pstore-properly-assign-mem_type-property.patch new file mode 100644 index 00000000000..d3b34e8be97 --- /dev/null +++ b/queue-6.0/pstore-properly-assign-mem_type-property.patch @@ -0,0 +1,42 @@ +From beca3e311a49cd3c55a056096531737d7afa4361 Mon Sep 17 00:00:00 2001 +From: Luca Stefani +Date: Thu, 22 Dec 2022 14:10:49 +0100 +Subject: pstore: Properly assign mem_type property + +From: Luca Stefani + +commit beca3e311a49cd3c55a056096531737d7afa4361 upstream. + +If mem-type is specified in the device tree +it would end up overriding the record_size +field instead of populating mem_type. + +As record_size is currently parsed after the +improper assignment with default size 0 it +continued to work as expected regardless of the +value found in the device tree. + +Simply changing the target field of the struct +is enough to get mem-type working as expected. 
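+
+[editor's illustration, not part of the upstream commit] A reduced standalone
+sketch, with a stub standing in for the device-tree helper, of why passing
+the wrong field to parse_u32() went unnoticed until mem-type was actually
+used:
+
+	#include <stdio.h>
+
+	struct pdata { unsigned long record_size; unsigned int mem_type; };
+
+	/* Stub: pretend the device tree contains mem-type = <1>. */
+	static int dt_u32(const char *name, unsigned int def, unsigned int *out)
+	{
+		*out = 1;
+		return 0;
+	}
+
+	#define parse_u32(name, field, def) do {		\
+		unsigned int value;				\
+		if (!dt_u32(name, def, &value))			\
+			field = value;				\
+	} while (0)
+
+	int main(void)
+	{
+		struct pdata p = { 0, 0 };
+
+		/* Buggy form: the mem-type value lands in record_size
+		 * (in the kernel it is then overwritten by the later
+		 * record-size parse, which masked the bug). */
+		parse_u32("mem-type", p.record_size, p.mem_type);
+		/* Fixed form: the value lands in mem_type. */
+		parse_u32("mem-type", p.mem_type, p.mem_type);
+		printf("record_size=%lu mem_type=%u\n",
+		       p.record_size, p.mem_type);
+		return 0;
+	}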
+ +Fixes: 9d843e8fafc7 ("pstore: Add mem_type property DT parsing support") +Cc: stable@vger.kernel.org +Signed-off-by: Luca Stefani +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20221222131049.286288-1-luca@osomprivacy.com +Signed-off-by: Greg Kroah-Hartman +--- + fs/pstore/ram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/pstore/ram.c ++++ b/fs/pstore/ram.c +@@ -670,7 +670,7 @@ static int ramoops_parse_dt(struct platf + field = value; \ + } + +- parse_u32("mem-type", pdata->record_size, pdata->mem_type); ++ parse_u32("mem-type", pdata->mem_type, pdata->mem_type); + parse_u32("record-size", pdata->record_size, 0); + parse_u32("console-size", pdata->console_size, 0); + parse_u32("ftrace-size", pdata->ftrace_size, 0); diff --git a/queue-6.0/pstore-zone-use-gfp_atomic-to-allocate-zone-buffer.patch b/queue-6.0/pstore-zone-use-gfp_atomic-to-allocate-zone-buffer.patch new file mode 100644 index 00000000000..13758abb427 --- /dev/null +++ b/queue-6.0/pstore-zone-use-gfp_atomic-to-allocate-zone-buffer.patch @@ -0,0 +1,36 @@ +From 99b3b837855b987563bcfb397cf9ddd88262814b Mon Sep 17 00:00:00 2001 +From: Qiujun Huang +Date: Sun, 4 Sep 2022 23:17:13 +0800 +Subject: pstore/zone: Use GFP_ATOMIC to allocate zone buffer + +From: Qiujun Huang + +commit 99b3b837855b987563bcfb397cf9ddd88262814b upstream. + +There is a case found when triggering a panic_on_oom, pstore fails to dump +kmsg. Because psz_kmsg_write_record can't get the new buffer. + +Handle this by using GFP_ATOMIC to allocate a buffer at lower watermark. + +Signed-off-by: Qiujun Huang +Fixes: 335426c6dcdd ("pstore/zone: Provide way to skip "broken" zone for MTD devices") +Cc: WeiXiong Liao +Cc: stable@vger.kernel.org +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/CAJRQjofRCF7wjrYmw3D7zd5QZnwHQq+F8U-mJDJ6NZ4bddYdLA@mail.gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + fs/pstore/zone.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/pstore/zone.c ++++ b/fs/pstore/zone.c +@@ -761,7 +761,7 @@ static inline int notrace psz_kmsg_write + /* avoid destroying old data, allocate a new one */ + len = zone->buffer_size + sizeof(*zone->buffer); + zone->oldbuf = zone->buffer; +- zone->buffer = kzalloc(len, GFP_KERNEL); ++ zone->buffer = kzalloc(len, GFP_ATOMIC); + if (!zone->buffer) { + zone->buffer = zone->oldbuf; + return -ENOMEM; diff --git a/queue-6.0/rtmutex-add-acquire-semantics-for-rtmutex-lock-acquisition-slow-path.patch b/queue-6.0/rtmutex-add-acquire-semantics-for-rtmutex-lock-acquisition-slow-path.patch new file mode 100644 index 00000000000..868afa775de --- /dev/null +++ b/queue-6.0/rtmutex-add-acquire-semantics-for-rtmutex-lock-acquisition-slow-path.patch @@ -0,0 +1,219 @@ +From 1c0908d8e441631f5b8ba433523cf39339ee2ba0 Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Fri, 2 Dec 2022 10:02:23 +0000 +Subject: rtmutex: Add acquire semantics for rtmutex lock acquisition slow path + +From: Mel Gorman + +commit 1c0908d8e441631f5b8ba433523cf39339ee2ba0 upstream. + +Jan Kara reported the following bug triggering on 6.0.5-rt14 running dbench +on XFS on arm64. + + kernel BUG at fs/inode.c:625! 
+ Internal error: Oops - BUG: 0 [#1] PREEMPT_RT SMP + CPU: 11 PID: 6611 Comm: dbench Tainted: G E 6.0.0-rt14-rt+ #1 + pc : clear_inode+0xa0/0xc0 + lr : clear_inode+0x38/0xc0 + Call trace: + clear_inode+0xa0/0xc0 + evict+0x160/0x180 + iput+0x154/0x240 + do_unlinkat+0x184/0x300 + __arm64_sys_unlinkat+0x48/0xc0 + el0_svc_common.constprop.4+0xe4/0x2c0 + do_el0_svc+0xac/0x100 + el0_svc+0x78/0x200 + el0t_64_sync_handler+0x9c/0xc0 + el0t_64_sync+0x19c/0x1a0 + +It also affects 6.1-rc7-rt5 and affects a preempt-rt fork of 5.14 so this +is likely a bug that existed forever and only became visible when ARM +support was added to preempt-rt. The same problem does not occur on x86-64 +and he also reported that converting sb->s_inode_wblist_lock to +raw_spinlock_t makes the problem disappear indicating that the RT spinlock +variant is the problem. + +Which in turn means that RT mutexes on ARM64 and any other weakly ordered +architecture are affected by this independent of RT. + +Will Deacon observed: + + "I'd be more inclined to be suspicious of the slowpath tbh, as we need to + make sure that we have acquire semantics on all paths where the lock can + be taken. Looking at the rtmutex code, this really isn't obvious to me + -- for example, try_to_take_rt_mutex() appears to be able to return via + the 'takeit' label without acquire semantics and it looks like we might + be relying on the caller's subsequent _unlock_ of the wait_lock for + ordering, but that will give us release semantics which aren't correct." + +Sebastian Andrzej Siewior prototyped a fix that does work based on that +comment but it was a little bit overkill and added some fences that should +not be necessary. + +The lock owner is updated with an IRQ-safe raw spinlock held, but the +spin_unlock does not provide acquire semantics which are needed when +acquiring a mutex. + +Adds the necessary acquire semantics for lock owner updates in the slow path +acquisition and the waiter bit logic. + +It successfully completed 10 iterations of the dbench workload while the +vanilla kernel fails on the first iteration. + +[ bigeasy@linutronix.de: Initial prototype fix ] + +Fixes: 700318d1d7b38 ("locking/rtmutex: Use acquire/release semantics") +Fixes: 23f78d4a03c5 ("[PATCH] pi-futex: rt mutex core") +Reported-by: Jan Kara +Signed-off-by: Mel Gorman +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20221202100223.6mevpbl7i6x5udfd@techsingularity.net +Signed-off-by: Greg Kroah-Hartman +--- + kernel/locking/rtmutex.c | 55 +++++++++++++++++++++++++++++++++++-------- + kernel/locking/rtmutex_api.c | 6 ++-- + 2 files changed, 49 insertions(+), 12 deletions(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -89,15 +89,31 @@ static inline int __ww_mutex_check_kill( + * set this bit before looking at the lock. + */ + +-static __always_inline void +-rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) ++static __always_inline struct task_struct * ++rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner) + { + unsigned long val = (unsigned long)owner; + + if (rt_mutex_has_waiters(lock)) + val |= RT_MUTEX_HAS_WAITERS; + +- WRITE_ONCE(lock->owner, (struct task_struct *)val); ++ return (struct task_struct *)val; ++} ++ ++static __always_inline void ++rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) ++{ ++ /* ++ * lock->wait_lock is held but explicit acquire semantics are needed ++ * for a new lock owner so WRITE_ONCE is insufficient. 
++ */ ++ xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner)); ++} ++ ++static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock) ++{ ++ /* lock->wait_lock is held so the unlock provides release semantics. */ ++ WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL)); + } + + static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock) +@@ -106,7 +122,8 @@ static __always_inline void clear_rt_mut + ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); + } + +-static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock) ++static __always_inline void ++fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock) + { + unsigned long owner, *p = (unsigned long *) &lock->owner; + +@@ -172,8 +189,21 @@ static __always_inline void fixup_rt_mut + * still set. + */ + owner = READ_ONCE(*p); +- if (owner & RT_MUTEX_HAS_WAITERS) +- WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); ++ if (owner & RT_MUTEX_HAS_WAITERS) { ++ /* ++ * See rt_mutex_set_owner() and rt_mutex_clear_owner() on ++ * why xchg_acquire() is used for updating owner for ++ * locking and WRITE_ONCE() for unlocking. ++ * ++ * WRITE_ONCE() would work for the acquire case too, but ++ * in case that the lock acquisition failed it might ++ * force other lockers into the slow path unnecessarily. ++ */ ++ if (acquire_lock) ++ xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS); ++ else ++ WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); ++ } + } + + /* +@@ -208,6 +238,13 @@ static __always_inline void mark_rt_mute + owner = *p; + } while (cmpxchg_relaxed(p, owner, + owner | RT_MUTEX_HAS_WAITERS) != owner); ++ ++ /* ++ * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE ++ * operations in the event of contention. Ensure the successful ++ * cmpxchg is visible. ++ */ ++ smp_mb__after_atomic(); + } + + /* +@@ -1243,7 +1280,7 @@ static int __sched __rt_mutex_slowtryloc + * try_to_take_rt_mutex() sets the lock waiters bit + * unconditionally. Clean this up. + */ +- fixup_rt_mutex_waiters(lock); ++ fixup_rt_mutex_waiters(lock, true); + + return ret; + } +@@ -1604,7 +1641,7 @@ static int __sched __rt_mutex_slowlock(s + * try_to_take_rt_mutex() sets the waiter bit + * unconditionally. We might have to fix that up. + */ +- fixup_rt_mutex_waiters(lock); ++ fixup_rt_mutex_waiters(lock, true); + + trace_contention_end(lock, ret); + +@@ -1719,7 +1756,7 @@ static void __sched rtlock_slowlock_lock + * try_to_take_rt_mutex() sets the waiter bit unconditionally. + * We might have to fix that up: + */ +- fixup_rt_mutex_waiters(lock); ++ fixup_rt_mutex_waiters(lock, true); + debug_rt_mutex_free_waiter(&waiter); + + trace_contention_end(lock, 0); +--- a/kernel/locking/rtmutex_api.c ++++ b/kernel/locking/rtmutex_api.c +@@ -267,7 +267,7 @@ void __sched rt_mutex_init_proxy_locked( + void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock) + { + debug_rt_mutex_proxy_unlock(lock); +- rt_mutex_set_owner(lock, NULL); ++ rt_mutex_clear_owner(lock); + } + + /** +@@ -382,7 +382,7 @@ int __sched rt_mutex_wait_proxy_lock(str + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. + */ +- fixup_rt_mutex_waiters(lock); ++ fixup_rt_mutex_waiters(lock, true); + raw_spin_unlock_irq(&lock->wait_lock); + + return ret; +@@ -438,7 +438,7 @@ bool __sched rt_mutex_cleanup_proxy_lock + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. 
+ */ +- fixup_rt_mutex_waiters(lock); ++ fixup_rt_mutex_waiters(lock, false); + + raw_spin_unlock_irq(&lock->wait_lock); + diff --git a/queue-6.0/series b/queue-6.0/series index 3107d05bc89..febfe338fa7 100644 --- a/queue-6.0/series +++ b/queue-6.0/series @@ -45,3 +45,12 @@ rtc-msc313-fix-function-prototype-mismatch-in-msc313.patch kprobes-kretprobe-events-missing-on-2-core-kvm-guest.patch hid-multitouch-fix-asus-expertbook-p2-p2451fa-trackp.patch hid-plantronics-additional-pids-for-double-volume-ke.patch +futex-fix-futex_waitv-hrtimer-debug-object-leak-on-kcalloc-error.patch +rtmutex-add-acquire-semantics-for-rtmutex-lock-acquisition-slow-path.patch +mm-mempolicy-fix-memory-leak-in-set_mempolicy_home_node-system-call.patch +pstore-properly-assign-mem_type-property.patch +pstore-zone-use-gfp_atomic-to-allocate-zone-buffer.patch +hfsplus-fix-bug-causing-custom-uid-and-gid-being-unable-to-be-assigned-with-mount.patch +eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch +eventfd-provide-a-eventfd_signal_mask-helper.patch +io_uring-dont-remove-file-from-msg_ring-reqs.patch -- 2.47.3