From a731c5a87f28f1d1f17f0d9a36f2c0cc1bf6b191 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 23 Nov 2022 09:16:50 +0100 Subject: [PATCH] 5.10-stable patches added patches: 9p-trans_fd-always-use-o_nonblock-read-write.patch 9p-trans_fd-p9_conn_cancel-drop-client-lock-earlier.patch gfs2-check-sb_bsize_shift-after-reading-superblock.patch gfs2-switch-from-strlcpy-to-strscpy.patch kcm-avoid-potential-race-in-kcm_tx_work.patch kcm-close-race-conditions-on-sk_receive_queue.patch macvlan-enforce-a-consistent-minimal-mtu.patch mm-fs-initialize-fsdata-passed-to-write_begin-write_end-interface.patch tcp-cdg-allow-tcp_cdg_release-to-be-called-multiple-times.patch --- ..._fd-always-use-o_nonblock-read-write.patch | 71 ++++++++ ...conn_cancel-drop-client-lock-earlier.patch | 41 +++++ ...bsize_shift-after-reading-superblock.patch | 59 +++++++ .../gfs2-switch-from-strlcpy-to-strscpy.patch | 55 ++++++ ...-avoid-potential-race-in-kcm_tx_work.patch | 72 ++++++++ ...-race-conditions-on-sk_receive_queue.patch | 165 ++++++++++++++++++ ...lan-enforce-a-consistent-minimal-mtu.patch | 45 +++++ ...d-to-write_begin-write_end-interface.patch | 115 ++++++++++++ queue-5.10/series | 9 + ..._release-to-be-called-multiple-times.patch | 155 ++++++++++++++++ 10 files changed, 787 insertions(+) create mode 100644 queue-5.10/9p-trans_fd-always-use-o_nonblock-read-write.patch create mode 100644 queue-5.10/9p-trans_fd-p9_conn_cancel-drop-client-lock-earlier.patch create mode 100644 queue-5.10/gfs2-check-sb_bsize_shift-after-reading-superblock.patch create mode 100644 queue-5.10/gfs2-switch-from-strlcpy-to-strscpy.patch create mode 100644 queue-5.10/kcm-avoid-potential-race-in-kcm_tx_work.patch create mode 100644 queue-5.10/kcm-close-race-conditions-on-sk_receive_queue.patch create mode 100644 queue-5.10/macvlan-enforce-a-consistent-minimal-mtu.patch create mode 100644 queue-5.10/mm-fs-initialize-fsdata-passed-to-write_begin-write_end-interface.patch create mode 100644 
queue-5.10/tcp-cdg-allow-tcp_cdg_release-to-be-called-multiple-times.patch diff --git a/queue-5.10/9p-trans_fd-always-use-o_nonblock-read-write.patch b/queue-5.10/9p-trans_fd-always-use-o_nonblock-read-write.patch new file mode 100644 index 00000000000..f181bcb2155 --- /dev/null +++ b/queue-5.10/9p-trans_fd-always-use-o_nonblock-read-write.patch @@ -0,0 +1,71 @@ +From ef575281b21e9a34dfae544a187c6aac2ae424a9 Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa +Date: Sat, 27 Aug 2022 00:27:46 +0900 +Subject: 9p/trans_fd: always use O_NONBLOCK read/write + +From: Tetsuo Handa + +commit ef575281b21e9a34dfae544a187c6aac2ae424a9 upstream. + +syzbot is reporting hung task at p9_fd_close() [1], for p9_mux_poll_stop() + from p9_conn_destroy() from p9_fd_close() is failing to interrupt already +started kernel_read() from p9_fd_read() from p9_read_work() and/or +kernel_write() from p9_fd_write() from p9_write_work() requests. + +Since p9_socket_open() sets O_NONBLOCK flag, p9_mux_poll_stop() does not +need to interrupt kernel_read()/kernel_write(). However, since p9_fd_open() +does not set O_NONBLOCK flag, but pipe blocks unless signal is pending, +p9_mux_poll_stop() needs to interrupt kernel_read()/kernel_write() when +the file descriptor refers to a pipe. In other words, pipe file descriptor +needs to be handled as if socket file descriptor. + +We somehow need to interrupt kernel_read()/kernel_write() on pipes. + +A minimal change, which this patch is doing, is to set O_NONBLOCK flag + from p9_fd_open(), for O_NONBLOCK flag does not affect reading/writing +of regular files. But this approach changes O_NONBLOCK flag on userspace- +supplied file descriptors (which might break userspace programs), and +O_NONBLOCK flag could be changed by userspace. It would be possible to set +O_NONBLOCK flag every time p9_fd_read()/p9_fd_write() is invoked, but still +remains small race window for clearing O_NONBLOCK flag. 
+ +If we don't want to manipulate O_NONBLOCK flag, we might be able to +surround kernel_read()/kernel_write() with set_thread_flag(TIF_SIGPENDING) +and recalc_sigpending(). Since p9_read_work()/p9_write_work() works are +processed by kernel threads which process global system_wq workqueue, +signals could not be delivered from remote threads when p9_mux_poll_stop() + from p9_conn_destroy() from p9_fd_close() is called. Therefore, calling +set_thread_flag(TIF_SIGPENDING)/recalc_sigpending() every time would be +needed if we count on signals for making kernel_read()/kernel_write() +non-blocking. + +Link: https://lkml.kernel.org/r/345de429-a88b-7097-d177-adecf9fed342@I-love.SAKURA.ne.jp +Link: https://syzkaller.appspot.com/bug?extid=8b41a1365f1106fd0f33 [1] +Reported-by: syzbot +Signed-off-by: Tetsuo Handa +Tested-by: syzbot +Reviewed-by: Christian Schoenebeck +[Dominique: add comment at Christian's suggestion] +Signed-off-by: Dominique Martinet +Signed-off-by: Greg Kroah-Hartman +--- + net/9p/trans_fd.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/9p/trans_fd.c ++++ b/net/9p/trans_fd.c +@@ -821,11 +821,14 @@ static int p9_fd_open(struct p9_client * + goto out_free_ts; + if (!(ts->rd->f_mode & FMODE_READ)) + goto out_put_rd; ++ /* prevent workers from hanging on IO when fd is a pipe */ ++ ts->rd->f_flags |= O_NONBLOCK; + ts->wr = fget(wfd); + if (!ts->wr) + goto out_put_rd; + if (!(ts->wr->f_mode & FMODE_WRITE)) + goto out_put_wr; ++ ts->wr->f_flags |= O_NONBLOCK; + + client->trans = ts; + client->status = Connected; diff --git a/queue-5.10/9p-trans_fd-p9_conn_cancel-drop-client-lock-earlier.patch b/queue-5.10/9p-trans_fd-p9_conn_cancel-drop-client-lock-earlier.patch new file mode 100644 index 00000000000..d4e8b36ad27 --- /dev/null +++ b/queue-5.10/9p-trans_fd-p9_conn_cancel-drop-client-lock-earlier.patch @@ -0,0 +1,41 @@ +From 52f1c45dde9136f964d63a77d19826c8a74e2c7f Mon Sep 17 00:00:00 2001 +From: Dominique Martinet +Date: Wed, 17 Aug 2022 14:58:44 +0900 
+Subject: 9p: trans_fd/p9_conn_cancel: drop client lock earlier + +From: Dominique Martinet + +commit 52f1c45dde9136f964d63a77d19826c8a74e2c7f upstream. + +syzbot reported a double-lock here and we no longer need this +lock after requests have been moved off to local list: +just drop the lock earlier. + +Link: https://lkml.kernel.org/r/20220904064028.1305220-1-asmadeus@codewreck.org +Reported-by: syzbot+50f7e8d06c3768dd97f3@syzkaller.appspotmail.com +Signed-off-by: Dominique Martinet +Tested-by: Schspa Shi +Signed-off-by: Greg Kroah-Hartman +--- + net/9p/trans_fd.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/9p/trans_fd.c ++++ b/net/9p/trans_fd.c +@@ -205,6 +205,8 @@ static void p9_conn_cancel(struct p9_con + list_move(&req->req_list, &cancel_list); + } + ++ spin_unlock(&m->client->lock); ++ + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { + p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); + list_del(&req->req_list); +@@ -212,7 +214,6 @@ static void p9_conn_cancel(struct p9_con + req->t_err = err; + p9_client_cb(m->client, req, REQ_STATUS_ERROR); + } +- spin_unlock(&m->client->lock); + } + + static __poll_t diff --git a/queue-5.10/gfs2-check-sb_bsize_shift-after-reading-superblock.patch b/queue-5.10/gfs2-check-sb_bsize_shift-after-reading-superblock.patch new file mode 100644 index 00000000000..1f309ab241d --- /dev/null +++ b/queue-5.10/gfs2-check-sb_bsize_shift-after-reading-superblock.patch @@ -0,0 +1,59 @@ +From 670f8ce56dd0632dc29a0322e188cc73ce3c6b92 Mon Sep 17 00:00:00 2001 +From: Andrew Price +Date: Wed, 17 Aug 2022 13:22:00 +0100 +Subject: gfs2: Check sb_bsize_shift after reading superblock + +From: Andrew Price + +commit 670f8ce56dd0632dc29a0322e188cc73ce3c6b92 upstream. + +Fuzzers like to scribble over sb_bsize_shift but in reality it's very +unlikely that this field would be corrupted on its own. Nevertheless it +should be checked to avoid the possibility of messy mount errors due to +bad calculations. 
It's always a fixed value based on the block size so +we can just check that it's the expected value. + +Tested with: + + mkfs.gfs2 -O -p lock_nolock /dev/vdb + for i in 0 -1 64 65 32 33; do + gfs2_edit -p sb field sb_bsize_shift $i /dev/vdb + mount /dev/vdb /mnt/test && umount /mnt/test + done + +Before this patch we get a withdraw after + +[ 76.413681] gfs2: fsid=loop0.0: fatal: invalid metadata block +[ 76.413681] bh = 19 (type: exp=5, found=4) +[ 76.413681] function = gfs2_meta_buffer, file = fs/gfs2/meta_io.c, line = 492 + +and with UBSAN configured we also get complaints like + +[ 76.373395] UBSAN: shift-out-of-bounds in fs/gfs2/ops_fstype.c:295:19 +[ 76.373815] shift exponent 4294967287 is too large for 64-bit type 'long unsigned int' + +After the patch, these complaints don't appear, mount fails immediately +and we get an explanation in dmesg. + +Reported-by: syzbot+dcf33a7aae997956fe06@syzkaller.appspotmail.com +Signed-off-by: Andrew Price +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Greg Kroah-Hartman +--- + fs/gfs2/ops_fstype.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/gfs2/ops_fstype.c ++++ b/fs/gfs2/ops_fstype.c +@@ -182,7 +182,10 @@ static int gfs2_check_sb(struct gfs2_sbd + pr_warn("Invalid superblock size\n"); + return -EINVAL; + } +- ++ if (sb->sb_bsize_shift != ffs(sb->sb_bsize) - 1) { ++ pr_warn("Invalid block size shift\n"); ++ return -EINVAL; ++ } + return 0; + } + diff --git a/queue-5.10/gfs2-switch-from-strlcpy-to-strscpy.patch b/queue-5.10/gfs2-switch-from-strlcpy-to-strscpy.patch new file mode 100644 index 00000000000..c7f86105da1 --- /dev/null +++ b/queue-5.10/gfs2-switch-from-strlcpy-to-strscpy.patch @@ -0,0 +1,55 @@ +From 204c0300c4e99707e9fb6e57840aa1127060e63f Mon Sep 17 00:00:00 2001 +From: Andreas Gruenbacher +Date: Fri, 26 Aug 2022 15:12:17 +0200 +Subject: gfs2: Switch from strlcpy to strscpy + +From: Andreas Gruenbacher + +commit 204c0300c4e99707e9fb6e57840aa1127060e63f upstream. 
+ +Switch from strlcpy to strscpy and make sure that @count is the size of +the smaller of the source and destination buffers. This prevents +reading beyond the end of the source buffer when the source string isn't +null terminated. + +Found by a modified version of syzkaller. + +Suggested-by: Wolfram Sang +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Greg Kroah-Hartman +--- + fs/gfs2/ops_fstype.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/fs/gfs2/ops_fstype.c ++++ b/fs/gfs2/ops_fstype.c +@@ -384,8 +384,10 @@ static int init_names(struct gfs2_sbd *s + if (!table[0]) + table = sdp->sd_vfs->s_id; + +- strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN); +- strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN); ++ BUILD_BUG_ON(GFS2_LOCKNAME_LEN > GFS2_FSNAME_LEN); ++ ++ strscpy(sdp->sd_proto_name, proto, GFS2_LOCKNAME_LEN); ++ strscpy(sdp->sd_table_name, table, GFS2_LOCKNAME_LEN); + + table = sdp->sd_table_name; + while ((table = strchr(table, '/'))) +@@ -1417,13 +1419,13 @@ static int gfs2_parse_param(struct fs_co + + switch (o) { + case Opt_lockproto: +- strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN); ++ strscpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN); + break; + case Opt_locktable: +- strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN); ++ strscpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN); + break; + case Opt_hostdata: +- strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN); ++ strscpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN); + break; + case Opt_spectator: + args->ar_spectator = 1; diff --git a/queue-5.10/kcm-avoid-potential-race-in-kcm_tx_work.patch b/queue-5.10/kcm-avoid-potential-race-in-kcm_tx_work.patch new file mode 100644 index 00000000000..b064362951d --- /dev/null +++ b/queue-5.10/kcm-avoid-potential-race-in-kcm_tx_work.patch @@ -0,0 +1,72 @@ +From ec7eede369fe5b0d085ac51fdbb95184f87bfc6c Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 12 
Oct 2022 13:34:12 +0000 +Subject: kcm: avoid potential race in kcm_tx_work + +From: Eric Dumazet + +commit ec7eede369fe5b0d085ac51fdbb95184f87bfc6c upstream. + +syzbot found that kcm_tx_work() could crash [1] in: + + /* Primarily for SOCK_SEQPACKET sockets */ + if (likely(sk->sk_socket) && + test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { +<<*>> clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + sk->sk_write_space(sk); + } + +I think the reason is that another thread might concurrently +run in kcm_release() and call sock_orphan(sk) while sk is not +locked. kcm_tx_work() finds sk->sk_socket being NULL. + +[1] +BUG: KASAN: null-ptr-deref in instrument_atomic_write include/linux/instrumented.h:86 [inline] +BUG: KASAN: null-ptr-deref in clear_bit include/asm-generic/bitops/instrumented-atomic.h:41 [inline] +BUG: KASAN: null-ptr-deref in kcm_tx_work+0xff/0x160 net/kcm/kcmsock.c:742 +Write of size 8 at addr 0000000000000008 by task kworker/u4:3/53 + +CPU: 0 PID: 53 Comm: kworker/u4:3 Not tainted 5.19.0-rc3-next-20220621-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: kkcmd kcm_tx_work +Call Trace: +<TASK> +__dump_stack lib/dump_stack.c:88 [inline] +dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 +kasan_report+0xbe/0x1f0 mm/kasan/report.c:495 +check_region_inline mm/kasan/generic.c:183 [inline] +kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189 +instrument_atomic_write include/linux/instrumented.h:86 [inline] +clear_bit include/asm-generic/bitops/instrumented-atomic.h:41 [inline] +kcm_tx_work+0xff/0x160 net/kcm/kcmsock.c:742 +process_one_work+0x996/0x1610 kernel/workqueue.c:2289 +worker_thread+0x665/0x1080 kernel/workqueue.c:2436 +kthread+0x2e9/0x3a0 kernel/kthread.c:376 +ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 +</TASK> + +Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Cc: Tom Herbert +Link: 
https://lore.kernel.org/r/20221012133412.519394-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/kcm/kcmsock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/kcm/kcmsock.c ++++ b/net/kcm/kcmsock.c +@@ -1844,10 +1844,10 @@ static int kcm_release(struct socket *so + kcm = kcm_sk(sk); + mux = kcm->mux; + ++ lock_sock(sk); + sock_orphan(sk); + kfree_skb(kcm->seq_skb); + +- lock_sock(sk); + /* Purge queue under lock to avoid race condition with tx_work trying + * to act when queue is nonempty. If tx_work runs after this point + * it will just return. diff --git a/queue-5.10/kcm-close-race-conditions-on-sk_receive_queue.patch b/queue-5.10/kcm-close-race-conditions-on-sk_receive_queue.patch new file mode 100644 index 00000000000..adc86ce60a8 --- /dev/null +++ b/queue-5.10/kcm-close-race-conditions-on-sk_receive_queue.patch @@ -0,0 +1,165 @@ +From 5121197ecc5db58c07da95eb1ff82b98b121a221 Mon Sep 17 00:00:00 2001 +From: Cong Wang +Date: Sun, 13 Nov 2022 16:51:19 -0800 +Subject: kcm: close race conditions on sk_receive_queue + +From: Cong Wang + +commit 5121197ecc5db58c07da95eb1ff82b98b121a221 upstream. + +sk->sk_receive_queue is protected by skb queue lock, but for KCM +sockets its RX path takes mux->rx_lock to protect more than just +skb queue. However, kcm_recvmsg() still only grabs the skb queue +lock, so race conditions still exist. + +We can teach kcm_recvmsg() to grab mux->rx_lock too but this would +introduce a potential performance regression as struct kcm_mux can +be shared by multiple KCM sockets. + +So we have to enforce skb queue lock in requeue_rx_msgs() and handle +skb peek case carefully in kcm_wait_data(). Fortunately, +skb_recv_datagram() already handles it nicely and is widely used by +other sockets, we can just switch to skb_recv_datagram() after +getting rid of the unnecessary sock lock in kcm_recvmsg() and +kcm_splice_read(). 
Side note: SOCK_DONE is not used by KCM sockets, +so it is safe to get rid of this check too. + +I ran the original syzbot reproducer for 30 min without seeing any +issue. + +Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") +Reported-by: syzbot+278279efdd2730dd14bf@syzkaller.appspotmail.com +Reported-by: shaozhengchao +Cc: Paolo Abeni +Cc: Tom Herbert +Signed-off-by: Cong Wang +Link: https://lore.kernel.org/r/20221114005119.597905-1-xiyou.wangcong@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/kcm/kcmsock.c | 60 +++++++----------------------------------------------- + 1 file changed, 8 insertions(+), 52 deletions(-) + +--- a/net/kcm/kcmsock.c ++++ b/net/kcm/kcmsock.c +@@ -221,7 +221,7 @@ static void requeue_rx_msgs(struct kcm_m + struct sk_buff *skb; + struct kcm_sock *kcm; + +- while ((skb = __skb_dequeue(head))) { ++ while ((skb = skb_dequeue(head))) { + /* Reset destructor to avoid calling kcm_rcv_ready */ + skb->destructor = sock_rfree; + skb_orphan(skb); +@@ -1084,53 +1084,18 @@ out_error: + return err; + } + +-static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, +- long timeo, int *err) +-{ +- struct sk_buff *skb; +- +- while (!(skb = skb_peek(&sk->sk_receive_queue))) { +- if (sk->sk_err) { +- *err = sock_error(sk); +- return NULL; +- } +- +- if (sock_flag(sk, SOCK_DONE)) +- return NULL; +- +- if ((flags & MSG_DONTWAIT) || !timeo) { +- *err = -EAGAIN; +- return NULL; +- } +- +- sk_wait_data(sk, &timeo, NULL); +- +- /* Handle signals */ +- if (signal_pending(current)) { +- *err = sock_intr_errno(timeo); +- return NULL; +- } +- } +- +- return skb; +-} +- + static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) + { ++ int noblock = flags & MSG_DONTWAIT; + struct sock *sk = sock->sk; + struct kcm_sock *kcm = kcm_sk(sk); + int err = 0; +- long timeo; + struct strp_msg *stm; + int copied = 0; + struct sk_buff *skb; + +- timeo = sock_rcvtimeo(sk, flags & 
MSG_DONTWAIT); +- +- lock_sock(sk); +- +- skb = kcm_wait_data(sk, flags, timeo, &err); ++ skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + +@@ -1161,14 +1126,11 @@ msg_finished: + /* Finished with message */ + msg->msg_flags |= MSG_EOR; + KCM_STATS_INCR(kcm->stats.rx_msgs); +- skb_unlink(skb, &sk->sk_receive_queue); +- kfree_skb(skb); + } + } + + out: +- release_sock(sk); +- ++ skb_free_datagram(sk, skb); + return copied ? : err; + } + +@@ -1176,9 +1138,9 @@ static ssize_t kcm_splice_read(struct so + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) + { ++ int noblock = flags & MSG_DONTWAIT; + struct sock *sk = sock->sk; + struct kcm_sock *kcm = kcm_sk(sk); +- long timeo; + struct strp_msg *stm; + int err = 0; + ssize_t copied; +@@ -1186,11 +1148,7 @@ static ssize_t kcm_splice_read(struct so + + /* Only support splice for SOCKSEQPACKET */ + +- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); +- +- lock_sock(sk); +- +- skb = kcm_wait_data(sk, flags, timeo, &err); ++ skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto err_out; + +@@ -1218,13 +1176,11 @@ static ssize_t kcm_splice_read(struct so + * finish reading the message. + */ + +- release_sock(sk); +- ++ skb_free_datagram(sk, skb); + return copied; + + err_out: +- release_sock(sk); +- ++ skb_free_datagram(sk, skb); + return err; + } + diff --git a/queue-5.10/macvlan-enforce-a-consistent-minimal-mtu.patch b/queue-5.10/macvlan-enforce-a-consistent-minimal-mtu.patch new file mode 100644 index 00000000000..85bef2f3a2c --- /dev/null +++ b/queue-5.10/macvlan-enforce-a-consistent-minimal-mtu.patch @@ -0,0 +1,45 @@ +From b64085b00044bdf3cd1c9825e9ef5b2e0feae91a Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 7 Oct 2022 15:57:43 -0700 +Subject: macvlan: enforce a consistent minimal mtu + +From: Eric Dumazet + +commit b64085b00044bdf3cd1c9825e9ef5b2e0feae91a upstream. + +macvlan should enforce a minimal mtu of 68, even at link creation. 
+ +This patch avoids the current behavior (which could lead to crashes +in ipv6 stack if the link is brought up) + +$ ip link add macvlan1 link eno1 mtu 8 type macvlan # This should fail ! +$ ip link sh dev macvlan1 +5: macvlan1@eno1: <BROADCAST,MULTICAST> mtu 8 qdisc noop + state DOWN mode DEFAULT group default qlen 1000 + link/ether 02:47:6c:24:74:82 brd ff:ff:ff:ff:ff:ff +$ ip link set macvlan1 mtu 67 +Error: mtu less than device minimum. +$ ip link set macvlan1 mtu 68 +$ ip link set macvlan1 mtu 8 +Error: mtu less than device minimum. + +Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvlan.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/macvlan.c ++++ b/drivers/net/macvlan.c +@@ -1176,7 +1176,7 @@ void macvlan_common_setup(struct net_dev + { + ether_setup(dev); + +- dev->min_mtu = 0; ++ /* ether_setup() has set dev->min_mtu to ETH_MIN_MTU. */ + dev->max_mtu = ETH_MAX_MTU; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + netif_keep_dst(dev); diff --git a/queue-5.10/mm-fs-initialize-fsdata-passed-to-write_begin-write_end-interface.patch b/queue-5.10/mm-fs-initialize-fsdata-passed-to-write_begin-write_end-interface.patch new file mode 100644 index 00000000000..8b41dc337f1 --- /dev/null +++ b/queue-5.10/mm-fs-initialize-fsdata-passed-to-write_begin-write_end-interface.patch @@ -0,0 +1,115 @@ +From 1468c6f4558b1bcd92aa0400f2920f9dc7588402 Mon Sep 17 00:00:00 2001 +From: Alexander Potapenko +Date: Thu, 15 Sep 2022 17:04:16 +0200 +Subject: mm: fs: initialize fsdata passed to write_begin/write_end interface + +From: Alexander Potapenko + +commit 1468c6f4558b1bcd92aa0400f2920f9dc7588402 upstream. + +Functions implementing the a_ops->write_end() interface accept the `void +*fsdata` parameter that is supposed to be initialized by the corresponding +a_ops->write_begin() (which accepts `void **fsdata`). 
+ +However not all a_ops->write_begin() implementations initialize `fsdata` +unconditionally, so it may get passed uninitialized to a_ops->write_end(), +resulting in undefined behavior. + +Fix this by initializing fsdata with NULL before the call to +write_begin(), rather than doing so in all possible a_ops implementations. + +This patch covers only the following cases found by running x86 KMSAN +under syzkaller: + + - generic_perform_write() + - cont_expand_zero() and generic_cont_expand_simple() + - page_symlink() + +Other cases of passing uninitialized fsdata may persist in the codebase. + +Link: https://lkml.kernel.org/r/20220915150417.722975-43-glider@google.com +Signed-off-by: Alexander Potapenko +Cc: Alexander Viro +Cc: Alexei Starovoitov +Cc: Andrey Konovalov +Cc: Andrey Konovalov +Cc: Andy Lutomirski +Cc: Arnd Bergmann +Cc: Borislav Petkov +Cc: Christoph Hellwig +Cc: Christoph Lameter +Cc: David Rientjes +Cc: Dmitry Vyukov +Cc: Eric Biggers +Cc: Eric Biggers +Cc: Eric Dumazet +Cc: Greg Kroah-Hartman +Cc: Herbert Xu +Cc: Ilya Leoshkevich +Cc: Ingo Molnar +Cc: Jens Axboe +Cc: Joonsoo Kim +Cc: Kees Cook +Cc: Marco Elver +Cc: Mark Rutland +Cc: Matthew Wilcox +Cc: Michael S. 
Tsirkin +Cc: Pekka Enberg +Cc: Peter Zijlstra +Cc: Petr Mladek +Cc: Stephen Rothwell +Cc: Steven Rostedt +Cc: Thomas Gleixner +Cc: Vasily Gorbik +Cc: Vegard Nossum +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/buffer.c | 4 ++-- + fs/namei.c | 2 +- + mm/filemap.c | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +--- a/fs/buffer.c ++++ b/fs/buffer.c +@@ -2350,7 +2350,7 @@ int generic_cont_expand_simple(struct in + { + struct address_space *mapping = inode->i_mapping; + struct page *page; +- void *fsdata; ++ void *fsdata = NULL; + int err; + + err = inode_newsize_ok(inode, size); +@@ -2376,7 +2376,7 @@ static int cont_expand_zero(struct file + struct inode *inode = mapping->host; + unsigned int blocksize = i_blocksize(inode); + struct page *page; +- void *fsdata; ++ void *fsdata = NULL; + pgoff_t index, curidx; + loff_t curpos; + unsigned zerofrom, offset, len; +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -4633,7 +4633,7 @@ int __page_symlink(struct inode *inode, + { + struct address_space *mapping = inode->i_mapping; + struct page *page; +- void *fsdata; ++ void *fsdata = NULL; + int err; + unsigned int flags = 0; + if (nofs) +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -3303,7 +3303,7 @@ ssize_t generic_perform_write(struct fil + unsigned long offset; /* Offset into pagecache page */ + unsigned long bytes; /* Bytes to write to page */ + size_t copied; /* Bytes copied from user */ +- void *fsdata; ++ void *fsdata = NULL; + + offset = (pos & (PAGE_SIZE - 1)); + bytes = min_t(unsigned long, PAGE_SIZE - offset, diff --git a/queue-5.10/series b/queue-5.10/series index 3dca5538d24..6c71548ca2e 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -135,3 +135,12 @@ scsi-scsi_debug-fix-possible-uaf-in-sdebug_add_host_.patch kprobes-skip-clearing-aggrprobe-s-post_handler-in-kp.patch input-i8042-fix-leaking-of-platform-device-on-module.patch uapi-linux-stddef.h-add-include-guards.patch 
+macvlan-enforce-a-consistent-minimal-mtu.patch +tcp-cdg-allow-tcp_cdg_release-to-be-called-multiple-times.patch +kcm-avoid-potential-race-in-kcm_tx_work.patch +kcm-close-race-conditions-on-sk_receive_queue.patch +9p-trans_fd-p9_conn_cancel-drop-client-lock-earlier.patch +gfs2-check-sb_bsize_shift-after-reading-superblock.patch +gfs2-switch-from-strlcpy-to-strscpy.patch +9p-trans_fd-always-use-o_nonblock-read-write.patch +mm-fs-initialize-fsdata-passed-to-write_begin-write_end-interface.patch diff --git a/queue-5.10/tcp-cdg-allow-tcp_cdg_release-to-be-called-multiple-times.patch b/queue-5.10/tcp-cdg-allow-tcp_cdg_release-to-be-called-multiple-times.patch new file mode 100644 index 00000000000..6148722f794 --- /dev/null +++ b/queue-5.10/tcp-cdg-allow-tcp_cdg_release-to-be-called-multiple-times.patch @@ -0,0 +1,155 @@ +From 72e560cb8c6f80fc2b4afc5d3634a32465e13a51 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 11 Oct 2022 15:07:48 -0700 +Subject: tcp: cdg: allow tcp_cdg_release() to be called multiple times + +From: Eric Dumazet + +commit 72e560cb8c6f80fc2b4afc5d3634a32465e13a51 upstream. + +Apparently, mptcp is able to call tcp_disconnect() on an already +disconnected flow. This is generally fine, unless current congestion +control is CDG, because it might trigger a double-free [1] + +Instead of fixing MPTCP, and future bugs, we can make tcp_disconnect() +more resilient. 
+ +[1] +BUG: KASAN: double-free in slab_free mm/slub.c:3539 [inline] +BUG: KASAN: double-free in kfree+0xe2/0x580 mm/slub.c:4567 + +CPU: 0 PID: 3645 Comm: kworker/0:7 Not tainted 6.0.0-syzkaller-02734-g0326074ff465 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 +Workqueue: events mptcp_worker +Call Trace: +<TASK> +__dump_stack lib/dump_stack.c:88 [inline] +dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 +print_address_description mm/kasan/report.c:317 [inline] +print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 +kasan_report_invalid_free+0x81/0x190 mm/kasan/report.c:462 +____kasan_slab_free+0x18b/0x1c0 mm/kasan/common.c:356 +kasan_slab_free include/linux/kasan.h:200 [inline] +slab_free_hook mm/slub.c:1759 [inline] +slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1785 +slab_free mm/slub.c:3539 [inline] +kfree+0xe2/0x580 mm/slub.c:4567 +tcp_disconnect+0x980/0x1e20 net/ipv4/tcp.c:3145 +__mptcp_close_ssk+0x5ca/0x7e0 net/mptcp/protocol.c:2327 +mptcp_do_fastclose net/mptcp/protocol.c:2592 [inline] +mptcp_worker+0x78c/0xff0 net/mptcp/protocol.c:2627 +process_one_work+0x991/0x1610 kernel/workqueue.c:2289 +worker_thread+0x665/0x1080 kernel/workqueue.c:2436 +kthread+0x2e4/0x3a0 kernel/kthread.c:376 +ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 +</TASK> + +Allocated by task 3671: +kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 +kasan_set_track mm/kasan/common.c:45 [inline] +set_alloc_info mm/kasan/common.c:437 [inline] +____kasan_kmalloc mm/kasan/common.c:516 [inline] +____kasan_kmalloc mm/kasan/common.c:475 [inline] +__kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:525 +kmalloc_array include/linux/slab.h:640 [inline] +kcalloc include/linux/slab.h:671 [inline] +tcp_cdg_init+0x10d/0x170 net/ipv4/tcp_cdg.c:380 +tcp_init_congestion_control+0xab/0x550 net/ipv4/tcp_cong.c:193 +tcp_reinit_congestion_control net/ipv4/tcp_cong.c:217 [inline] +tcp_set_congestion_control+0x96c/0xaa0 net/ipv4/tcp_cong.c:391 +do_tcp_setsockopt+0x505/0x2320 
net/ipv4/tcp.c:3513 +tcp_setsockopt+0xd4/0x100 net/ipv4/tcp.c:3801 +mptcp_setsockopt+0x35f/0x2570 net/mptcp/sockopt.c:844 +__sys_setsockopt+0x2d6/0x690 net/socket.c:2252 +__do_sys_setsockopt net/socket.c:2263 [inline] +__se_sys_setsockopt net/socket.c:2260 [inline] +__x64_sys_setsockopt+0xba/0x150 net/socket.c:2260 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Freed by task 16: +kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 +kasan_set_track+0x21/0x30 mm/kasan/common.c:45 +kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 +____kasan_slab_free mm/kasan/common.c:367 [inline] +____kasan_slab_free+0x166/0x1c0 mm/kasan/common.c:329 +kasan_slab_free include/linux/kasan.h:200 [inline] +slab_free_hook mm/slub.c:1759 [inline] +slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1785 +slab_free mm/slub.c:3539 [inline] +kfree+0xe2/0x580 mm/slub.c:4567 +tcp_cleanup_congestion_control+0x70/0x120 net/ipv4/tcp_cong.c:226 +tcp_v4_destroy_sock+0xdd/0x750 net/ipv4/tcp_ipv4.c:2254 +tcp_v6_destroy_sock+0x11/0x20 net/ipv6/tcp_ipv6.c:1969 +inet_csk_destroy_sock+0x196/0x440 net/ipv4/inet_connection_sock.c:1157 +tcp_done+0x23b/0x340 net/ipv4/tcp.c:4649 +tcp_rcv_state_process+0x40e7/0x4990 net/ipv4/tcp_input.c:6624 +tcp_v6_do_rcv+0x3fc/0x13c0 net/ipv6/tcp_ipv6.c:1525 +tcp_v6_rcv+0x2e8e/0x3830 net/ipv6/tcp_ipv6.c:1759 +ip6_protocol_deliver_rcu+0x2db/0x1950 net/ipv6/ip6_input.c:439 +ip6_input_finish+0x14c/0x2c0 net/ipv6/ip6_input.c:484 +NF_HOOK include/linux/netfilter.h:302 [inline] +NF_HOOK include/linux/netfilter.h:296 [inline] +ip6_input+0x9c/0xd0 net/ipv6/ip6_input.c:493 +dst_input include/net/dst.h:455 [inline] +ip6_rcv_finish+0x193/0x2c0 net/ipv6/ip6_input.c:79 +ip_sabotage_in net/bridge/br_netfilter_hooks.c:874 [inline] +ip_sabotage_in+0x1fa/0x260 net/bridge/br_netfilter_hooks.c:865 +nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] +nf_hook_slow+0xc5/0x1f0 
net/netfilter/core.c:614 +nf_hook.constprop.0+0x3ac/0x650 include/linux/netfilter.h:257 +NF_HOOK include/linux/netfilter.h:300 [inline] +ipv6_rcv+0x9e/0x380 net/ipv6/ip6_input.c:309 +__netif_receive_skb_one_core+0x114/0x180 net/core/dev.c:5485 +__netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5599 +netif_receive_skb_internal net/core/dev.c:5685 [inline] +netif_receive_skb+0x12f/0x8d0 net/core/dev.c:5744 +NF_HOOK include/linux/netfilter.h:302 [inline] +NF_HOOK include/linux/netfilter.h:296 [inline] +br_pass_frame_up+0x303/0x410 net/bridge/br_input.c:68 +br_handle_frame_finish+0x909/0x1aa0 net/bridge/br_input.c:199 +br_nf_hook_thresh+0x2f8/0x3d0 net/bridge/br_netfilter_hooks.c:1041 +br_nf_pre_routing_finish_ipv6+0x695/0xef0 net/bridge/br_netfilter_ipv6.c:207 +NF_HOOK include/linux/netfilter.h:302 [inline] +br_nf_pre_routing_ipv6+0x417/0x7c0 net/bridge/br_netfilter_ipv6.c:237 +br_nf_pre_routing+0x1496/0x1fe0 net/bridge/br_netfilter_hooks.c:507 +nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] +nf_hook_bridge_pre net/bridge/br_input.c:255 [inline] +br_handle_frame+0x9c9/0x12d0 net/bridge/br_input.c:399 +__netif_receive_skb_core+0x9fe/0x38f0 net/core/dev.c:5379 +__netif_receive_skb_one_core+0xae/0x180 net/core/dev.c:5483 +__netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5599 +process_backlog+0x3a0/0x7c0 net/core/dev.c:5927 +__napi_poll+0xb3/0x6d0 net/core/dev.c:6494 +napi_poll net/core/dev.c:6561 [inline] +net_rx_action+0x9c1/0xd90 net/core/dev.c:6672 +__do_softirq+0x1d0/0x9c8 kernel/softirq.c:571 + +Fixes: 2b0a8c9eee81 ("tcp: add CDG congestion control") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_cdg.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv4/tcp_cdg.c ++++ b/net/ipv4/tcp_cdg.c +@@ -375,6 +375,7 @@ static void tcp_cdg_init(struct sock *sk + struct cdg *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + ++ ca->gradients = NULL; + /* We silently fall back to window = 1 if allocation fails. */ + if (window > 1) + ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), +@@ -388,6 +389,7 @@ static void tcp_cdg_release(struct sock + struct cdg *ca = inet_csk_ca(sk); + + kfree(ca->gradients); ++ ca->gradients = NULL; + } + + static struct tcp_congestion_ops tcp_cdg __read_mostly = { -- 2.47.3