--- /dev/null
+From stable+bounces-86910-greg=kroah.com@vger.kernel.org Sat Oct 19 12:29:23 2024
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Sat, 19 Oct 2024 12:29:08 +0200
+Subject: mptcp: handle consistently DSS corruption
+To: mptcp@lists.linux.dev, stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: Paolo Abeni <pabeni@redhat.com>, sashal@kernel.org, Matthieu Baerts <matttbe@kernel.org>, Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20241019102905.3383483-7-matttbe@kernel.org>
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit e32d262c89e2b22cb0640223f953b548617ed8a6 upstream.
+
+A buggy peer implementation can send corrupted DSS options, consistently
+hitting a few warnings in the data path. Use DEBUG_NET assertions to
+avoid the splat on some builds, and handle the error consistently,
+incrementing the related MIBs and performing fallback and/or reset
+according to the subflow type.
+
+Fixes: 6771bfd9ee24 ("mptcp: update mptcp ack sequence from work queue")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-1-c6fb8e93e551@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ Conflicts in mib.[ch], because commit 104125b82e5c ("mptcp: add mib
+ for infinite map sending") is linked to a new feature, not available
+ in this version. Resolving the conflicts is easy, simply adding the
+ new lines declaring the new "DSS corruptions" MIB entries.
+ Also removed all DEBUG_NET_WARN_ON_ONCE in protocol.c and subflow.c
+ because they are not defined in this version: the MIB counters added
+ in this commit are enough. ]
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/mib.c | 2 ++
+ net/mptcp/mib.h | 2 ++
+ net/mptcp/protocol.c | 20 +++++++++++++++++---
+ net/mptcp/subflow.c | 2 +-
+ 4 files changed, 22 insertions(+), 4 deletions(-)
+
+--- a/net/mptcp/mib.c
++++ b/net/mptcp/mib.c
+@@ -23,6 +23,8 @@ static const struct snmp_mib mptcp_snmp_
+ SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
+ SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
+ SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
++ SNMP_MIB_ITEM("DSSCorruptionFallback", MPTCP_MIB_DSSCORRUPTIONFALLBACK),
++ SNMP_MIB_ITEM("DSSCorruptionReset", MPTCP_MIB_DSSCORRUPTIONRESET),
+ SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
+ SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
+ SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
+--- a/net/mptcp/mib.h
++++ b/net/mptcp/mib.h
+@@ -16,6 +16,8 @@ enum linux_mptcp_mib_field {
+ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
+ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
+ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
++ MPTCP_MIB_DSSCORRUPTIONFALLBACK,/* DSS corruption detected, fallback */
++ MPTCP_MIB_DSSCORRUPTIONRESET, /* DSS corruption detected, MPJ subflow reset */
+ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
+ MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */
+ MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -457,6 +457,18 @@ static void mptcp_check_data_fin(struct
+ }
+ }
+
++static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
++{
++ if (READ_ONCE(msk->allow_infinite_fallback)) {
++ MPTCP_INC_STATS(sock_net(ssk),
++ MPTCP_MIB_DSSCORRUPTIONFALLBACK);
++ mptcp_do_fallback(ssk);
++ } else {
++ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
++ mptcp_subflow_reset(ssk);
++ }
++}
++
+ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
+ struct sock *ssk,
+ unsigned int *bytes)
+@@ -519,10 +531,12 @@ static bool __mptcp_move_skbs_from_subfl
+ moved += len;
+ seq += len;
+
+- if (WARN_ON_ONCE(map_remaining < len))
+- break;
++ if (unlikely(map_remaining < len))
++ mptcp_dss_corruption(msk, ssk);
+ } else {
+- WARN_ON_ONCE(!fin);
++ if (unlikely(!fin))
++ mptcp_dss_corruption(msk, ssk);
++
+ sk_eat_skb(ssk, skb);
+ done = true;
+ }
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -702,7 +702,7 @@ static bool skb_is_fully_mapped(struct s
+ unsigned int skb_consumed;
+
+ skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
+- if (WARN_ON_ONCE(skb_consumed >= skb->len))
++ if (unlikely(skb_consumed >= skb->len))
+ return true;
+
+ return skb->len - skb_consumed <= subflow->map_data_len -
--- /dev/null
+From stable+bounces-86909-greg=kroah.com@vger.kernel.org Sat Oct 19 12:29:21 2024
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Sat, 19 Oct 2024 12:29:07 +0200
+Subject: mptcp: track and update contiguous data status
+To: mptcp@lists.linux.dev, stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: Geliang Tang <geliang.tang@suse.com>, sashal@kernel.org, Paolo Abeni <pabeni@redhat.com>, Mat Martineau <mathew.j.martineau@linux.intel.com>, "David S . Miller" <davem@davemloft.net>, Matthieu Baerts <matttbe@kernel.org>
+Message-ID: <20241019102905.3383483-6-matttbe@kernel.org>
+
+From: Geliang Tang <geliang.tang@suse.com>
+
+commit 0530020a7c8f2204e784f0dbdc882bbd961fdbde upstream.
+
+This patch adds a new member allow_infinite_fallback in mptcp_sock,
+which is initialized to 'true' when the connection begins and is set
+to 'false' on any retransmit or successful MP_JOIN. Only do infinite
+mapping fallback if there is a single subflow AND there have been no
+retransmissions AND there have never been any MP_JOINs.
+
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Geliang Tang <geliang.tang@suse.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: e32d262c89e2 ("mptcp: handle consistently DSS corruption")
+[ Conflicts in protocol.c, because commit 3e5014909b56 ("mptcp: cleanup
+ MPJ subflow list handling") is not in this version. This commit is
+ linked to a new feature, changing the context around. The new line
+ can still be added at the same place.
+ Conflicts in protocol.h, because commit 4f6e14bd19d6 ("mptcp: support
+ TCP_CORK and TCP_NODELAY") is not in this version. This commit is
+ linked to a new feature, changing the context around. The new line can
+ still be added at the same place.
+ Conflicts in subflow.c, because commit 0348c690ed37 ("mptcp: add the
+ fallback check") is not in this version. This commit is linked to a
+ new feature, changing the context around. The new line can still be
+ added at the same place.
+ Extra conflicts in v5.10, because the context has been changed. ]
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c | 6 +++++-
+ net/mptcp/protocol.h | 1 +
+ net/mptcp/subflow.c | 1 +
+ 3 files changed, 7 insertions(+), 1 deletion(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1810,9 +1810,11 @@ static void mptcp_worker(struct work_str
+ if (!mptcp_ext_cache_refill(msk))
+ break;
+ }
+- if (copied)
++ if (copied) {
+ tcp_push(ssk, msg.msg_flags, mss_now, tcp_sk(ssk)->nonagle,
+ size_goal);
++ WRITE_ONCE(msk->allow_infinite_fallback, false);
++ }
+
+ dfrag->data_seq = orig_write_seq;
+ dfrag->offset = orig_offset;
+@@ -1845,6 +1847,7 @@ static int __mptcp_init_sock(struct sock
+
+ msk->first = NULL;
+ inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
++ WRITE_ONCE(msk->allow_infinite_fallback, true);
+
+ mptcp_pm_data_init(msk);
+
+@@ -2543,6 +2546,7 @@ bool mptcp_finish_join(struct sock *sk)
+ if (parent_sock && !sk->sk_socket)
+ mptcp_sock_graft(sk, parent_sock);
+ subflow->map_seq = READ_ONCE(msk->ack_seq);
++ WRITE_ONCE(msk->allow_infinite_fallback, false);
+ return true;
+ }
+
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -213,6 +213,7 @@ struct mptcp_sock {
+ bool rcv_data_fin;
+ bool snd_data_fin_enable;
+ bool use_64bit_ack; /* Set when we received a 64-bit DSN */
++ bool allow_infinite_fallback;
+ spinlock_t join_list_lock;
+ struct work_struct work;
+ struct sk_buff *ooo_last_skb;
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1179,6 +1179,7 @@ int __mptcp_subflow_connect(struct sock
+ list_add_tail(&subflow->node, &msk->join_list);
+ spin_unlock_bh(&msk->join_list_lock);
+
++ WRITE_ONCE(msk->allow_infinite_fallback, false);
+ return err;
+
+ failed:
--- /dev/null
+From 08cfa12adf888db98879dbd735bc741360a34168 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Fri, 4 Oct 2024 12:35:31 +0900
+Subject: nilfs2: propagate directory read errors from nilfs_find_entry()
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 08cfa12adf888db98879dbd735bc741360a34168 upstream.
+
+Syzbot reported that a task hang occurs in vcs_open() during a fuzzing
+test for nilfs2.
+
+The root cause of this problem is that nilfs_find_entry(), which
+searches for directory entries, ignores errors when loading a directory
+page/folio via nilfs_get_folio() fails.
+
+If the filesystem image is corrupted, and the i_size of the directory
+inode is large, and the directory page/folio is successfully read but
+fails the sanity check, for example when it is zero-filled,
+nilfs_check_folio() may continue to spit out error messages in bursts.
+
+Fix this issue by propagating the error to the callers when loading a
+page/folio fails in nilfs_find_entry().
+
+The current interface of nilfs_find_entry() and its callers is outdated
+and cannot propagate error codes such as -EIO and -ENOMEM returned via
+nilfs_find_entry(), so fix it together.
+
+Link: https://lkml.kernel.org/r/20241004033640.6841-1-konishi.ryusuke@gmail.com
+Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations")
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: Lizhi Xu <lizhi.xu@windriver.com>
+Closes: https://lkml.kernel.org/r/20240927013806.3577931-1-lizhi.xu@windriver.com
+Reported-by: syzbot+8a192e8d090fa9a31135@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=8a192e8d090fa9a31135
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/dir.c | 50 +++++++++++++++++++++++++++-----------------------
+ fs/nilfs2/namei.c | 39 ++++++++++++++++++++++++++-------------
+ fs/nilfs2/nilfs.h | 2 +-
+ 3 files changed, 54 insertions(+), 37 deletions(-)
+
+--- a/fs/nilfs2/dir.c
++++ b/fs/nilfs2/dir.c
+@@ -331,6 +331,8 @@ static int nilfs_readdir(struct file *fi
+ * returns the page in which the entry was found, and the entry itself
+ * (as a parameter - res_dir). Page is returned mapped and unlocked.
+ * Entry is guaranteed to be valid.
++ *
++ * On failure, returns an error pointer and the caller should ignore res_page.
+ */
+ struct nilfs_dir_entry *
+ nilfs_find_entry(struct inode *dir, const struct qstr *qstr,
+@@ -358,22 +360,24 @@ nilfs_find_entry(struct inode *dir, cons
+ do {
+ char *kaddr = nilfs_get_page(dir, n, &page);
+
+- if (!IS_ERR(kaddr)) {
+- de = (struct nilfs_dir_entry *)kaddr;
+- kaddr += nilfs_last_byte(dir, n) - reclen;
+- while ((char *) de <= kaddr) {
+- if (de->rec_len == 0) {
+- nilfs_error(dir->i_sb,
+- "zero-length directory entry");
+- nilfs_put_page(page);
+- goto out;
+- }
+- if (nilfs_match(namelen, name, de))
+- goto found;
+- de = nilfs_next_entry(de);
++ if (IS_ERR(kaddr))
++ return ERR_CAST(kaddr);
++
++ de = (struct nilfs_dir_entry *)kaddr;
++ kaddr += nilfs_last_byte(dir, n) - reclen;
++ while ((char *)de <= kaddr) {
++ if (de->rec_len == 0) {
++ nilfs_error(dir->i_sb,
++ "zero-length directory entry");
++ nilfs_put_page(page);
++ goto out;
+ }
+- nilfs_put_page(page);
++ if (nilfs_match(namelen, name, de))
++ goto found;
++ de = nilfs_next_entry(de);
+ }
++ nilfs_put_page(page);
++
+ if (++n >= npages)
+ n = 0;
+ /* next page is past the blocks we've got */
+@@ -386,7 +390,7 @@ nilfs_find_entry(struct inode *dir, cons
+ }
+ } while (n != start);
+ out:
+- return NULL;
++ return ERR_PTR(-ENOENT);
+
+ found:
+ *res_page = page;
+@@ -431,19 +435,19 @@ fail:
+ return NULL;
+ }
+
+-ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
++int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino)
+ {
+- ino_t res = 0;
+ struct nilfs_dir_entry *de;
+ struct page *page;
+
+ de = nilfs_find_entry(dir, qstr, &page);
+- if (de) {
+- res = le64_to_cpu(de->inode);
+- kunmap(page);
+- put_page(page);
+- }
+- return res;
++ if (IS_ERR(de))
++ return PTR_ERR(de);
++
++ *ino = le64_to_cpu(de->inode);
++ kunmap(page);
++ put_page(page);
++ return 0;
+ }
+
+ /* Releases the page */
+--- a/fs/nilfs2/namei.c
++++ b/fs/nilfs2/namei.c
+@@ -55,12 +55,20 @@ nilfs_lookup(struct inode *dir, struct d
+ {
+ struct inode *inode;
+ ino_t ino;
++ int res;
+
+ if (dentry->d_name.len > NILFS_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+- ino = nilfs_inode_by_name(dir, &dentry->d_name);
+- inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL;
++ res = nilfs_inode_by_name(dir, &dentry->d_name, &ino);
++ if (res) {
++ if (res != -ENOENT)
++ return ERR_PTR(res);
++ inode = NULL;
++ } else {
++ inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino);
++ }
++
+ return d_splice_alias(inode, dentry);
+ }
+
+@@ -261,10 +269,11 @@ static int nilfs_do_unlink(struct inode
+ struct page *page;
+ int err;
+
+- err = -ENOENT;
+ de = nilfs_find_entry(dir, &dentry->d_name, &page);
+- if (!de)
++ if (IS_ERR(de)) {
++ err = PTR_ERR(de);
+ goto out;
++ }
+
+ inode = d_inode(dentry);
+ err = -EIO;
+@@ -358,10 +367,11 @@ static int nilfs_rename(struct inode *ol
+ if (unlikely(err))
+ return err;
+
+- err = -ENOENT;
+ old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page);
+- if (!old_de)
++ if (IS_ERR(old_de)) {
++ err = PTR_ERR(old_de);
+ goto out;
++ }
+
+ if (S_ISDIR(old_inode->i_mode)) {
+ err = -EIO;
+@@ -378,10 +388,12 @@ static int nilfs_rename(struct inode *ol
+ if (dir_de && !nilfs_empty_dir(new_inode))
+ goto out_dir;
+
+- err = -ENOENT;
+- new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
+- if (!new_de)
++ new_de = nilfs_find_entry(new_dir, &new_dentry->d_name,
++ &new_page);
++ if (IS_ERR(new_de)) {
++ err = PTR_ERR(new_de);
+ goto out_dir;
++ }
+ nilfs_set_link(new_dir, new_de, new_page, old_inode);
+ nilfs_mark_inode_dirty(new_dir);
+ new_inode->i_ctime = current_time(new_inode);
+@@ -435,14 +447,15 @@ out:
+ */
+ static struct dentry *nilfs_get_parent(struct dentry *child)
+ {
+- unsigned long ino;
++ ino_t ino;
++ int res;
+ struct inode *inode;
+ struct qstr dotdot = QSTR_INIT("..", 2);
+ struct nilfs_root *root;
+
+- ino = nilfs_inode_by_name(d_inode(child), &dotdot);
+- if (!ino)
+- return ERR_PTR(-ENOENT);
++ res = nilfs_inode_by_name(d_inode(child), &dotdot, &ino);
++ if (res)
++ return ERR_PTR(res);
+
+ root = NILFS_I(d_inode(child))->i_root;
+
+--- a/fs/nilfs2/nilfs.h
++++ b/fs/nilfs2/nilfs.h
+@@ -233,7 +233,7 @@ static inline __u32 nilfs_mask_flags(umo
+
+ /* dir.c */
+ extern int nilfs_add_link(struct dentry *, struct inode *);
+-extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *);
++int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino);
+ extern int nilfs_make_empty(struct inode *, struct inode *);
+ extern struct nilfs_dir_entry *
+ nilfs_find_entry(struct inode *, const struct qstr *, struct page **);
--- /dev/null
+From 7b31f7dadd7074fa70bb14a53bd286ffdfc98b04 Mon Sep 17 00:00:00 2001
+From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Date: Mon, 4 Jul 2022 12:08:51 +0530
+Subject: powerpc/mm: Always update max/min_low_pfn in mem_topology_setup()
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+commit 7b31f7dadd7074fa70bb14a53bd286ffdfc98b04 upstream.
+
+For both CONFIG_NUMA enabled/disabled use mem_topology_setup() to
+update max/min_low_pfn.
+
+This also adds min_low_pfn update to CONFIG_NUMA which was initialized
+to zero before. (mpe: Though MEMORY_START is == 0 for PPC64=y which is
+all possible NUMA=y systems)
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220704063851.295482-1-aneesh.kumar@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/mm/numa.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/powerpc/mm/numa.c
++++ b/arch/powerpc/mm/numa.c
+@@ -1177,6 +1177,9 @@ void __init mem_topology_setup(void)
+ {
+ int cpu;
+
++ max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
++ min_low_pfn = MEMORY_START >> PAGE_SHIFT;
++
+ /*
+ * Linux/mm assumes node 0 to be online at boot. However this is not
+ * true on PowerPC, where node 0 is similar to any other node, it
+@@ -1221,9 +1224,6 @@ void __init initmem_init(void)
+ {
+ int nid;
+
+- max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+- max_pfn = max_low_pfn;
+-
+ memblock_dump_all();
+
+ for_each_online_node(nid) {
x86-entry_32-do-not-clobber-user-eflags.zf.patch
x86-entry_32-clear-cpu-buffers-after-register-restore-in-nmi-return.patch
irqchip-gic-v4-don-t-allow-a-vmovp-on-a-dying-vpe.patch
+mptcp-track-and-update-contiguous-data-status.patch
+mptcp-handle-consistently-dss-corruption.patch
+tcp-fix-mptcp-dss-corruption-due-to-large-pmtu-xmit.patch
+nilfs2-propagate-directory-read-errors-from-nilfs_find_entry.patch
+powerpc-mm-always-update-max-min_low_pfn-in-mem_topology_setup.patch
--- /dev/null
+From stable+bounces-86911-greg=kroah.com@vger.kernel.org Sat Oct 19 12:29:21 2024
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Sat, 19 Oct 2024 12:29:09 +0200
+Subject: tcp: fix mptcp DSS corruption due to large pmtu xmit
+To: mptcp@lists.linux.dev, stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: Paolo Abeni <pabeni@redhat.com>, sashal@kernel.org, syzbot+d1bff73460e33101f0e7@syzkaller.appspotmail.com, Matthieu Baerts <matttbe@kernel.org>, Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20241019102905.3383483-8-matttbe@kernel.org>
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 4dabcdf581217e60690467a37c956a5b8dbc6bd9 upstream.
+
+Syzkaller was able to trigger a DSS corruption:
+
+ TCP: request_sock_subflow_v4: Possible SYN flooding on port [::]:20002. Sending cookies.
+ ------------[ cut here ]------------
+ WARNING: CPU: 0 PID: 5227 at net/mptcp/protocol.c:695 __mptcp_move_skbs_from_subflow+0x20a9/0x21f0 net/mptcp/protocol.c:695
+ Modules linked in:
+ CPU: 0 UID: 0 PID: 5227 Comm: syz-executor350 Not tainted 6.11.0-syzkaller-08829-gaf9c191ac2a0 #0
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024
+ RIP: 0010:__mptcp_move_skbs_from_subflow+0x20a9/0x21f0 net/mptcp/protocol.c:695
+ Code: 0f b6 dc 31 ff 89 de e8 b5 dd ea f5 89 d8 48 81 c4 50 01 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc e8 98 da ea f5 90 <0f> 0b 90 e9 47 ff ff ff e8 8a da ea f5 90 0f 0b 90 e9 99 e0 ff ff
+ RSP: 0018:ffffc90000006db8 EFLAGS: 00010246
+ RAX: ffffffff8ba9df18 RBX: 00000000000055f0 RCX: ffff888030023c00
+ RDX: 0000000000000100 RSI: 00000000000081e5 RDI: 00000000000055f0
+ RBP: 1ffff110062bf1ae R08: ffffffff8ba9cf12 R09: 1ffff110062bf1b8
+ R10: dffffc0000000000 R11: ffffed10062bf1b9 R12: 0000000000000000
+ R13: dffffc0000000000 R14: 00000000700cec61 R15: 00000000000081e5
+ FS: 000055556679c380(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000020287000 CR3: 0000000077892000 CR4: 00000000003506f0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ <IRQ>
+ move_skbs_to_msk net/mptcp/protocol.c:811 [inline]
+ mptcp_data_ready+0x29c/0xa90 net/mptcp/protocol.c:854
+ subflow_data_ready+0x34a/0x920 net/mptcp/subflow.c:1490
+ tcp_data_queue+0x20fd/0x76c0 net/ipv4/tcp_input.c:5283
+ tcp_rcv_established+0xfba/0x2020 net/ipv4/tcp_input.c:6237
+ tcp_v4_do_rcv+0x96d/0xc70 net/ipv4/tcp_ipv4.c:1915
+ tcp_v4_rcv+0x2dc0/0x37f0 net/ipv4/tcp_ipv4.c:2350
+ ip_protocol_deliver_rcu+0x22e/0x440 net/ipv4/ip_input.c:205
+ ip_local_deliver_finish+0x341/0x5f0 net/ipv4/ip_input.c:233
+ NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314
+ NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314
+ __netif_receive_skb_one_core net/core/dev.c:5662 [inline]
+ __netif_receive_skb+0x2bf/0x650 net/core/dev.c:5775
+ process_backlog+0x662/0x15b0 net/core/dev.c:6107
+ __napi_poll+0xcb/0x490 net/core/dev.c:6771
+ napi_poll net/core/dev.c:6840 [inline]
+ net_rx_action+0x89b/0x1240 net/core/dev.c:6962
+ handle_softirqs+0x2c5/0x980 kernel/softirq.c:554
+ do_softirq+0x11b/0x1e0 kernel/softirq.c:455
+ </IRQ>
+ <TASK>
+ __local_bh_enable_ip+0x1bb/0x200 kernel/softirq.c:382
+ local_bh_enable include/linux/bottom_half.h:33 [inline]
+ rcu_read_unlock_bh include/linux/rcupdate.h:919 [inline]
+ __dev_queue_xmit+0x1764/0x3e80 net/core/dev.c:4451
+ dev_queue_xmit include/linux/netdevice.h:3094 [inline]
+ neigh_hh_output include/net/neighbour.h:526 [inline]
+ neigh_output include/net/neighbour.h:540 [inline]
+ ip_finish_output2+0xd41/0x1390 net/ipv4/ip_output.c:236
+ ip_local_out net/ipv4/ip_output.c:130 [inline]
+ __ip_queue_xmit+0x118c/0x1b80 net/ipv4/ip_output.c:536
+ __tcp_transmit_skb+0x2544/0x3b30 net/ipv4/tcp_output.c:1466
+ tcp_transmit_skb net/ipv4/tcp_output.c:1484 [inline]
+ tcp_mtu_probe net/ipv4/tcp_output.c:2547 [inline]
+ tcp_write_xmit+0x641d/0x6bf0 net/ipv4/tcp_output.c:2752
+ __tcp_push_pending_frames+0x9b/0x360 net/ipv4/tcp_output.c:3015
+ tcp_push_pending_frames include/net/tcp.h:2107 [inline]
+ tcp_data_snd_check net/ipv4/tcp_input.c:5714 [inline]
+ tcp_rcv_established+0x1026/0x2020 net/ipv4/tcp_input.c:6239
+ tcp_v4_do_rcv+0x96d/0xc70 net/ipv4/tcp_ipv4.c:1915
+ sk_backlog_rcv include/net/sock.h:1113 [inline]
+ __release_sock+0x214/0x350 net/core/sock.c:3072
+ release_sock+0x61/0x1f0 net/core/sock.c:3626
+ mptcp_push_release net/mptcp/protocol.c:1486 [inline]
+ __mptcp_push_pending+0x6b5/0x9f0 net/mptcp/protocol.c:1625
+ mptcp_sendmsg+0x10bb/0x1b10 net/mptcp/protocol.c:1903
+ sock_sendmsg_nosec net/socket.c:730 [inline]
+ __sock_sendmsg+0x1a6/0x270 net/socket.c:745
+ ____sys_sendmsg+0x52a/0x7e0 net/socket.c:2603
+ ___sys_sendmsg net/socket.c:2657 [inline]
+ __sys_sendmsg+0x2aa/0x390 net/socket.c:2686
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+ RIP: 0033:0x7fb06e9317f9
+ Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+ RSP: 002b:00007ffe2cfd4f98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+ RAX: ffffffffffffffda RBX: 00007fb06e97f468 RCX: 00007fb06e9317f9
+ RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000005
+ RBP: 00007fb06e97f446 R08: 0000555500000000 R09: 0000555500000000
+ R10: 0000555500000000 R11: 0000000000000246 R12: 00007fb06e97f406
+ R13: 0000000000000001 R14: 00007ffe2cfd4fe0 R15: 0000000000000003
+ </TASK>
+
+Additionally syzkaller provided a nice reproducer. The repro enables
+pmtu on the loopback device, leading to tcp_mtu_probe() generating
+very large probe packets.
+
+tcp_can_coalesce_send_queue_head() currently does not check for
+mptcp-level invariants, and allowed the creation of cross-DSS probes,
+leading to the mentioned corruption.
+
+Address the issue by teaching tcp_can_coalesce_send_queue_head() about
+mptcp via tcp_skb_can_collapse(), which also reduces code
+duplication.
+
+Fixes: 85712484110d ("tcp: coalesce/collapse must respect MPTCP extensions")
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+d1bff73460e33101f0e7@syzkaller.appspotmail.com
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/513
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-2-c6fb8e93e551@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ Conflict in tcp_output.c, because commit 65249feb6b3d ("net: add
+ support for skbs with unreadable frags"), and commit 9b65b17db723
+ ("net: avoid double accounting for pure zerocopy skbs") are not in
+ this version. These commits are linked to new features and introduce
+ new conditions which cause the conflicts. Resolving this is easy: we
+ can ignore the missing new condition, and use tcp_skb_can_collapse()
+ like in the original patch. ]
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2305,7 +2305,7 @@ static bool tcp_can_coalesce_send_queue_
+ if (len <= skb->len)
+ break;
+
+- if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
++ if (tcp_has_tx_tstamp(skb) || !tcp_skb_can_collapse(skb, next))
+ return false;
+
+ len -= skb->len;