--- /dev/null
+From f6fca73dc66d83d92c64deb18842e51646064802 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jul 2024 14:55:55 +0100
+Subject: bonding: Fix out-of-bounds read in bond_option_arp_ip_targets_set()
+
+From: Sam Sun <samsun1006219@gmail.com>
+
+[ Upstream commit e271ff53807e8f2c628758290f0e499dbe51cb3d ]
+
+In bond_option_arp_ip_targets_set(), if newval->string is an empty
+string, newval->string + 1 points one byte past the string's terminating
+NUL, causing an out-of-bounds read.
+
+BUG: KASAN: slab-out-of-bounds in strlen+0x7d/0xa0 lib/string.c:418
+Read of size 1 at addr ffff8881119c4781 by task syz-executor665/8107
+CPU: 1 PID: 8107 Comm: syz-executor665 Not tainted 6.7.0-rc7 #1
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106
+ print_address_description mm/kasan/report.c:364 [inline]
+ print_report+0xc1/0x5e0 mm/kasan/report.c:475
+ kasan_report+0xbe/0xf0 mm/kasan/report.c:588
+ strlen+0x7d/0xa0 lib/string.c:418
+ __fortify_strlen include/linux/fortify-string.h:210 [inline]
+ in4_pton+0xa3/0x3f0 net/core/utils.c:130
+ bond_option_arp_ip_targets_set+0xc2/0x910
+drivers/net/bonding/bond_options.c:1201
+ __bond_opt_set+0x2a4/0x1030 drivers/net/bonding/bond_options.c:767
+ __bond_opt_set_notify+0x48/0x150 drivers/net/bonding/bond_options.c:792
+ bond_opt_tryset_rtnl+0xda/0x160 drivers/net/bonding/bond_options.c:817
+ bonding_sysfs_store_option+0xa1/0x120 drivers/net/bonding/bond_sysfs.c:156
+ dev_attr_store+0x54/0x80 drivers/base/core.c:2366
+ sysfs_kf_write+0x114/0x170 fs/sysfs/file.c:136
+ kernfs_fop_write_iter+0x337/0x500 fs/kernfs/file.c:334
+ call_write_iter include/linux/fs.h:2020 [inline]
+ new_sync_write fs/read_write.c:491 [inline]
+ vfs_write+0x96a/0xd80 fs/read_write.c:584
+ ksys_write+0x122/0x250 fs/read_write.c:637
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x40/0x110 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x63/0x6b
+---[ end trace ]---
+
+Fix it by checking the string length before using it.
+
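+For illustration, a minimal sketch of the failure mode (the variable
+names below are hypothetical; only in4_pton() is the real helper):
+
+    const char *val = "";       /* empty string written via sysfs */
+    /* the option code skips a leading '+'/'-' add/remove marker */
+    const char *tgt = val + 1;  /* one byte past the terminating NUL */
+    __be32 addr;
+
+    in4_pton(tgt, -1, (u8 *)&addr, -1, NULL);  /* strlen(tgt) reads OOB */
+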
+Fixes: f9de11a16594 ("bonding: add ip checks when store ip target")
+Signed-off-by: Yue Sun <samsun1006219@gmail.com>
+Signed-off-by: Simon Horman <horms@kernel.org>
+Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
+Link: https://patch.msgid.link/20240702-bond-oob-v6-1-2dfdba195c19@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/bonding/bond_options.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
+index c9aa28eee191d..a818720ef0e49 100644
+--- a/drivers/net/bonding/bond_options.c
++++ b/drivers/net/bonding/bond_options.c
+@@ -1074,9 +1074,9 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
+ __be32 target;
+
+ if (newval->string) {
+- if (!in4_pton(newval->string+1, -1, (u8 *)&target, -1, NULL)) {
+- netdev_err(bond->dev, "invalid ARP target %pI4 specified\n",
+- &target);
++ if (strlen(newval->string) < 1 ||
++ !in4_pton(newval->string + 1, -1, (u8 *)&target, -1, NULL)) {
++ netdev_err(bond->dev, "invalid ARP target specified\n");
+ return ret;
+ }
+ if (newval->string[0] == '+')
+--
+2.43.0
+
--- /dev/null
+From 7938be5c38869a82b74ebfcf9d2c5be4c10c706f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jul 2024 18:16:49 +0900
+Subject: inet_diag: Initialize pad field in struct inet_diag_req_v2
+
+From: Shigeru Yoshida <syoshida@redhat.com>
+
+[ Upstream commit 61cf1c739f08190a4cbf047b9fbb192a94d87e3f ]
+
+KMSAN reported uninit-value access in raw_lookup() [1]. Diag for raw
+sockets uses the pad field in struct inet_diag_req_v2 for the
+underlying protocol. This field corresponds to the sdiag_raw_protocol
+field in struct inet_diag_req_raw.
+
+inet_diag_get_exact_compat() converts inet_diag_req to
+inet_diag_req_v2, but leaves the pad field uninitialized. So the issue
+occurs when raw_lookup() accesses the sdiag_raw_protocol field.
+
+Fix this by initializing the pad field in
+inet_diag_get_exact_compat(). Also apply the same fix in
+inet_diag_dump_compat() to avoid a similar issue in the future.
+
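+For reference, struct inet_diag_req_raw reinterprets the pad byte
+(layout paraphrased from include/uapi/linux/inet_diag.h, trimmed to the
+relevant fields):
+
+    struct inet_diag_req_v2 {      /* what the compat path fills in */
+            __u8    sdiag_family;
+            __u8    sdiag_protocol;
+            __u8    idiag_ext;
+            __u8    pad;           /* == sdiag_raw_protocol in _req_raw */
+            ...
+    };
+
+so an uninitialized pad is read back as the raw protocol by raw_lookup().
+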
+[1]
+BUG: KMSAN: uninit-value in raw_lookup net/ipv4/raw_diag.c:49 [inline]
+BUG: KMSAN: uninit-value in raw_sock_get+0x657/0x800 net/ipv4/raw_diag.c:71
+ raw_lookup net/ipv4/raw_diag.c:49 [inline]
+ raw_sock_get+0x657/0x800 net/ipv4/raw_diag.c:71
+ raw_diag_dump_one+0xa1/0x660 net/ipv4/raw_diag.c:99
+ inet_diag_cmd_exact+0x7d9/0x980
+ inet_diag_get_exact_compat net/ipv4/inet_diag.c:1404 [inline]
+ inet_diag_rcv_msg_compat+0x469/0x530 net/ipv4/inet_diag.c:1426
+ sock_diag_rcv_msg+0x23d/0x740 net/core/sock_diag.c:282
+ netlink_rcv_skb+0x537/0x670 net/netlink/af_netlink.c:2564
+ sock_diag_rcv+0x35/0x40 net/core/sock_diag.c:297
+ netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline]
+ netlink_unicast+0xe74/0x1240 net/netlink/af_netlink.c:1361
+ netlink_sendmsg+0x10c6/0x1260 net/netlink/af_netlink.c:1905
+ sock_sendmsg_nosec net/socket.c:730 [inline]
+ __sock_sendmsg+0x332/0x3d0 net/socket.c:745
+ ____sys_sendmsg+0x7f0/0xb70 net/socket.c:2585
+ ___sys_sendmsg+0x271/0x3b0 net/socket.c:2639
+ __sys_sendmsg net/socket.c:2668 [inline]
+ __do_sys_sendmsg net/socket.c:2677 [inline]
+ __se_sys_sendmsg net/socket.c:2675 [inline]
+ __x64_sys_sendmsg+0x27e/0x4a0 net/socket.c:2675
+ x64_sys_call+0x135e/0x3ce0 arch/x86/include/generated/asm/syscalls_64.h:47
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0xd9/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Uninit was stored to memory at:
+ raw_sock_get+0x650/0x800 net/ipv4/raw_diag.c:71
+ raw_diag_dump_one+0xa1/0x660 net/ipv4/raw_diag.c:99
+ inet_diag_cmd_exact+0x7d9/0x980
+ inet_diag_get_exact_compat net/ipv4/inet_diag.c:1404 [inline]
+ inet_diag_rcv_msg_compat+0x469/0x530 net/ipv4/inet_diag.c:1426
+ sock_diag_rcv_msg+0x23d/0x740 net/core/sock_diag.c:282
+ netlink_rcv_skb+0x537/0x670 net/netlink/af_netlink.c:2564
+ sock_diag_rcv+0x35/0x40 net/core/sock_diag.c:297
+ netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline]
+ netlink_unicast+0xe74/0x1240 net/netlink/af_netlink.c:1361
+ netlink_sendmsg+0x10c6/0x1260 net/netlink/af_netlink.c:1905
+ sock_sendmsg_nosec net/socket.c:730 [inline]
+ __sock_sendmsg+0x332/0x3d0 net/socket.c:745
+ ____sys_sendmsg+0x7f0/0xb70 net/socket.c:2585
+ ___sys_sendmsg+0x271/0x3b0 net/socket.c:2639
+ __sys_sendmsg net/socket.c:2668 [inline]
+ __do_sys_sendmsg net/socket.c:2677 [inline]
+ __se_sys_sendmsg net/socket.c:2675 [inline]
+ __x64_sys_sendmsg+0x27e/0x4a0 net/socket.c:2675
+ x64_sys_call+0x135e/0x3ce0 arch/x86/include/generated/asm/syscalls_64.h:47
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0xd9/0x1e0 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Local variable req.i created at:
+ inet_diag_get_exact_compat net/ipv4/inet_diag.c:1396 [inline]
+ inet_diag_rcv_msg_compat+0x2a6/0x530 net/ipv4/inet_diag.c:1426
+ sock_diag_rcv_msg+0x23d/0x740 net/core/sock_diag.c:282
+
+CPU: 1 PID: 8888 Comm: syz-executor.6 Not tainted 6.10.0-rc4-00217-g35bb670d65fc #32
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/2014
+
+Fixes: 432490f9d455 ("net: ip, diag -- Add diag interface for raw sockets")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20240703091649.111773-1-syoshida@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/inet_diag.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
+index d07917059d70f..a876991ecb2ca 100644
+--- a/net/ipv4/inet_diag.c
++++ b/net/ipv4/inet_diag.c
+@@ -1097,6 +1097,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb,
+ req.sdiag_family = AF_UNSPEC; /* compatibility */
+ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
+ req.idiag_ext = rc->idiag_ext;
++ req.pad = 0;
+ req.idiag_states = rc->idiag_states;
+ req.id = rc->id;
+
+@@ -1115,6 +1116,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
+ req.sdiag_family = rc->idiag_family;
+ req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
+ req.idiag_ext = rc->idiag_ext;
++ req.pad = 0;
+ req.idiag_states = rc->idiag_states;
+ req.id = rc->id;
+
+--
+2.43.0
+
--- /dev/null
+From 6046225849d8e1ce652e2180c75d3bc298179c0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Jun 2021 17:51:21 -0700
+Subject: net: tcp better handling of reordering then loss cases
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit a29cb6914681a55667436a9eb7a42e28da8cf387 ]
+
+This patch aims to improve the situation when reordering and loss are
+occurring in the same flight of packets.
+
+Previously the reordering would first induce a spurious recovery, and
+the subsequent ACK might then undo the cwnd (e.g. based on timestamps).
+However, the current loss recovery does not proceed to invoke RACK to
+install a reordering timer. If some packets are also lost, this may lead
+to a long RTO-based recovery. An example is
+https://groups.google.com/g/bbr-dev/c/OFHADvJbTEI
+
+The solution is, after reverting the recovery, to always invoke RACK to
+either arm the RACK timer to fast retransmit after the reordering
+window, or restart the recovery if new loss is identified. Hence it is
+possible for the sender to go from Recovery to Disorder/Open to Recovery
+again in one ACK.
+
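+A condensed sketch of the resulting flow in tcp_fastretrans_alert() for
+the TCP_CA_Recovery case (paraphrased from the hunk below, not the
+literal code):
+
+    if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
+            return;                         /* full undo of the recovery */
+    if (tcp_try_undo_dsack(sk))
+            tcp_try_keep_open(sk);          /* DSACK undo, stay open */
+    tcp_identify_packet_loss(sk, ack_flag); /* let RACK re-mark losses */
+    if (icsk->icsk_ca_state != TCP_CA_Recovery) {
+            if (!tcp_time_to_recover(sk, flag))
+                    return;
+            tcp_enter_recovery(sk, ece_ack); /* restart recovery on new loss */
+    }
+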
+Reported-by: mingkun bian <bianmingkun@gmail.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: a6458ab7fd4f ("UPSTREAM: tcp: fix DSACK undo in fast recovery to call tcp_try_to_open()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 45 +++++++++++++++++++++++++-------------------
+ 1 file changed, 26 insertions(+), 19 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 88216b87c986f..5503f130cc6dd 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2727,8 +2727,17 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
+ *rexmit = REXMIT_LOST;
+ }
+
++static bool tcp_force_fast_retransmit(struct sock *sk)
++{
++ struct tcp_sock *tp = tcp_sk(sk);
++
++ return after(tcp_highest_sack_seq(tp),
++ tp->snd_una + tp->reordering * tp->mss_cache);
++}
++
+ /* Undo during fast recovery after partial ACK. */
+-static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
++static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
++ bool *do_lost)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+@@ -2753,7 +2762,9 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
+ tcp_undo_cwnd_reduction(sk, true);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
+ tcp_try_keep_open(sk);
+- return true;
++ } else {
++ /* Partial ACK arrived. Force fast retransmit. */
++ *do_lost = tcp_force_fast_retransmit(sk);
+ }
+ return false;
+ }
+@@ -2777,14 +2788,6 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
+ }
+ }
+
+-static bool tcp_force_fast_retransmit(struct sock *sk)
+-{
+- struct tcp_sock *tp = tcp_sk(sk);
+-
+- return after(tcp_highest_sack_seq(tp),
+- tp->snd_una + tp->reordering * tp->mss_cache);
+-}
+-
+ /* Process an event, which can update packets-in-flight not trivially.
+ * Main goal of this function is to calculate new estimate for left_out,
+ * taking into account both packets sitting in receiver's buffer and
+@@ -2854,17 +2857,21 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ if (!(flag & FLAG_SND_UNA_ADVANCED)) {
+ if (tcp_is_reno(tp))
+ tcp_add_reno_sack(sk, num_dupack, ece_ack);
+- } else {
+- if (tcp_try_undo_partial(sk, prior_snd_una))
+- return;
+- /* Partial ACK arrived. Force fast retransmit. */
+- do_lost = tcp_force_fast_retransmit(sk);
+- }
+- if (tcp_try_undo_dsack(sk)) {
+- tcp_try_keep_open(sk);
++ } else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
+ return;
+- }
++
++ if (tcp_try_undo_dsack(sk))
++ tcp_try_keep_open(sk);
++
+ tcp_identify_packet_loss(sk, ack_flag);
++ if (icsk->icsk_ca_state != TCP_CA_Recovery) {
++ if (!tcp_time_to_recover(sk, flag))
++ return;
++ /* Undo reverts the recovery state. If loss is evident,
++ * starts a new recovery (e.g. reordering then loss);
++ */
++ tcp_enter_recovery(sk, ece_ack);
++ }
+ break;
+ case TCP_CA_Loss:
+ tcp_process_loss(sk, flag, num_dupack, rexmit);
+--
+2.43.0
+
--- /dev/null
+From c4c1dfbeb7daaed0c87634fbf979eda069d4a844 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Jul 2024 22:53:48 +0000
+Subject: selftests: fix OOM in msg_zerocopy selftest
+
+From: Zijian Zhang <zijianzhang@bytedance.com>
+
+[ Upstream commit af2b7e5b741aaae9ffbba2c660def434e07aa241 ]
+
+In selftests/net/msg_zerocopy.c, a while loop keeps calling sendmsg on a
+socket with the MSG_ZEROCOPY flag, and the completion notifications are
+only received once the socket is no longer writable. Typically, the
+receiving phase starts after around 30+ sendmsgs. However, since the
+introduction of commit dfa2f0483360 ("tcp: get rid of sysctl_tcp_adv_win_scale"),
+the sender is always writable and never gets a chance to receive the
+notifications. The selftest then always exits with OUT_OF_MEMORY because
+the memory used by opt_skb exceeds net.core.optmem_max. That limit could
+also be set to a different value to trigger the same OOM on older
+kernels.
+
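+For context, the pattern the selftest exercises is roughly the following
+(a minimal sketch of the MSG_ZEROCOPY API, not the selftest code itself):
+
+    send(fd, buf, len, MSG_ZEROCOPY);
+    ...
+    /* completions arrive later on the socket error queue */
+    recvmsg(fd, &msg, MSG_ERRQUEUE);  /* SO_EE_ORIGIN_ZEROCOPY range */
+
+Each pending notification pins memory on the error queue, which is what
+eventually runs into net.core.optmem_max when completions are never
+reaped.
+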
+Thus, we introduce "cfg_notification_limit" to force the sender to
+receive notifications after a given number of sendmsgs.
+
+Fixes: 07b65c5b31ce ("test: add msg_zerocopy test")
+Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
+Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://patch.msgid.link/20240701225349.3395580-2-zijianzhang@bytedance.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/msg_zerocopy.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
+index c539591937a17..b7dc9f3617572 100644
+--- a/tools/testing/selftests/net/msg_zerocopy.c
++++ b/tools/testing/selftests/net/msg_zerocopy.c
+@@ -85,6 +85,7 @@ static bool cfg_rx;
+ static int cfg_runtime_ms = 4200;
+ static int cfg_verbose;
+ static int cfg_waittime_ms = 500;
++static int cfg_notification_limit = 32;
+ static bool cfg_zerocopy;
+
+ static socklen_t cfg_alen;
+@@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET];
+ static long packets, bytes, completions, expected_completions;
+ static int zerocopied = -1;
+ static uint32_t next_completion;
++static uint32_t sends_since_notify;
+
+ static unsigned long gettimeofday_ms(void)
+ {
+@@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
+ error(1, errno, "send");
+ if (cfg_verbose && ret != len)
+ fprintf(stderr, "send: ret=%u != %u\n", ret, len);
++ sends_since_notify++;
+
+ if (len) {
+ packets++;
+@@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain)
+ static void do_recv_completions(int fd, int domain)
+ {
+ while (do_recv_completion(fd, domain)) {}
++ sends_since_notify = 0;
+ }
+
+ /* Wait for all remaining completions on the errqueue */
+@@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol)
+ else
+ do_sendmsg(fd, &msg, cfg_zerocopy, domain);
+
++ if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit)
++ do_recv_completions(fd, domain);
++
+ while (!do_poll(fd, POLLOUT)) {
+ if (cfg_zerocopy)
+ do_recv_completions(fd, domain);
+@@ -707,7 +714,7 @@ static void parse_opts(int argc, char **argv)
+
+ cfg_payload_len = max_payload_len;
+
+- while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
++ while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+@@ -735,6 +742,9 @@ static void parse_opts(int argc, char **argv)
+ if (cfg_ifindex == 0)
+ error(1, errno, "invalid iface: %s", optarg);
+ break;
++ case 'l':
++ cfg_notification_limit = strtoul(optarg, NULL, 0);
++ break;
+ case 'm':
+ cfg_cork_mixed = true;
+ break;
+--
+2.43.0
+
--- /dev/null
+From 6fc57a0e83fe169ef435f3c8604033b87e68e698 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Jul 2024 22:53:49 +0000
+Subject: selftests: make order checking verbose in msg_zerocopy selftest
+
+From: Zijian Zhang <zijianzhang@bytedance.com>
+
+[ Upstream commit 7d6d8f0c8b700c9493f2839abccb6d29028b4219 ]
+
+We find that when lock debugging is on, notifications may not come in
+order. Thus, gate the order-checking output behind cfg_verbose to avoid
+flooding the log in this case.
+
+Fixes: 07b65c5b31ce ("test: add msg_zerocopy test")
+Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
+Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://patch.msgid.link/20240701225349.3395580-3-zijianzhang@bytedance.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/msg_zerocopy.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
+index b7dc9f3617572..67fa124c06864 100644
+--- a/tools/testing/selftests/net/msg_zerocopy.c
++++ b/tools/testing/selftests/net/msg_zerocopy.c
+@@ -438,7 +438,7 @@ static bool do_recv_completion(int fd, int domain)
+ /* Detect notification gaps. These should not happen often, if at all.
+ * Gaps can occur due to drops, reordering and retransmissions.
+ */
+- if (lo != next_completion)
++ if (cfg_verbose && lo != next_completion)
+ fprintf(stderr, "gap: %u..%u does not append to %u\n",
+ lo, hi, next_completion);
+ next_completion = hi + 1;
+--
+2.43.0
+
jffs2-fix-potential-illegal-address-access-in-jffs2_.patch
s390-mark-psw-in-__load_psw_mask-as-__unitialized.patch
s390-pkey-wipe-sensitive-data-on-failure.patch
+tcp-take-care-of-compressed-acks-in-tcp_add_reno_sac.patch
+tcp-tcp_mark_head_lost-is-only-valid-for-sack-tcp.patch
+tcp-add-ece_ack-flag-to-reno-sack-functions.patch
+net-tcp-better-handling-of-reordering-then-loss-case.patch
+upstream-tcp-fix-dsack-undo-in-fast-recovery-to-call.patch
+tcp_metrics-validate-source-addr-length.patch
+bonding-fix-out-of-bounds-read-in-bond_option_arp_ip.patch
+selftests-fix-oom-in-msg_zerocopy-selftest.patch
+selftests-make-order-checking-verbose-in-msg_zerocop.patch
+inet_diag-initialize-pad-field-in-struct-inet_diag_r.patch
--- /dev/null
+From 16a7188eb09089f5571a1943429ee30062eef1bc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2020 21:05:33 -0700
+Subject: tcp: add ece_ack flag to reno sack functions
+
+From: Yousuk Seung <ysseung@google.com>
+
+[ Upstream commit c634e34f6ebfb75259e6ce467523fd3adf30d3d2 ]
+
+Pass a boolean flag that conveys the ECE state of the current ACK to the
+reno sack functions. This is a pure refactor to prepare future patches
+that improve tracking of delivered counts.
+
+Signed-off-by: Yousuk Seung <ysseung@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: a6458ab7fd4f ("UPSTREAM: tcp: fix DSACK undo in fast recovery to call tcp_try_to_open()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index e51aa5a149c0f..88216b87c986f 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1897,7 +1897,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+
+ /* Emulate SACKs for SACKless connection: account for a new dupack. */
+
+-static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
++static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
+ {
+ if (num_dupack) {
+ struct tcp_sock *tp = tcp_sk(sk);
+@@ -1915,7 +1915,7 @@ static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
+
+ /* Account for ACK, ACKing some data in Reno Recovery phase. */
+
+-static void tcp_remove_reno_sacks(struct sock *sk, int acked)
++static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+@@ -2720,7 +2720,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
+ * delivered. Lower inflight to clock out (re)tranmissions.
+ */
+ if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
+- tcp_add_reno_sack(sk, num_dupack);
++ tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
+ else if (flag & FLAG_SND_UNA_ADVANCED)
+ tcp_reset_reno_sack(tp);
+ }
+@@ -2803,6 +2803,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ int fast_rexmit = 0, flag = *ack_flag;
++ bool ece_ack = flag & FLAG_ECE;
+ bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
+ tcp_force_fast_retransmit(sk));
+
+@@ -2811,7 +2812,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+
+ /* Now state machine starts.
+ * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
+- if (flag & FLAG_ECE)
++ if (ece_ack)
+ tp->prior_ssthresh = 0;
+
+ /* B. In all the states check for reneging SACKs. */
+@@ -2852,7 +2853,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ case TCP_CA_Recovery:
+ if (!(flag & FLAG_SND_UNA_ADVANCED)) {
+ if (tcp_is_reno(tp))
+- tcp_add_reno_sack(sk, num_dupack);
++ tcp_add_reno_sack(sk, num_dupack, ece_ack);
+ } else {
+ if (tcp_try_undo_partial(sk, prior_snd_una))
+ return;
+@@ -2877,7 +2878,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ if (tcp_is_reno(tp)) {
+ if (flag & FLAG_SND_UNA_ADVANCED)
+ tcp_reset_reno_sack(tp);
+- tcp_add_reno_sack(sk, num_dupack);
++ tcp_add_reno_sack(sk, num_dupack, ece_ack);
+ }
+
+ if (icsk->icsk_ca_state <= TCP_CA_Disorder)
+@@ -2901,7 +2902,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ }
+
+ /* Otherwise enter Recovery state */
+- tcp_enter_recovery(sk, (flag & FLAG_ECE));
++ tcp_enter_recovery(sk, ece_ack);
+ fast_rexmit = 1;
+ }
+
+@@ -3077,7 +3078,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
+ */
+ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+ u32 prior_snd_una,
+- struct tcp_sacktag_state *sack)
++ struct tcp_sacktag_state *sack, bool ece_ack)
+ {
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ u64 first_ackt, last_ackt;
+@@ -3215,7 +3216,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+ }
+
+ if (tcp_is_reno(tp)) {
+- tcp_remove_reno_sacks(sk, pkts_acked);
++ tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);
+
+ /* If any of the cumulatively ACKed segments was
+ * retransmitted, non-SACK case cannot confirm that
+@@ -3720,7 +3721,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
+ goto no_queue;
+
+ /* See if we can take anything off of the retransmit queue. */
+- flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
++ flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
++ flag & FLAG_ECE);
+
+ tcp_rack_update_reo_wnd(sk, &rs);
+
+--
+2.43.0
+
--- /dev/null
+From bda730fdb80a9e6c557112bf3ca75eacfe3a79ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Nov 2018 14:42:01 -0800
+Subject: tcp: take care of compressed acks in tcp_add_reno_sack()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 19119f298bb1f2af3bb1093f5f2a1fed8da94e37 ]
+
+Neal pointed out that non-SACK flows might suffer from the ACK
+compression added in the following patch ("tcp: implement coalescing on
+backlog queue").
+
+Instead of tweaking tcp_add_backlog(), we can take into account how many
+ACKs were coalesced; this information is available in
+skb_shinfo(skb)->gso_segs.
+
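+Condensed, the consumer side in tcp_ack() becomes (paraphrasing the hunk
+below):
+
+    if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+            num_dupack = 1;
+            /* pure ACKs may have been aggregated in tcp_add_backlog() */
+            if (!(flag & FLAG_DATA))
+                    num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+    }
+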
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: a6458ab7fd4f ("UPSTREAM: tcp: fix DSACK undo in fast recovery to call tcp_try_to_open()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 58 +++++++++++++++++++++++++-------------------
+ 1 file changed, 33 insertions(+), 25 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 6a8c7c521d36e..022d75c67096a 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1897,16 +1897,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+
+ /* Emulate SACKs for SACKless connection: account for a new dupack. */
+
+-static void tcp_add_reno_sack(struct sock *sk)
++static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
+ {
+- struct tcp_sock *tp = tcp_sk(sk);
+- u32 prior_sacked = tp->sacked_out;
++ if (num_dupack) {
++ struct tcp_sock *tp = tcp_sk(sk);
++ u32 prior_sacked = tp->sacked_out;
++ s32 delivered;
+
+- tp->sacked_out++;
+- tcp_check_reno_reordering(sk, 0);
+- if (tp->sacked_out > prior_sacked)
+- tp->delivered++; /* Some out-of-order packet is delivered */
+- tcp_verify_left_out(tp);
++ tp->sacked_out += num_dupack;
++ tcp_check_reno_reordering(sk, 0);
++ delivered = tp->sacked_out - prior_sacked;
++ if (delivered > 0)
++ tp->delivered += delivered;
++ tcp_verify_left_out(tp);
++ }
+ }
+
+ /* Account for ACK, ACKing some data in Reno Recovery phase. */
+@@ -2687,7 +2691,7 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
+ /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
+ * recovered or spurious. Otherwise retransmits more on partial ACKs.
+ */
+-static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
++static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
+ int *rexmit)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+@@ -2706,7 +2710,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
+ return;
+
+ if (after(tp->snd_nxt, tp->high_seq)) {
+- if (flag & FLAG_DATA_SACKED || is_dupack)
++ if (flag & FLAG_DATA_SACKED || num_dupack)
+ tp->frto = 0; /* Step 3.a. loss was real */
+ } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
+ tp->high_seq = tp->snd_nxt;
+@@ -2732,8 +2736,8 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
+ /* A Reno DUPACK means new data in F-RTO step 2.b above are
+ * delivered. Lower inflight to clock out (re)tranmissions.
+ */
+- if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
+- tcp_add_reno_sack(sk);
++ if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
++ tcp_add_reno_sack(sk, num_dupack);
+ else if (flag & FLAG_SND_UNA_ADVANCED)
+ tcp_reset_reno_sack(tp);
+ }
+@@ -2811,13 +2815,13 @@ static bool tcp_force_fast_retransmit(struct sock *sk)
+ * tcp_xmit_retransmit_queue().
+ */
+ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+- bool is_dupack, int *ack_flag, int *rexmit)
++ int num_dupack, int *ack_flag, int *rexmit)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ int fast_rexmit = 0, flag = *ack_flag;
+- bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
+- tcp_force_fast_retransmit(sk));
++ bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
++ tcp_force_fast_retransmit(sk));
+
+ if (!tp->packets_out && tp->sacked_out)
+ tp->sacked_out = 0;
+@@ -2864,8 +2868,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ switch (icsk->icsk_ca_state) {
+ case TCP_CA_Recovery:
+ if (!(flag & FLAG_SND_UNA_ADVANCED)) {
+- if (tcp_is_reno(tp) && is_dupack)
+- tcp_add_reno_sack(sk);
++ if (tcp_is_reno(tp))
++ tcp_add_reno_sack(sk, num_dupack);
+ } else {
+ if (tcp_try_undo_partial(sk, prior_snd_una))
+ return;
+@@ -2880,7 +2884,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ tcp_identify_packet_loss(sk, ack_flag);
+ break;
+ case TCP_CA_Loss:
+- tcp_process_loss(sk, flag, is_dupack, rexmit);
++ tcp_process_loss(sk, flag, num_dupack, rexmit);
+ tcp_identify_packet_loss(sk, ack_flag);
+ if (!(icsk->icsk_ca_state == TCP_CA_Open ||
+ (*ack_flag & FLAG_LOST_RETRANS)))
+@@ -2891,8 +2895,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ if (tcp_is_reno(tp)) {
+ if (flag & FLAG_SND_UNA_ADVANCED)
+ tcp_reset_reno_sack(tp);
+- if (is_dupack)
+- tcp_add_reno_sack(sk);
++ tcp_add_reno_sack(sk, num_dupack);
+ }
+
+ if (icsk->icsk_ca_state <= TCP_CA_Disorder)
+@@ -3623,7 +3626,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
+ bool is_sack_reneg = tp->is_sack_reneg;
+ u32 ack_seq = TCP_SKB_CB(skb)->seq;
+ u32 ack = TCP_SKB_CB(skb)->ack_seq;
+- bool is_dupack = false;
++ int num_dupack = 0;
+ int prior_packets = tp->packets_out;
+ u32 delivered = tp->delivered;
+ u32 lost = tp->lost;
+@@ -3743,8 +3746,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
+ tcp_process_tlp_ack(sk, ack, flag);
+
+ if (tcp_ack_is_dubious(sk, flag)) {
+- is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
+- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
++ if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
++ num_dupack = 1;
++ /* Consider if pure acks were aggregated in tcp_add_backlog() */
++ if (!(flag & FLAG_DATA))
++ num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
++ }
++ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
+ &rexmit);
+ }
+
+@@ -3766,7 +3774,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
+ no_queue:
+ /* If data was DSACKed, see if we can undo a cwnd reduction. */
+ if (flag & FLAG_DSACKING_ACK) {
+- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
++ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
+ &rexmit);
+ tcp_newly_delivered(sk, delivered, flag);
+ }
+@@ -3791,7 +3799,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
+ if (TCP_SKB_CB(skb)->sacked) {
+ flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+ &sack_state);
+- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
++ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
+ &rexmit);
+ tcp_newly_delivered(sk, delivered, flag);
+ tcp_xmit_recovery(sk, rexmit);
+--
+2.43.0
+
--- /dev/null
+From e54288a45229e4435add827417471cfc86b1ead3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 May 2020 11:08:30 +0800
+Subject: tcp: tcp_mark_head_lost is only valid for sack-tcp
+
+From: zhang kai <zhangkaiheb@126.com>
+
+[ Upstream commit 636ef28d6e4d174e424102466caf572b0406fb0e ]
+
+Since tcp_mark_head_lost() is only valid for SACK TCP, remove the
+tcp_is_sack()/tcp_is_reno() checks from it.
+
+Signed-off-by: zhang kai <zhangkaiheb@126.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: a6458ab7fd4f ("UPSTREAM: tcp: fix DSACK undo in fast recovery to call tcp_try_to_open()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 32 +++++++-------------------------
+ 1 file changed, 7 insertions(+), 25 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 022d75c67096a..e51aa5a149c0f 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2193,8 +2193,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
+ }
+
+ /* Detect loss in event "A" above by marking head of queue up as lost.
+- * For non-SACK(Reno) senders, the first "packets" number of segments
+- * are considered lost. For RFC3517 SACK, a segment is considered lost if it
++ * For RFC3517 SACK, a segment is considered lost if it
+ * has at least tp->reordering SACKed seqments above it; "packets" refers to
+ * the maximum SACKed segments to pass before reaching this limit.
+ */
+@@ -2202,10 +2201,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb;
+- int cnt, oldcnt, lost;
+- unsigned int mss;
++ int cnt;
+ /* Use SACK to deduce losses of new sequences sent during recovery */
+- const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
++ const u32 loss_high = tp->snd_nxt;
+
+ WARN_ON(packets > tp->packets_out);
+ skb = tp->lost_skb_hint;
+@@ -2228,26 +2226,11 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
+ if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
+ break;
+
+- oldcnt = cnt;
+- if (tcp_is_reno(tp) ||
+- (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
++ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ cnt += tcp_skb_pcount(skb);
+
+- if (cnt > packets) {
+- if (tcp_is_sack(tp) ||
+- (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
+- (oldcnt >= packets))
+- break;
+-
+- mss = tcp_skb_mss(skb);
+- /* If needed, chop off the prefix to mark as lost. */
+- lost = (packets - oldcnt) * mss;
+- if (lost < skb->len &&
+- tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+- lost, mss, GFP_ATOMIC) < 0)
+- break;
+- cnt = packets;
+- }
++ if (cnt > packets)
++ break;
+
+ tcp_skb_mark_lost(tp, skb);
+
+@@ -2874,8 +2857,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ if (tcp_try_undo_partial(sk, prior_snd_una))
+ return;
+ /* Partial ACK arrived. Force fast retransmit. */
+- do_lost = tcp_is_reno(tp) ||
+- tcp_force_fast_retransmit(sk);
++ do_lost = tcp_force_fast_retransmit(sk);
+ }
+ if (tcp_try_undo_dsack(sk)) {
+ tcp_try_keep_open(sk);
+--
+2.43.0
+
--- /dev/null
+From 4641ea3b4984a1a80d58dacdcaf0b77e5de484be Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jun 2024 14:25:00 -0700
+Subject: tcp_metrics: validate source addr length
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 66be40e622e177316ae81717aa30057ba9e61dff ]
+
+I don't see anything checking that TCP_METRICS_ATTR_SADDR_IPV4 is at
+least 4 bytes long, and the policy doesn't have an entry for this
+attribute at all (nor for its IPv6 counterpart, but the v6 address is
+validated manually).
+
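+For context, a policy entry is sufficient here because the generic
+netlink core validates fixed-size attribute types before the handler
+runs; a minimal sketch of the idea (the real entry is in the diff below):
+
+    [TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, },
+    /* nla_parse() now rejects attributes shorter than 4 bytes,
+     * so the 4-byte source-address read in the handler cannot
+     * overrun the attribute payload.
+     */
+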
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Fixes: 3e7013ddf55a ("tcp: metrics: Allow selective get/del of tcp-metrics based on src IP")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 60619b1f4acdc..1bfbb8f8e0b7c 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -624,6 +624,7 @@ static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] =
+ [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
+ [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY,
+ .len = sizeof(struct in6_addr), },
++ [TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, },
+ /* Following attributes are not received for GET/DEL,
+ * we keep them for reference
+ */
+--
+2.43.0
+
--- /dev/null
+From a5ad0648fbf7a93191fd21aad7108fb06594d33e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jun 2024 22:42:27 -0400
+Subject: UPSTREAM: tcp: fix DSACK undo in fast recovery to call
+ tcp_try_to_open()
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit a6458ab7fd4f427d4f6f54380453ad255b7fde83 ]
+
+In some production workloads we noticed that connections could
+sometimes close extremely prematurely with ETIMEDOUT after
+transmitting only 1 TLP and RTO retransmission (when we would normally
+expect roughly tcp_retries2 = TCP_RETR2 = 15 RTOs before a connection
+closes with ETIMEDOUT).
+
+From tracing we determined that these workloads can suffer from a
+scenario where in fast recovery, after some retransmits, a DSACK undo
+can happen at a point where the scoreboard is totally clear (we have
+retrans_out == sacked_out == lost_out == 0). In such cases, calling
+tcp_try_keep_open() means that we do not execute any code path that
+clears tp->retrans_stamp to 0. That means that tp->retrans_stamp can
+remain erroneously set to the start time of the undone fast recovery,
+even after the fast recovery is undone. If minutes or hours elapse,
+and then a TLP/RTO/RTO sequence occurs, then the start_ts value in
+retransmits_timed_out() (which is from tp->retrans_stamp) will be
+erroneously ancient (left over from the fast recovery undone via
+DSACKs). Thus this ancient tp->retrans_stamp value can cause the
+connection to die very prematurely with ETIMEDOUT via
+tcp_write_err().
+
+The fix: we change DSACK undo in fast recovery (TCP_CA_Recovery) to
+call tcp_try_to_open() instead of tcp_try_keep_open(). This ensures
+that if no retransmits are in flight at the time of DSACK undo in fast
+recovery then we properly zero retrans_stamp. Note that calling
+tcp_try_to_open() is more consistent with other loss recovery
+behavior, since normal fast recovery (CA_Recovery) and RTO recovery
+(CA_Loss) both normally end when tp->snd_una meets or exceeds
+tp->high_seq and then in tcp_fastretrans_alert() the "default" switch
+case executes tcp_try_to_open(). Also note that by inspection this
+change to call tcp_try_to_open() implies at least one other nice bug
+fix, where now an ECE-marked DSACK that causes an undo will properly
+invoke tcp_enter_cwr() rather than ignoring the ECE mark.
+
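+For reference, the relevant behavior of tcp_try_to_open(), condensed
+(paraphrased from net/ipv4/tcp_input.c, not a verbatim copy):
+
+    static void tcp_try_to_open(struct sock *sk, int flag)
+    {
+            struct tcp_sock *tp = tcp_sk(sk);
+
+            tcp_verify_left_out(tp);
+
+            if (!tcp_any_retrans_done(sk))
+                    tp->retrans_stamp = 0;  /* clear the stale stamp */
+
+            if (flag & FLAG_ECE)
+                    tcp_enter_cwr(sk);      /* honor the ECE mark */
+
+            if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR)
+                    tcp_try_keep_open(sk);
+    }
+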
+Fixes: c7d9d6a185a7 ("tcp: undo on DSACK during recovery")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 5503f130cc6dd..9a66c37958451 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2861,7 +2861,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ return;
+
+ if (tcp_try_undo_dsack(sk))
+- tcp_try_keep_open(sk);
++ tcp_try_to_open(sk, flag);
+
+ tcp_identify_packet_loss(sk, ack_flag);
+ if (icsk->icsk_ca_state != TCP_CA_Recovery) {
+--
+2.43.0
+