From bb9012621b5e31e4d5931bfee0d103405458077b Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 13 Jul 2024 09:26:40 -0400 Subject: [PATCH] Fixes for 5.4 Signed-off-by: Sasha Levin --- ...ential-use-after-free-in-posix_lock_.patch | 50 ++++++ ...-value-stored-to-dentry-d_flags-inst.patch | 44 +++++ ...antiq_etop-fix-double-free-in-detach.patch | 43 +++++ ...top-add-blank-line-after-declaration.patch | 40 +++++ ...-incorrect-value-output-on-error-pat.patch | 44 +++++ ...ed-as-lcp-but-actually-malformed-pac.patch | 67 ++++++++ queue-5.4/series | 10 ++ ...cp_info-status-for-failed-client-tfo.patch | 162 ++++++++++++++++++ ...t-undo-caused-by-dsack-of-tlp-retran.patch | 107 ++++++++++++ ...rcu_free-earlier-in-udp_lib_get_port.patch | 123 +++++++++++++ ...gative-dentry-count-when-on-shrinker.patch | 88 ++++++++++ 11 files changed, 778 insertions(+) create mode 100644 queue-5.4/filelock-fix-potential-use-after-free-in-posix_lock_.patch create mode 100644 queue-5.4/fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch create mode 100644 queue-5.4/net-ethernet-lantiq_etop-fix-double-free-in-detach.patch create mode 100644 queue-5.4/net-lantiq_etop-add-blank-line-after-declaration.patch create mode 100644 queue-5.4/octeontx2-af-fix-incorrect-value-output-on-error-pat.patch create mode 100644 queue-5.4/ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch create mode 100644 queue-5.4/tcp-add-tcp_info-status-for-failed-client-tfo.patch create mode 100644 queue-5.4/tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch create mode 100644 queue-5.4/udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch create mode 100644 queue-5.4/vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch diff --git a/queue-5.4/filelock-fix-potential-use-after-free-in-posix_lock_.patch b/queue-5.4/filelock-fix-potential-use-after-free-in-posix_lock_.patch new file mode 100644 index 00000000000..c1fea89ab32 --- /dev/null +++ b/queue-5.4/filelock-fix-potential-use-after-free-in-posix_lock_.patch @@ -0,0 +1,50 @@ +From 75749c9bf4890a5f07d88fdcdbfb321a6e0ee778 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Jul 2024 18:44:48 -0400 +Subject: filelock: fix potential use-after-free in posix_lock_inode +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jeff Layton + +[ Upstream commit 1b3ec4f7c03d4b07bad70697d7e2f4088d2cfe92 ] + +Light Hsieh reported a KASAN UAF warning in trace_posix_lock_inode(). +The request pointer had been changed earlier to point to a lock entry +that was added to the inode's list. However, before the tracepoint could +fire, another task raced in and freed that lock. + +Fix this by moving the tracepoint inside the spinlock, which should +ensure that this doesn't happen. + +Fixes: 74f6f5912693 ("locks: fix KASAN: use-after-free in trace_event_raw_event_filelock_lock") +Link: https://lore.kernel.org/linux-fsdevel/724ffb0a2962e912ea62bb0515deadf39c325112.camel@kernel.org/ +Reported-by: Light Hsieh (謝明燈) +Signed-off-by: Jeff Layton +Link: https://lore.kernel.org/r/20240702-filelock-6-10-v1-1-96e766aadc98@kernel.org +Reviewed-by: Alexander Aring +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/locks.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/locks.c b/fs/locks.c +index 90f92784aa554..bafe11deea56b 100644 +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -1336,9 +1336,9 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, + locks_wake_up_blocks(left); + } + out: ++ trace_posix_lock_inode(inode, request, error); + spin_unlock(&ctx->flc_lock); + percpu_up_read(&file_rwsem); +- trace_posix_lock_inode(inode, request, error); + /* + * Free any unused locks. + */ +-- +2.43.0 + diff --git a/queue-5.4/fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch b/queue-5.4/fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch new file mode 100644 index 00000000000..04284f083a8 --- /dev/null +++ b/queue-5.4/fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch @@ -0,0 +1,44 @@ +From 8676d75b8e971d92eddcb75abf73218eac9c800c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Apr 2024 10:10:08 +0800 +Subject: fs/dcache: Re-use value stored to dentry->d_flags instead of + re-reading + +From: linke li + +[ Upstream commit 8bfb40be31ddea0cb4664b352e1797cfe6c91976 ] + +Currently, the __d_clear_type_and_inode() writes the value flags to +dentry->d_flags, then immediately re-reads it in order to use it in a if +statement. This re-read is useless because no other update to +dentry->d_flags can occur at this point. + +This commit therefore re-use flags in the if statement instead of +re-reading dentry->d_flags. + +Signed-off-by: linke li +Link: https://lore.kernel.org/r/tencent_5E187BD0A61BA28605E85405F15228254D0A@qq.com +Reviewed-by: Jan Kara +Signed-off-by: Christian Brauner +Stable-dep-of: aabfe57ebaa7 ("vfs: don't mod negative dentry count when on shrinker list") +Signed-off-by: Sasha Levin +--- + fs/dcache.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/dcache.c b/fs/dcache.c +index 43864a276faa2..9505e5df30b74 100644 +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -329,7 +329,7 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) + flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); + WRITE_ONCE(dentry->d_flags, flags); + dentry->d_inode = NULL; +- if (dentry->d_flags & DCACHE_LRU_LIST) ++ if (flags & DCACHE_LRU_LIST) + this_cpu_inc(nr_dentry_negative); + } + +-- +2.43.0 + diff --git a/queue-5.4/net-ethernet-lantiq_etop-fix-double-free-in-detach.patch b/queue-5.4/net-ethernet-lantiq_etop-fix-double-free-in-detach.patch new file mode 100644 index 00000000000..c8acf4472f0 --- /dev/null +++ b/queue-5.4/net-ethernet-lantiq_etop-fix-double-free-in-detach.patch @@ -0,0 +1,43 @@ +From 3776435dae51007593012f03794346308a504b07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Jul 2024 22:58:26 +0200 +Subject: net: ethernet: lantiq_etop: fix double free in detach + +From: Aleksander Jan Bajkowski + +[ Upstream commit e1533b6319ab9c3a97dad314dd88b3783bc41b69 ] + +The number of the currently released descriptor is never incremented +which results in the same skb being released multiple times. + +Fixes: 504d4721ee8e ("MIPS: Lantiq: Add ethernet driver") +Reported-by: Joe Perches +Closes: https://lore.kernel.org/all/fc1bf93d92bb5b2f99c6c62745507cc22f3a7b2d.camel@perches.com/ +Signed-off-by: Aleksander Jan Bajkowski +Reviewed-by: Andrew Lunn +Link: https://patch.msgid.link/20240708205826.5176-1-olek2@wp.pl +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/lantiq_etop.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c +index 98aa172da051f..932796080c7f7 100644 +--- a/drivers/net/ethernet/lantiq_etop.c ++++ b/drivers/net/ethernet/lantiq_etop.c +@@ -213,9 +213,9 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch) + if (ch->dma.irq) + free_irq(ch->dma.irq, priv); + if (IS_RX(ch->idx)) { +- int desc; ++ struct ltq_dma_channel *dma = &ch->dma; + +- for (desc = 0; desc < LTQ_DESC_NUM; desc++) ++ for (dma->desc = 0; dma->desc < LTQ_DESC_NUM; dma->desc++) + dev_kfree_skb_any(ch->skb[ch->dma.desc]); + } + } +-- +2.43.0 + diff --git a/queue-5.4/net-lantiq_etop-add-blank-line-after-declaration.patch b/queue-5.4/net-lantiq_etop-add-blank-line-after-declaration.patch new file mode 100644 index 00000000000..0139f783506 --- /dev/null +++ b/queue-5.4/net-lantiq_etop-add-blank-line-after-declaration.patch @@ -0,0 +1,40 @@ +From b99b3b118bd5b0f5f147cdbe25dbbaf6b37a3e15 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Dec 2021 23:00:31 +0100 +Subject: net: lantiq_etop: add blank line after declaration + +From: Aleksander Jan Bajkowski + +[ Upstream commit 4c46625bb586a741b8d0e6bdbddbcb2549fa1d36 ] + +This patch adds a missing line after the declaration and +fixes the checkpatch warning: + +WARNING: Missing a blank line after declarations ++ int desc; ++ for (desc = 0; desc < LTQ_DESC_NUM; desc++) + +Signed-off-by: Aleksander Jan Bajkowski +Link: https://lore.kernel.org/r/20211228220031.71576-1-olek2@wp.pl +Signed-off-by: Jakub Kicinski +Stable-dep-of: e1533b6319ab ("net: ethernet: lantiq_etop: fix double free in detach") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/lantiq_etop.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c +index 12e8b0f957d3d..98aa172da051f 100644 +--- a/drivers/net/ethernet/lantiq_etop.c ++++ b/drivers/net/ethernet/lantiq_etop.c +@@ -214,6 +214,7 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch) + free_irq(ch->dma.irq, priv); + if (IS_RX(ch->idx)) { + int desc; ++ + for (desc = 0; desc < LTQ_DESC_NUM; desc++) + dev_kfree_skb_any(ch->skb[ch->dma.desc]); + } +-- +2.43.0 + diff --git a/queue-5.4/octeontx2-af-fix-incorrect-value-output-on-error-pat.patch b/queue-5.4/octeontx2-af-fix-incorrect-value-output-on-error-pat.patch new file mode 100644 index 00000000000..3c59eda2e40 --- /dev/null +++ b/queue-5.4/octeontx2-af-fix-incorrect-value-output-on-error-pat.patch @@ -0,0 +1,44 @@ +From 31b10a50a067fecdec136d0a1f73f97df6f8dba9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Jul 2024 12:53:17 +0300 +Subject: octeontx2-af: Fix incorrect value output on error path in + rvu_check_rsrc_availability() + +From: Aleksandr Mishin + +[ Upstream commit 442e26af9aa8115c96541026cbfeaaa76c85d178 ] + +In rvu_check_rsrc_availability() in case of invalid SSOW req, an incorrect +data is printed to error log. 'req->sso' value is printed instead of +'req->ssow'. Looks like "copy-paste" mistake. + +Fix this mistake by replacing 'req->sso' with 'req->ssow'. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. + +Fixes: 746ea74241fa ("octeontx2-af: Add RVU block LF provisioning support") +Signed-off-by: Aleksandr Mishin +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20240705095317.12640-1-amishin@t-argos.ru +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +index 9c6307186505a..f569a98e35a02 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +@@ -1124,7 +1124,7 @@ static int rvu_check_rsrc_availability(struct rvu *rvu, + if (req->ssow > block->lf.max) { + dev_err(&rvu->pdev->dev, + "Func 0x%x: Invalid SSOW req, %d > max %d\n", +- pcifunc, req->sso, block->lf.max); ++ pcifunc, req->ssow, block->lf.max); + return -EINVAL; + } + mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->type); +-- +2.43.0 + diff --git a/queue-5.4/ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch b/queue-5.4/ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch new file mode 100644 index 00000000000..81287cfaaf9 --- /dev/null +++ b/queue-5.4/ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch @@ -0,0 +1,67 @@ +From 9d308da2bcf2c23807de4b6847b91c25b4348c66 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Jul 2024 14:56:15 +0300 +Subject: ppp: reject claimed-as-LCP but actually malformed packets + +From: Dmitry Antipov + +[ Upstream commit f2aeb7306a898e1cbd03963d376f4b6656ca2b55 ] + +Since 'ppp_async_encode()' assumes valid LCP packets (with code +from 1 to 7 inclusive), add 'ppp_check_packet()' to ensure that +LCP packet has an actual body beyond PPP_LCP header bytes, and +reject claimed-as-LCP but actually malformed data otherwise. + +Reported-by: syzbot+ec0723ba9605678b14bf@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=ec0723ba9605678b14bf +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Dmitry Antipov +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ppp/ppp_generic.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c +index 078c0f474f966..3cd4196b36b21 100644 +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -70,6 +70,7 @@ + #define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */ + + #define PPP_PROTO_LEN 2 ++#define PPP_LCP_HDRLEN 4 + + /* + * An instance of /dev/ppp can be associated with either a ppp +@@ -491,6 +492,15 @@ static ssize_t ppp_read(struct file *file, char __user *buf, + return ret; + } + ++static bool ppp_check_packet(struct sk_buff *skb, size_t count) ++{ ++ /* LCP packets must include LCP header which 4 bytes long: ++ * 1-byte code, 1-byte identifier, and 2-byte length. ++ */ ++ return get_unaligned_be16(skb->data) != PPP_LCP || ++ count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN; ++} ++ + static ssize_t ppp_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) + { +@@ -513,6 +523,11 @@ static ssize_t ppp_write(struct file *file, const char __user *buf, + kfree_skb(skb); + goto out; + } ++ ret = -EINVAL; ++ if (unlikely(!ppp_check_packet(skb, count))) { ++ kfree_skb(skb); ++ goto out; ++ } + + switch (pf->kind) { + case INTERFACE: +-- +2.43.0 + diff --git a/queue-5.4/series b/queue-5.4/series index 260945711ba..33cbaf1beae 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -43,3 +43,13 @@ i2c-pnx-fix-potential-deadlock-warning-from-del_time.patch alsa-hda-realtek-enable-headset-mic-of-jp-ik-leap-w5.patch nvme-multipath-find-numa-path-only-for-online-numa-n.patch nilfs2-fix-incorrect-inode-allocation-from-reserved-inodes.patch +filelock-fix-potential-use-after-free-in-posix_lock_.patch +fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch +vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch +tcp-add-tcp_info-status-for-failed-client-tfo.patch +tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch +octeontx2-af-fix-incorrect-value-output-on-error-pat.patch +net-lantiq_etop-add-blank-line-after-declaration.patch +net-ethernet-lantiq_etop-fix-double-free-in-detach.patch +ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch +udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch diff --git a/queue-5.4/tcp-add-tcp_info-status-for-failed-client-tfo.patch b/queue-5.4/tcp-add-tcp_info-status-for-failed-client-tfo.patch new file mode 100644 index 00000000000..7602350242b --- /dev/null +++ b/queue-5.4/tcp-add-tcp_info-status-for-failed-client-tfo.patch @@ -0,0 +1,162 @@ +From 98e5dec0c60640792db6506f6b1dc59aea2b3060 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Oct 2019 11:09:26 -0400 +Subject: tcp: add TCP_INFO status for failed client TFO + +From: Jason Baron + +[ Upstream commit 480274787d7e3458bc5a7cfbbbe07033984ad711 ] + +The TCPI_OPT_SYN_DATA bit as part of tcpi_options currently reports whether +or not data-in-SYN was ack'd on both the client and server side. We'd like +to gather more information on the client-side in the failure case in order +to indicate the reason for the failure. This can be useful for not only +debugging TFO, but also for creating TFO socket policies. For example, if +a middle box removes the TFO option or drops a data-in-SYN, we can +can detect this case, and turn off TFO for these connections saving the +extra retransmits. + +The newly added tcpi_fastopen_client_fail status is 2 bits and has the +following 4 states: + +1) TFO_STATUS_UNSPEC + +Catch-all state which includes when TFO is disabled via black hole +detection, which is indicated via LINUX_MIB_TCPFASTOPENBLACKHOLE. + +2) TFO_COOKIE_UNAVAILABLE + +If TFO_CLIENT_NO_COOKIE mode is off, this state indicates that no cookie +is available in the cache. + +3) TFO_DATA_NOT_ACKED + +Data was sent with SYN, we received a SYN/ACK but it did not cover the data +portion. Cookie is not accepted by server because the cookie may be invalid +or the server may be overloaded. + +4) TFO_SYN_RETRANSMITTED + +Data was sent with SYN, we received a SYN/ACK which did not cover the data +after at least 1 additional SYN was sent (without data). It may be the case +that a middle-box is dropping data-in-SYN packets. Thus, it would be more +efficient to not use TFO on this connection to avoid extra retransmits +during connection establishment. + +These new fields do not cover all the cases where TFO may fail, but other +failures, such as SYN/ACK + data being dropped, will result in the +connection not becoming established. And a connection blackhole after +session establishment shows up as a stalled connection. + +Signed-off-by: Jason Baron +Cc: Eric Dumazet +Cc: Neal Cardwell +Cc: Christoph Paasch +Cc: Yuchung Cheng +Acked-by: Yuchung Cheng +Signed-off-by: David S. Miller +Stable-dep-of: 0ec986ed7bab ("tcp: fix incorrect undo caused by DSACK of TLP retransmit") +Signed-off-by: Sasha Levin +--- + include/linux/tcp.h | 2 +- + include/uapi/linux/tcp.h | 10 +++++++++- + net/ipv4/tcp.c | 2 ++ + net/ipv4/tcp_fastopen.c | 5 ++++- + net/ipv4/tcp_input.c | 4 ++++ + 5 files changed, 20 insertions(+), 3 deletions(-) + +diff --git a/include/linux/tcp.h b/include/linux/tcp.h +index 68dacc1994376..0c1255a9d3068 100644 +--- a/include/linux/tcp.h ++++ b/include/linux/tcp.h +@@ -225,7 +225,7 @@ struct tcp_sock { + fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ + fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ +- unused:2; ++ fastopen_client_fail:2; /* reason why fastopen failed */ + u8 nonagle : 4,/* Disable Nagle algorithm? */ + thin_lto : 1,/* Use linear timeouts for thin streams */ + recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ +diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h +index 81e697978e8b5..74af1f759cee4 100644 +--- a/include/uapi/linux/tcp.h ++++ b/include/uapi/linux/tcp.h +@@ -155,6 +155,14 @@ enum { + TCP_QUEUES_NR, + }; + ++/* why fastopen failed from client perspective */ ++enum tcp_fastopen_client_fail { ++ TFO_STATUS_UNSPEC, /* catch-all */ ++ TFO_COOKIE_UNAVAILABLE, /* if not in TFO_CLIENT_NO_COOKIE mode */ ++ TFO_DATA_NOT_ACKED, /* SYN-ACK did not ack SYN data */ ++ TFO_SYN_RETRANSMITTED, /* SYN-ACK did not ack SYN data after timeout */ ++}; ++ + /* for TCP_INFO socket option */ + #define TCPI_OPT_TIMESTAMPS 1 + #define TCPI_OPT_SACK 2 +@@ -211,7 +219,7 @@ struct tcp_info { + __u8 tcpi_backoff; + __u8 tcpi_options; + __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; +- __u8 tcpi_delivery_rate_app_limited:1; ++ __u8 tcpi_delivery_rate_app_limited:1, tcpi_fastopen_client_fail:2; + + __u32 tcpi_rto; + __u32 tcpi_ato; +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index a878b8b6e0b96..54399256a4380 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2713,6 +2713,7 @@ int tcp_disconnect(struct sock *sk, int flags) + /* Clean up fastopen related fields */ + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; ++ tp->fastopen_client_fail = 0; + + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); + +@@ -3360,6 +3361,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) + info->tcpi_reord_seen = tp->reord_seen; + info->tcpi_rcv_ooopack = tp->rcv_ooopack; + info->tcpi_snd_wnd = tp->snd_wnd; ++ info->tcpi_fastopen_client_fail = tp->fastopen_client_fail; + unlock_sock_fast(sk, slow); + } + EXPORT_SYMBOL_GPL(tcp_get_info); +diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c +index 35088cd30840d..38752bdedee39 100644 +--- a/net/ipv4/tcp_fastopen.c ++++ b/net/ipv4/tcp_fastopen.c +@@ -446,7 +446,10 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, + cookie->len = -1; + return true; + } +- return cookie->len > 0; ++ if (cookie->len > 0) ++ return true; ++ tcp_sk(sk)->fastopen_client_fail = TFO_COOKIE_UNAVAILABLE; ++ return false; + } + + /* This function checks if we want to defer sending SYN until the first +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 702f46d2f9fea..57907fe94b238 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5896,6 +5896,10 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, + tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp); + + if (data) { /* Retransmit unacked data in SYN */ ++ if (tp->total_retrans) ++ tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED; ++ else ++ tp->fastopen_client_fail = TFO_DATA_NOT_ACKED; + skb_rbtree_walk_from(data) { + if (__tcp_retransmit_skb(sk, data, 1)) + break; +-- +2.43.0 + diff --git a/queue-5.4/tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch b/queue-5.4/tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch new file mode 100644 index 00000000000..169cf85a5a6 --- /dev/null +++ b/queue-5.4/tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch @@ -0,0 +1,107 @@ +From 86779da8aa723c72ffed29675f49c1096e2400cd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Jul 2024 13:12:46 -0400 +Subject: tcp: fix incorrect undo caused by DSACK of TLP retransmit + +From: Neal Cardwell + +[ Upstream commit 0ec986ed7bab6801faed1440e8839dcc710331ff ] + +Loss recovery undo_retrans bookkeeping had a long-standing bug where a +DSACK from a spurious TLP retransmit packet could cause an erroneous +undo of a fast recovery or RTO recovery that repaired a single +really-lost packet (in a sequence range outside that of the TLP +retransmit). Basically, because the loss recovery state machine didn't +account for the fact that it sent a TLP retransmit, the DSACK for the +TLP retransmit could erroneously be implicitly be interpreted as +corresponding to the normal fast recovery or RTO recovery retransmit +that plugged a real hole, thus resulting in an improper undo. + +For example, consider the following buggy scenario where there is a +real packet loss but the congestion control response is improperly +undone because of this bug: + ++ send packets P1, P2, P3, P4 ++ P1 is really lost ++ send TLP retransmit of P4 ++ receive SACK for original P2, P3, P4 ++ enter fast recovery, fast-retransmit P1, increment undo_retrans to 1 ++ receive DSACK for TLP P4, decrement undo_retrans to 0, undo (bug!) ++ receive cumulative ACK for P1-P4 (fast retransmit plugged real hole) + +The fix: when we initialize undo machinery in tcp_init_undo(), if +there is a TLP retransmit in flight, then increment tp->undo_retrans +so that we make sure that we receive a DSACK corresponding to the TLP +retransmit, as well as DSACKs for all later normal retransmits, before +triggering a loss recovery undo. Note that we also have to move the +line that clears tp->tlp_high_seq for RTO recovery, so that upon RTO +we remember the tp->tlp_high_seq value until tcp_init_undo() and clear +it only afterward. + +Also note that the bug dates back to the original 2013 TLP +implementation, commit 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)"). + +However, this patch will only compile and work correctly with kernels +that have tp->tlp_retrans, which was added only in v5.8 in 2020 in +commit 76be93fc0702 ("tcp: allow at most one TLP probe per flight"). +So we associate this fix with that later commit. + +Fixes: 76be93fc0702 ("tcp: allow at most one TLP probe per flight") +Signed-off-by: Neal Cardwell +Reviewed-by: Eric Dumazet +Cc: Yuchung Cheng +Cc: Kevin Yang +Link: https://patch.msgid.link/20240703171246.1739561-1-ncardwell.sw@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 11 ++++++++++- + net/ipv4/tcp_timer.c | 2 -- + 2 files changed, 10 insertions(+), 3 deletions(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 57907fe94b238..cf6221e9fda50 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -1956,8 +1956,16 @@ void tcp_clear_retrans(struct tcp_sock *tp) + static inline void tcp_init_undo(struct tcp_sock *tp) + { + tp->undo_marker = tp->snd_una; ++ + /* Retransmission still in flight may cause DSACKs later. */ +- tp->undo_retrans = tp->retrans_out ? : -1; ++ /* First, account for regular retransmits in flight: */ ++ tp->undo_retrans = tp->retrans_out; ++ /* Next, account for TLP retransmits in flight: */ ++ if (tp->tlp_high_seq && tp->tlp_retrans) ++ tp->undo_retrans++; ++ /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */ ++ if (!tp->undo_retrans) ++ tp->undo_retrans = -1; + } + + static bool tcp_is_rack(const struct sock *sk) +@@ -2036,6 +2044,7 @@ void tcp_enter_loss(struct sock *sk) + + tcp_set_ca_state(sk, TCP_CA_Loss); + tp->high_seq = tp->snd_nxt; ++ tp->tlp_high_seq = 0; + tcp_ecn_queue_cwr(tp); + + /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous +diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c +index 551c4a78f68d4..e20fced3c9cf6 100644 +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -467,8 +467,6 @@ void tcp_retransmit_timer(struct sock *sk) + if (!tp->packets_out || WARN_ON_ONCE(tcp_rtx_queue_empty(sk))) + return; + +- tp->tlp_high_seq = 0; +- + if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && + !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { + /* Receiver dastardly shrinks window. Our retransmits +-- +2.43.0 + diff --git a/queue-5.4/udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch b/queue-5.4/udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch new file mode 100644 index 00000000000..f015ac06816 --- /dev/null +++ b/queue-5.4/udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch @@ -0,0 +1,123 @@ +From 953be57e15b0bab3d1b81628a23330ae36844c2d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Jul 2024 12:13:56 -0700 +Subject: udp: Set SOCK_RCU_FREE earlier in udp_lib_get_port(). + +From: Kuniyuki Iwashima + +[ Upstream commit 5c0b485a8c6116516f33925b9ce5b6104a6eadfd ] + +syzkaller triggered the warning [0] in udp_v4_early_demux(). + +In udp_v[46]_early_demux() and sk_lookup(), we do not touch the refcount +of the looked-up sk and use sock_pfree() as skb->destructor, so we check +SOCK_RCU_FREE to ensure that the sk is safe to access during the RCU grace +period. + +Currently, SOCK_RCU_FREE is flagged for a bound socket after being put +into the hash table. Moreover, the SOCK_RCU_FREE check is done too early +in udp_v[46]_early_demux() and sk_lookup(), so there could be a small race +window: + + CPU1 CPU2 + ---- ---- + udp_v4_early_demux() udp_lib_get_port() + | |- hlist_add_head_rcu() + |- sk = __udp4_lib_demux_lookup() | + |- DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk)); + `- sock_set_flag(sk, SOCK_RCU_FREE) + +We had the same bug in TCP and fixed it in commit 871019b22d1b ("net: +set SOCK_RCU_FREE before inserting socket into hashtable"). + +Let's apply the same fix for UDP. + +[0]: +WARNING: CPU: 0 PID: 11198 at net/ipv4/udp.c:2599 udp_v4_early_demux+0x481/0xb70 net/ipv4/udp.c:2599 +Modules linked in: +CPU: 0 PID: 11198 Comm: syz-executor.1 Not tainted 6.9.0-g93bda33046e7 #13 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +RIP: 0010:udp_v4_early_demux+0x481/0xb70 net/ipv4/udp.c:2599 +Code: c5 7a 15 fe bb 01 00 00 00 44 89 e9 31 ff d3 e3 81 e3 bf ef ff ff 89 de e8 2c 74 15 fe 85 db 0f 85 02 06 00 00 e8 9f 7a 15 fe <0f> 0b e8 98 7a 15 fe 49 8d 7e 60 e8 4f 39 2f fe 49 c7 46 60 20 52 +RSP: 0018:ffffc9000ce3fa58 EFLAGS: 00010293 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff8318c92c +RDX: ffff888036ccde00 RSI: ffffffff8318c2f1 RDI: 0000000000000001 +RBP: ffff88805a2dd6e0 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000000 R11: 0001ffffffffffff R12: ffff88805a2dd680 +R13: 0000000000000007 R14: ffff88800923f900 R15: ffff88805456004e +FS: 00007fc449127640(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007fc449126e38 CR3: 000000003de4b002 CR4: 0000000000770ef0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 +PKRU: 55555554 +Call Trace: + + ip_rcv_finish_core.constprop.0+0xbdd/0xd20 net/ipv4/ip_input.c:349 + ip_rcv_finish+0xda/0x150 net/ipv4/ip_input.c:447 + NF_HOOK include/linux/netfilter.h:314 [inline] + NF_HOOK include/linux/netfilter.h:308 [inline] + ip_rcv+0x16c/0x180 net/ipv4/ip_input.c:569 + __netif_receive_skb_one_core+0xb3/0xe0 net/core/dev.c:5624 + __netif_receive_skb+0x21/0xd0 net/core/dev.c:5738 + netif_receive_skb_internal net/core/dev.c:5824 [inline] + netif_receive_skb+0x271/0x300 net/core/dev.c:5884 + tun_rx_batched drivers/net/tun.c:1549 [inline] + tun_get_user+0x24db/0x2c50 drivers/net/tun.c:2002 + tun_chr_write_iter+0x107/0x1a0 drivers/net/tun.c:2048 + new_sync_write fs/read_write.c:497 [inline] + vfs_write+0x76f/0x8d0 fs/read_write.c:590 + ksys_write+0xbf/0x190 fs/read_write.c:643 + __do_sys_write fs/read_write.c:655 [inline] + __se_sys_write fs/read_write.c:652 [inline] + __x64_sys_write+0x41/0x50 fs/read_write.c:652 + x64_sys_call+0xe66/0x1990 arch/x86/include/generated/asm/syscalls_64.h:2 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0x4b/0x110 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x4b/0x53 +RIP: 0033:0x7fc44a68bc1f +Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 e9 cf f5 ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 3c d0 f5 ff 48 +RSP: 002b:00007fc449126c90 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 00000000004bc050 RCX: 00007fc44a68bc1f +RDX: 0000000000000032 RSI: 00000000200000c0 RDI: 00000000000000c8 +RBP: 00000000004bc050 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000032 R11: 0000000000000293 R12: 0000000000000000 +R13: 000000000000000b R14: 00007fc44a5ec530 R15: 0000000000000000 + + +Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF") +Reported-by: syzkaller +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20240709191356.24010-1-kuniyu@amazon.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/ipv4/udp.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c +index b17b636548122..1ccdb6a9ab892 100644 +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -313,6 +313,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, + goto fail_unlock; + } + ++ sock_set_flag(sk, SOCK_RCU_FREE); ++ + sk_add_node_rcu(sk, &hslot->head); + hslot->count++; + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); +@@ -329,7 +331,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, + hslot2->count++; + spin_unlock(&hslot2->lock); + } +- sock_set_flag(sk, SOCK_RCU_FREE); ++ + error = 0; + fail_unlock: + spin_unlock_bh(&hslot->lock); +-- +2.43.0 + diff --git a/queue-5.4/vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch b/queue-5.4/vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch new file mode 100644 index 00000000000..b746bcbc476 --- /dev/null +++ b/queue-5.4/vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch @@ -0,0 +1,88 @@ +From 057cfc7ff04bd0e4322352a624b398bf7e6bb83d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Jul 2024 08:13:01 -0400 +Subject: vfs: don't mod negative dentry count when on shrinker list + +From: Brian Foster + +[ Upstream commit aabfe57ebaa75841db47ea59091ec3c5a06d2f52 ] + +The nr_dentry_negative counter is intended to only account negative +dentries that are present on the superblock LRU. Therefore, the LRU +add, remove and isolate helpers modify the counter based on whether +the dentry is negative, but the shrinker list related helpers do not +modify the counter, and the paths that change a dentry between +positive and negative only do so if DCACHE_LRU_LIST is set. + +The problem with this is that a dentry on a shrinker list still has +DCACHE_LRU_LIST set to indicate ->d_lru is in use. The additional +DCACHE_SHRINK_LIST flag denotes whether the dentry is on LRU or a +shrink related list. Therefore if a relevant operation (i.e. unlink) +occurs while a dentry is present on a shrinker list, and the +associated codepath only checks for DCACHE_LRU_LIST, then it is +technically possible to modify the negative dentry count for a +dentry that is off the LRU. Since the shrinker list related helpers +do not modify the negative dentry count (because non-LRU dentries +should not be included in the count) when the dentry is ultimately +removed from the shrinker list, this can cause the negative dentry +count to become permanently inaccurate. + +This problem can be reproduced via a heavy file create/unlink vs. +drop_caches workload. On an 80xcpu system, I start 80 tasks each +running a 1k file create/delete loop, and one task spinning on +drop_caches. After 10 minutes or so of runtime, the idle/clean cache +negative dentry count increases from somewhere in the range of 5-10 +entries to several hundred (and increasingly grows beyond +nr_dentry_unused). + +Tweak the logic in the paths that turn a dentry negative or positive +to filter out the case where the dentry is present on a shrink +related list. This allows the above workload to maintain an accurate +negative dentry count. + +Fixes: af0c9af1b3f6 ("fs/dcache: Track & report number of negative dentries") +Signed-off-by: Brian Foster +Link: https://lore.kernel.org/r/20240703121301.247680-1-bfoster@redhat.com +Acked-by: Ian Kent +Reviewed-by: Josef Bacik +Reviewed-by: Waiman Long +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/dcache.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/fs/dcache.c b/fs/dcache.c +index 9505e5df30b74..c58b5e5cb045d 100644 +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -329,7 +329,11 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) + flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); + WRITE_ONCE(dentry->d_flags, flags); + dentry->d_inode = NULL; +- if (flags & DCACHE_LRU_LIST) ++ /* ++ * The negative counter only tracks dentries on the LRU. Don't inc if ++ * d_lru is on another list. ++ */ ++ if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) + this_cpu_inc(nr_dentry_negative); + } + +@@ -1921,9 +1925,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) + + spin_lock(&dentry->d_lock); + /* +- * Decrement negative dentry count if it was in the LRU list. ++ * The negative counter only tracks dentries on the LRU. Don't dec if ++ * d_lru is on another list. + */ +- if (dentry->d_flags & DCACHE_LRU_LIST) ++ if ((dentry->d_flags & ++ (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) + this_cpu_dec(nr_dentry_negative); + hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); + raw_write_seqcount_begin(&dentry->d_seq); +-- +2.43.0 + -- 2.47.3