git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
author Sasha Levin <sashal@kernel.org>
Sat, 13 Jul 2024 13:26:40 +0000 (09:26 -0400)
committer Sasha Levin <sashal@kernel.org>
Sat, 13 Jul 2024 13:26:40 +0000 (09:26 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
13 files changed:
queue-5.10/ethtool-netlink-do-not-return-sqi-value-if-link-is-d.patch [new file with mode: 0644]
queue-5.10/filelock-fix-potential-use-after-free-in-posix_lock_.patch [new file with mode: 0644]
queue-5.10/fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch [new file with mode: 0644]
queue-5.10/mm-prevent-derefencing-null-ptr-in-pfn_section_valid.patch [new file with mode: 0644]
queue-5.10/net-ethernet-lantiq_etop-fix-double-free-in-detach.patch [new file with mode: 0644]
queue-5.10/net-lantiq_etop-add-blank-line-after-declaration.patch [new file with mode: 0644]
queue-5.10/net-sched-fix-uaf-when-resolving-a-clash.patch [new file with mode: 0644]
queue-5.10/octeontx2-af-fix-incorrect-value-output-on-error-pat.patch [new file with mode: 0644]
queue-5.10/ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch [new file with mode: 0644]
queue-5.10/udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch [new file with mode: 0644]
queue-5.10/vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch [new file with mode: 0644]

diff --git a/queue-5.10/ethtool-netlink-do-not-return-sqi-value-if-link-is-d.patch b/queue-5.10/ethtool-netlink-do-not-return-sqi-value-if-link-is-d.patch
new file mode 100644 (file)
index 0000000..6ef47ad
--- /dev/null
@@ -0,0 +1,122 @@
+From bb56e8e1c85b9d9dd9b4e2e1a39bc0478d0f823e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Jul 2024 08:19:43 +0200
+Subject: ethtool: netlink: do not return SQI value if link is down
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit c184cf94e73b04ff7048d045f5413899bc664788 ]
+
+Do not attach SQI value if link is down. "SQI values are only valid if
+link-up condition is present" per OpenAlliance specification of
+100Base-T1 Interoperability Test suite [1]. The same rule would apply
+for other link types.
+
+[1] https://opensig.org/automotive-ethernet-specifications/#
+
+Fixes: 806602191592 ("ethtool: provide UAPI for PHY Signal Quality Index (SQI)")
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Woojung Huh <woojung.huh@microchip.com>
+Link: https://patch.msgid.link/20240709061943.729381-1-o.rempel@pengutronix.de
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ethtool/linkstate.c | 41 ++++++++++++++++++++++++++++-------------
+ 1 file changed, 28 insertions(+), 13 deletions(-)
+
+diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
+index fb676f349455a..470582a70ccbe 100644
+--- a/net/ethtool/linkstate.c
++++ b/net/ethtool/linkstate.c
+@@ -36,6 +36,8 @@ static int linkstate_get_sqi(struct net_device *dev)
+       mutex_lock(&phydev->lock);
+       if (!phydev->drv || !phydev->drv->get_sqi)
+               ret = -EOPNOTSUPP;
++      else if (!phydev->link)
++              ret = -ENETDOWN;
+       else
+               ret = phydev->drv->get_sqi(phydev);
+       mutex_unlock(&phydev->lock);
+@@ -54,6 +56,8 @@ static int linkstate_get_sqi_max(struct net_device *dev)
+       mutex_lock(&phydev->lock);
+       if (!phydev->drv || !phydev->drv->get_sqi_max)
+               ret = -EOPNOTSUPP;
++      else if (!phydev->link)
++              ret = -ENETDOWN;
+       else
+               ret = phydev->drv->get_sqi_max(phydev);
+       mutex_unlock(&phydev->lock);
+@@ -61,6 +65,17 @@ static int linkstate_get_sqi_max(struct net_device *dev)
+       return ret;
+ };
++static bool linkstate_sqi_critical_error(int sqi)
++{
++      return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN;
++}
++
++static bool linkstate_sqi_valid(struct linkstate_reply_data *data)
++{
++      return data->sqi >= 0 && data->sqi_max >= 0 &&
++             data->sqi <= data->sqi_max;
++}
++
+ static int linkstate_get_link_ext_state(struct net_device *dev,
+                                       struct linkstate_reply_data *data)
+ {
+@@ -92,12 +107,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
+       data->link = __ethtool_get_link(dev);
+       ret = linkstate_get_sqi(dev);
+-      if (ret < 0 && ret != -EOPNOTSUPP)
++      if (linkstate_sqi_critical_error(ret))
+               goto out;
+       data->sqi = ret;
+       ret = linkstate_get_sqi_max(dev);
+-      if (ret < 0 && ret != -EOPNOTSUPP)
++      if (linkstate_sqi_critical_error(ret))
+               goto out;
+       data->sqi_max = ret;
+@@ -122,11 +137,10 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base,
+       len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+               + 0;
+-      if (data->sqi != -EOPNOTSUPP)
+-              len += nla_total_size(sizeof(u32));
+-
+-      if (data->sqi_max != -EOPNOTSUPP)
+-              len += nla_total_size(sizeof(u32));
++      if (linkstate_sqi_valid(data)) {
++              len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */
++              len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */
++      }
+       if (data->link_ext_state_provided)
+               len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */
+@@ -147,13 +161,14 @@ static int linkstate_fill_reply(struct sk_buff *skb,
+           nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
+               return -EMSGSIZE;
+-      if (data->sqi != -EOPNOTSUPP &&
+-          nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
+-              return -EMSGSIZE;
++      if (linkstate_sqi_valid(data)) {
++              if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
++                      return -EMSGSIZE;
+-      if (data->sqi_max != -EOPNOTSUPP &&
+-          nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
+-              return -EMSGSIZE;
++              if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX,
++                              data->sqi_max))
++                      return -EMSGSIZE;
++      }
+       if (data->link_ext_state_provided) {
+               if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE,
+-- 
+2.43.0
+
diff --git a/queue-5.10/filelock-fix-potential-use-after-free-in-posix_lock_.patch b/queue-5.10/filelock-fix-potential-use-after-free-in-posix_lock_.patch
new file mode 100644 (file)
index 0000000..0640345
--- /dev/null
@@ -0,0 +1,50 @@
+From 03d22fb044eb38dfdc0d072aa1c260e6eaff3028 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jul 2024 18:44:48 -0400
+Subject: filelock: fix potential use-after-free in posix_lock_inode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 1b3ec4f7c03d4b07bad70697d7e2f4088d2cfe92 ]
+
+Light Hsieh reported a KASAN UAF warning in trace_posix_lock_inode().
+The request pointer had been changed earlier to point to a lock entry
+that was added to the inode's list. However, before the tracepoint could
+fire, another task raced in and freed that lock.
+
+Fix this by moving the tracepoint inside the spinlock, which should
+ensure that this doesn't happen.
+
+Fixes: 74f6f5912693 ("locks: fix KASAN: use-after-free in trace_event_raw_event_filelock_lock")
+Link: https://lore.kernel.org/linux-fsdevel/724ffb0a2962e912ea62bb0515deadf39c325112.camel@kernel.org/
+Reported-by: Light Hsieh (謝明燈) <Light.Hsieh@mediatek.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Link: https://lore.kernel.org/r/20240702-filelock-6-10-v1-1-96e766aadc98@kernel.org
+Reviewed-by: Alexander Aring <aahringo@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/locks.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/locks.c b/fs/locks.c
+index b0753c8871fb2..843fa3d3375d4 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -1392,9 +1392,9 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+               locks_wake_up_blocks(left);
+       }
+  out:
++      trace_posix_lock_inode(inode, request, error);
+       spin_unlock(&ctx->flc_lock);
+       percpu_up_read(&file_rwsem);
+-      trace_posix_lock_inode(inode, request, error);
+       /*
+        * Free any unused locks.
+        */
+-- 
+2.43.0
+
diff --git a/queue-5.10/fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch b/queue-5.10/fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch
new file mode 100644 (file)
index 0000000..2ff05df
--- /dev/null
@@ -0,0 +1,44 @@
+From b1baff8ac8e73cc154c6feabbc7a8005f36c423c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Apr 2024 10:10:08 +0800
+Subject: fs/dcache: Re-use value stored to dentry->d_flags instead of
+ re-reading
+
+From: linke li <lilinke99@qq.com>
+
+[ Upstream commit 8bfb40be31ddea0cb4664b352e1797cfe6c91976 ]
+
+Currently, the __d_clear_type_and_inode() writes the value flags to
+dentry->d_flags, then immediately re-reads it in order to use it in an if
+statement. This re-read is useless because no other update to
+dentry->d_flags can occur at this point.
+
+This commit therefore re-uses flags in the if statement instead of
+re-reading dentry->d_flags.
+
+Signed-off-by: linke li <lilinke99@qq.com>
+Link: https://lore.kernel.org/r/tencent_5E187BD0A61BA28605E85405F15228254D0A@qq.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: aabfe57ebaa7 ("vfs: don't mod negative dentry count when on shrinker list")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/dcache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/dcache.c b/fs/dcache.c
+index 976c7474d62a9..d2ebee3a120c4 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -329,7 +329,7 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
+       flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
+       WRITE_ONCE(dentry->d_flags, flags);
+       dentry->d_inode = NULL;
+-      if (dentry->d_flags & DCACHE_LRU_LIST)
++      if (flags & DCACHE_LRU_LIST)
+               this_cpu_inc(nr_dentry_negative);
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.10/mm-prevent-derefencing-null-ptr-in-pfn_section_valid.patch b/queue-5.10/mm-prevent-derefencing-null-ptr-in-pfn_section_valid.patch
new file mode 100644 (file)
index 0000000..5832d51
--- /dev/null
@@ -0,0 +1,44 @@
+From 1f2806226608f0e74022db702b0ea6a53896b7d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jun 2024 20:16:39 -0400
+Subject: mm: prevent derefencing NULL ptr in pfn_section_valid()
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit 82f0b6f041fad768c28b4ad05a683065412c226e ]
+
+Commit 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing
+memory_section->usage") changed pfn_section_valid() to add a READ_ONCE()
+call around "ms->usage" to fix a race with section_deactivate() where
+ms->usage can be cleared.  The READ_ONCE() call, by itself, is not enough
+to prevent NULL pointer dereference.  We need to check its value before
+dereferencing it.
+
+Link: https://lkml.kernel.org/r/20240626001639.1350646-1-longman@redhat.com
+Fixes: 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: Charan Teja Kalla <quic_charante@quicinc.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mmzone.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index ffae2b3308180..71150fb1cb2ad 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -1353,8 +1353,9 @@ static inline int subsection_map_index(unsigned long pfn)
+ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+ {
+       int idx = subsection_map_index(pfn);
++      struct mem_section_usage *usage = READ_ONCE(ms->usage);
+-      return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
++      return usage ? test_bit(idx, usage->subsection_map) : 0;
+ }
+ #else
+ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+-- 
+2.43.0
+
diff --git a/queue-5.10/net-ethernet-lantiq_etop-fix-double-free-in-detach.patch b/queue-5.10/net-ethernet-lantiq_etop-fix-double-free-in-detach.patch
new file mode 100644 (file)
index 0000000..b89ca45
--- /dev/null
@@ -0,0 +1,43 @@
+From 4a356ae8eee577231494d97320f336fed42309fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Jul 2024 22:58:26 +0200
+Subject: net: ethernet: lantiq_etop: fix double free in detach
+
+From: Aleksander Jan Bajkowski <olek2@wp.pl>
+
+[ Upstream commit e1533b6319ab9c3a97dad314dd88b3783bc41b69 ]
+
+The number of the currently released descriptor is never incremented
+which results in the same skb being released multiple times.
+
+Fixes: 504d4721ee8e ("MIPS: Lantiq: Add ethernet driver")
+Reported-by: Joe Perches <joe@perches.com>
+Closes: https://lore.kernel.org/all/fc1bf93d92bb5b2f99c6c62745507cc22f3a7b2d.camel@perches.com/
+Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://patch.msgid.link/20240708205826.5176-1-olek2@wp.pl
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/lantiq_etop.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
+index 62300d46d9186..1d7c0b872c594 100644
+--- a/drivers/net/ethernet/lantiq_etop.c
++++ b/drivers/net/ethernet/lantiq_etop.c
+@@ -213,9 +213,9 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch)
+       if (ch->dma.irq)
+               free_irq(ch->dma.irq, priv);
+       if (IS_RX(ch->idx)) {
+-              int desc;
++              struct ltq_dma_channel *dma = &ch->dma;
+-              for (desc = 0; desc < LTQ_DESC_NUM; desc++)
++              for (dma->desc = 0; dma->desc < LTQ_DESC_NUM; dma->desc++)
+                       dev_kfree_skb_any(ch->skb[ch->dma.desc]);
+       }
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.10/net-lantiq_etop-add-blank-line-after-declaration.patch b/queue-5.10/net-lantiq_etop-add-blank-line-after-declaration.patch
new file mode 100644 (file)
index 0000000..2a92529
--- /dev/null
@@ -0,0 +1,40 @@
+From 35013da28f5da0839f4915e0ff0bbdab37194512 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Dec 2021 23:00:31 +0100
+Subject: net: lantiq_etop: add blank line after declaration
+
+From: Aleksander Jan Bajkowski <olek2@wp.pl>
+
+[ Upstream commit 4c46625bb586a741b8d0e6bdbddbcb2549fa1d36 ]
+
+This patch adds a missing line after the declaration and
+fixes the checkpatch warning:
+
+WARNING: Missing a blank line after declarations
++              int desc;
++              for (desc = 0; desc < LTQ_DESC_NUM; desc++)
+
+Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
+Link: https://lore.kernel.org/r/20211228220031.71576-1-olek2@wp.pl
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: e1533b6319ab ("net: ethernet: lantiq_etop: fix double free in detach")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/lantiq_etop.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
+index 5ea626b1e5783..62300d46d9186 100644
+--- a/drivers/net/ethernet/lantiq_etop.c
++++ b/drivers/net/ethernet/lantiq_etop.c
+@@ -214,6 +214,7 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch)
+               free_irq(ch->dma.irq, priv);
+       if (IS_RX(ch->idx)) {
+               int desc;
++
+               for (desc = 0; desc < LTQ_DESC_NUM; desc++)
+                       dev_kfree_skb_any(ch->skb[ch->dma.desc]);
+       }
+-- 
+2.43.0
+
diff --git a/queue-5.10/net-sched-fix-uaf-when-resolving-a-clash.patch b/queue-5.10/net-sched-fix-uaf-when-resolving-a-clash.patch
new file mode 100644 (file)
index 0000000..d0493cf
--- /dev/null
@@ -0,0 +1,131 @@
+From d074acdda55dec9fc37ae1f9fa7daa5058efaca5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Jul 2024 13:37:47 +0800
+Subject: net/sched: Fix UAF when resolving a clash
+
+From: Chengen Du <chengen.du@canonical.com>
+
+[ Upstream commit 26488172b0292bed837b95a006a3f3431d1898c3 ]
+
+KASAN reports the following UAF:
+
+ BUG: KASAN: slab-use-after-free in tcf_ct_flow_table_process_conn+0x12b/0x380 [act_ct]
+ Read of size 1 at addr ffff888c07603600 by task handler130/6469
+
+ Call Trace:
+  <IRQ>
+  dump_stack_lvl+0x48/0x70
+  print_address_description.constprop.0+0x33/0x3d0
+  print_report+0xc0/0x2b0
+  kasan_report+0xd0/0x120
+  __asan_load1+0x6c/0x80
+  tcf_ct_flow_table_process_conn+0x12b/0x380 [act_ct]
+  tcf_ct_act+0x886/0x1350 [act_ct]
+  tcf_action_exec+0xf8/0x1f0
+  fl_classify+0x355/0x360 [cls_flower]
+  __tcf_classify+0x1fd/0x330
+  tcf_classify+0x21c/0x3c0
+  sch_handle_ingress.constprop.0+0x2c5/0x500
+  __netif_receive_skb_core.constprop.0+0xb25/0x1510
+  __netif_receive_skb_list_core+0x220/0x4c0
+  netif_receive_skb_list_internal+0x446/0x620
+  napi_complete_done+0x157/0x3d0
+  gro_cell_poll+0xcf/0x100
+  __napi_poll+0x65/0x310
+  net_rx_action+0x30c/0x5c0
+  __do_softirq+0x14f/0x491
+  __irq_exit_rcu+0x82/0xc0
+  irq_exit_rcu+0xe/0x20
+  common_interrupt+0xa1/0xb0
+  </IRQ>
+  <TASK>
+  asm_common_interrupt+0x27/0x40
+
+ Allocated by task 6469:
+  kasan_save_stack+0x38/0x70
+  kasan_set_track+0x25/0x40
+  kasan_save_alloc_info+0x1e/0x40
+  __kasan_krealloc+0x133/0x190
+  krealloc+0xaa/0x130
+  nf_ct_ext_add+0xed/0x230 [nf_conntrack]
+  tcf_ct_act+0x1095/0x1350 [act_ct]
+  tcf_action_exec+0xf8/0x1f0
+  fl_classify+0x355/0x360 [cls_flower]
+  __tcf_classify+0x1fd/0x330
+  tcf_classify+0x21c/0x3c0
+  sch_handle_ingress.constprop.0+0x2c5/0x500
+  __netif_receive_skb_core.constprop.0+0xb25/0x1510
+  __netif_receive_skb_list_core+0x220/0x4c0
+  netif_receive_skb_list_internal+0x446/0x620
+  napi_complete_done+0x157/0x3d0
+  gro_cell_poll+0xcf/0x100
+  __napi_poll+0x65/0x310
+  net_rx_action+0x30c/0x5c0
+  __do_softirq+0x14f/0x491
+
+ Freed by task 6469:
+  kasan_save_stack+0x38/0x70
+  kasan_set_track+0x25/0x40
+  kasan_save_free_info+0x2b/0x60
+  ____kasan_slab_free+0x180/0x1f0
+  __kasan_slab_free+0x12/0x30
+  slab_free_freelist_hook+0xd2/0x1a0
+  __kmem_cache_free+0x1a2/0x2f0
+  kfree+0x78/0x120
+  nf_conntrack_free+0x74/0x130 [nf_conntrack]
+  nf_ct_destroy+0xb2/0x140 [nf_conntrack]
+  __nf_ct_resolve_clash+0x529/0x5d0 [nf_conntrack]
+  nf_ct_resolve_clash+0xf6/0x490 [nf_conntrack]
+  __nf_conntrack_confirm+0x2c6/0x770 [nf_conntrack]
+  tcf_ct_act+0x12ad/0x1350 [act_ct]
+  tcf_action_exec+0xf8/0x1f0
+  fl_classify+0x355/0x360 [cls_flower]
+  __tcf_classify+0x1fd/0x330
+  tcf_classify+0x21c/0x3c0
+  sch_handle_ingress.constprop.0+0x2c5/0x500
+  __netif_receive_skb_core.constprop.0+0xb25/0x1510
+  __netif_receive_skb_list_core+0x220/0x4c0
+  netif_receive_skb_list_internal+0x446/0x620
+  napi_complete_done+0x157/0x3d0
+  gro_cell_poll+0xcf/0x100
+  __napi_poll+0x65/0x310
+  net_rx_action+0x30c/0x5c0
+  __do_softirq+0x14f/0x491
+
+The ct may be dropped if a clash has been resolved but is still passed to
+the tcf_ct_flow_table_process_conn function for further usage. This issue
+can be fixed by retrieving ct from skb again after confirming conntrack.
+
+Fixes: 0cc254e5aa37 ("net/sched: act_ct: Offload connections with commit action")
+Co-developed-by: Gerald Yang <gerald.yang@canonical.com>
+Signed-off-by: Gerald Yang <gerald.yang@canonical.com>
+Signed-off-by: Chengen Du <chengen.du@canonical.com>
+Link: https://patch.msgid.link/20240710053747.13223-1-chengen.du@canonical.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/act_ct.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index c6d6a6fe9602b..a59a8ad387211 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -1038,6 +1038,14 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
+                */
+               if (nf_conntrack_confirm(skb) != NF_ACCEPT)
+                       goto drop;
++
++              /* The ct may be dropped if a clash has been resolved,
++               * so it's necessary to retrieve it from skb again to
++               * prevent UAF.
++               */
++              ct = nf_ct_get(skb, &ctinfo);
++              if (!ct)
++                      skip_add = true;
+       }
+       if (!skip_add)
+-- 
+2.43.0
+
diff --git a/queue-5.10/octeontx2-af-fix-incorrect-value-output-on-error-pat.patch b/queue-5.10/octeontx2-af-fix-incorrect-value-output-on-error-pat.patch
new file mode 100644 (file)
index 0000000..837cc9a
--- /dev/null
@@ -0,0 +1,44 @@
+From 6c623c457f249287da3d6fea7dc8994c5759b33d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Jul 2024 12:53:17 +0300
+Subject: octeontx2-af: Fix incorrect value output on error path in
+ rvu_check_rsrc_availability()
+
+From: Aleksandr Mishin <amishin@t-argos.ru>
+
+[ Upstream commit 442e26af9aa8115c96541026cbfeaaa76c85d178 ]
+
+In rvu_check_rsrc_availability(), in case of an invalid SSOW req, incorrect
+data is printed to the error log: the 'req->sso' value is printed instead of
+'req->ssow'. This looks like a "copy-paste" mistake.
+
+Fix this mistake by replacing 'req->sso' with 'req->ssow'.
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: 746ea74241fa ("octeontx2-af: Add RVU block LF provisioning support")
+Signed-off-by: Aleksandr Mishin <amishin@t-argos.ru>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20240705095317.12640-1-amishin@t-argos.ru
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+index 23b829f974de1..e8a2552fb690a 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+@@ -1357,7 +1357,7 @@ static int rvu_check_rsrc_availability(struct rvu *rvu,
+               if (req->ssow > block->lf.max) {
+                       dev_err(&rvu->pdev->dev,
+                               "Func 0x%x: Invalid SSOW req, %d > max %d\n",
+-                               pcifunc, req->sso, block->lf.max);
++                               pcifunc, req->ssow, block->lf.max);
+                       return -EINVAL;
+               }
+               mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr);
+-- 
+2.43.0
+
diff --git a/queue-5.10/ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch b/queue-5.10/ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch
new file mode 100644 (file)
index 0000000..1642397
--- /dev/null
@@ -0,0 +1,67 @@
+From 1ee6b84b3c3082db411d27bc26417b20b98a6752 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Jul 2024 14:56:15 +0300
+Subject: ppp: reject claimed-as-LCP but actually malformed packets
+
+From: Dmitry Antipov <dmantipov@yandex.ru>
+
+[ Upstream commit f2aeb7306a898e1cbd03963d376f4b6656ca2b55 ]
+
+Since 'ppp_async_encode()' assumes valid LCP packets (with code
+from 1 to 7 inclusive), add 'ppp_check_packet()' to ensure that
+LCP packet has an actual body beyond PPP_LCP header bytes, and
+reject claimed-as-LCP but actually malformed data otherwise.
+
+Reported-by: syzbot+ec0723ba9605678b14bf@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=ec0723ba9605678b14bf
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ppp/ppp_generic.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
+index b825c6a9b6dde..e2bca6fa08220 100644
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -70,6 +70,7 @@
+ #define MPHDRLEN_SSN  4       /* ditto with short sequence numbers */
+ #define PPP_PROTO_LEN 2
++#define PPP_LCP_HDRLEN        4
+ /*
+  * An instance of /dev/ppp can be associated with either a ppp
+@@ -489,6 +490,15 @@ static ssize_t ppp_read(struct file *file, char __user *buf,
+       return ret;
+ }
++static bool ppp_check_packet(struct sk_buff *skb, size_t count)
++{
++      /* LCP packets must include LCP header which 4 bytes long:
++       * 1-byte code, 1-byte identifier, and 2-byte length.
++       */
++      return get_unaligned_be16(skb->data) != PPP_LCP ||
++              count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN;
++}
++
+ static ssize_t ppp_write(struct file *file, const char __user *buf,
+                        size_t count, loff_t *ppos)
+ {
+@@ -511,6 +521,11 @@ static ssize_t ppp_write(struct file *file, const char __user *buf,
+               kfree_skb(skb);
+               goto out;
+       }
++      ret = -EINVAL;
++      if (unlikely(!ppp_check_packet(skb, count))) {
++              kfree_skb(skb);
++              goto out;
++      }
+       switch (pf->kind) {
+       case INTERFACE:
+-- 
+2.43.0
+
index 84cd03d4ba36a8e246398c31bd50038766e85c59..6ff8b4a8ffc24e15585421b42c522174f3129205 100644 (file)
@@ -56,3 +56,15 @@ platform-x86-touchscreen_dmi-add-info-for-the-ezpad-.patch
 nvmet-fix-a-possible-leak-when-destroy-a-ctrl-during.patch
 kbuild-fix-short-log-for-as-in-link-vmlinux.sh.patch
 nilfs2-fix-incorrect-inode-allocation-from-reserved-inodes.patch
+mm-prevent-derefencing-null-ptr-in-pfn_section_valid.patch
+filelock-fix-potential-use-after-free-in-posix_lock_.patch
+fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch
+vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch
+tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch
+octeontx2-af-fix-incorrect-value-output-on-error-pat.patch
+net-lantiq_etop-add-blank-line-after-declaration.patch
+net-ethernet-lantiq_etop-fix-double-free-in-detach.patch
+ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch
+ethtool-netlink-do-not-return-sqi-value-if-link-is-d.patch
+udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch
+net-sched-fix-uaf-when-resolving-a-clash.patch
diff --git a/queue-5.10/tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch b/queue-5.10/tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch
new file mode 100644 (file)
index 0000000..08174a3
--- /dev/null
@@ -0,0 +1,107 @@
+From fad662eaad0bba7222c08cfb2dbcdc6567a42eda Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jul 2024 13:12:46 -0400
+Subject: tcp: fix incorrect undo caused by DSACK of TLP retransmit
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit 0ec986ed7bab6801faed1440e8839dcc710331ff ]
+
+Loss recovery undo_retrans bookkeeping had a long-standing bug where a
+DSACK from a spurious TLP retransmit packet could cause an erroneous
+undo of a fast recovery or RTO recovery that repaired a single
+really-lost packet (in a sequence range outside that of the TLP
+retransmit). Basically, because the loss recovery state machine didn't
+account for the fact that it sent a TLP retransmit, the DSACK for the
+TLP retransmit could erroneously be implicitly interpreted as
+corresponding to the normal fast recovery or RTO recovery retransmit
+that plugged a real hole, thus resulting in an improper undo.
+
+For example, consider the following buggy scenario where there is a
+real packet loss but the congestion control response is improperly
+undone because of this bug:
+
++ send packets P1, P2, P3, P4
++ P1 is really lost
++ send TLP retransmit of P4
++ receive SACK for original P2, P3, P4
++ enter fast recovery, fast-retransmit P1, increment undo_retrans to 1
++ receive DSACK for TLP P4, decrement undo_retrans to 0, undo (bug!)
++ receive cumulative ACK for P1-P4 (fast retransmit plugged real hole)
+
+The fix: when we initialize undo machinery in tcp_init_undo(), if
+there is a TLP retransmit in flight, then increment tp->undo_retrans
+so that we make sure that we receive a DSACK corresponding to the TLP
+retransmit, as well as DSACKs for all later normal retransmits, before
+triggering a loss recovery undo. Note that we also have to move the
+line that clears tp->tlp_high_seq for RTO recovery, so that upon RTO
+we remember the tp->tlp_high_seq value until tcp_init_undo() and clear
+it only afterward.
+
+Also note that the bug dates back to the original 2013 TLP
+implementation, commit 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)").
+
+However, this patch will only compile and work correctly with kernels
+that have tp->tlp_retrans, which was added only in v5.8 in 2020 in
+commit 76be93fc0702 ("tcp: allow at most one TLP probe per flight").
+So we associate this fix with that later commit.
+
+Fixes: 76be93fc0702 ("tcp: allow at most one TLP probe per flight")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Kevin Yang <yyd@google.com>
+Link: https://patch.msgid.link/20240703171246.1739561-1-ncardwell.sw@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 11 ++++++++++-
+ net/ipv4/tcp_timer.c |  2 --
+ 2 files changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 604ff1b04c3ef..06c03b21500fb 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2084,8 +2084,16 @@ void tcp_clear_retrans(struct tcp_sock *tp)
+ static inline void tcp_init_undo(struct tcp_sock *tp)
+ {
+       tp->undo_marker = tp->snd_una;
++
+       /* Retransmission still in flight may cause DSACKs later. */
+-      tp->undo_retrans = tp->retrans_out ? : -1;
++      /* First, account for regular retransmits in flight: */
++      tp->undo_retrans = tp->retrans_out;
++      /* Next, account for TLP retransmits in flight: */
++      if (tp->tlp_high_seq && tp->tlp_retrans)
++              tp->undo_retrans++;
++      /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */
++      if (!tp->undo_retrans)
++              tp->undo_retrans = -1;
+ }
+ static bool tcp_is_rack(const struct sock *sk)
+@@ -2164,6 +2172,7 @@ void tcp_enter_loss(struct sock *sk)
+       tcp_set_ca_state(sk, TCP_CA_Loss);
+       tp->high_seq = tp->snd_nxt;
++      tp->tlp_high_seq = 0;
+       tcp_ecn_queue_cwr(tp);
+       /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
+diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
+index 5c7e10939dd90..2925b985e11cb 100644
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -492,8 +492,6 @@ void tcp_retransmit_timer(struct sock *sk)
+       if (WARN_ON_ONCE(!skb))
+               return;
+-      tp->tlp_high_seq = 0;
+-
+       if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
+           !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
+               /* Receiver dastardly shrinks window. Our retransmits
+-- 
+2.43.0
+
diff --git a/queue-5.10/udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch b/queue-5.10/udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch
new file mode 100644 (file)
index 0000000..e5c7e49
--- /dev/null
@@ -0,0 +1,123 @@
+From 5009199d5cfbefc0b3b837cc9906ed54b1800974 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Jul 2024 12:13:56 -0700
+Subject: udp: Set SOCK_RCU_FREE earlier in udp_lib_get_port().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 5c0b485a8c6116516f33925b9ce5b6104a6eadfd ]
+
+syzkaller triggered the warning [0] in udp_v4_early_demux().
+
+In udp_v[46]_early_demux() and sk_lookup(), we do not touch the refcount
+of the looked-up sk and use sock_pfree() as skb->destructor, so we check
+SOCK_RCU_FREE to ensure that the sk is safe to access during the RCU grace
+period.
+
+Currently, SOCK_RCU_FREE is flagged for a bound socket after being put
+into the hash table.  Moreover, the SOCK_RCU_FREE check is done too early
+in udp_v[46]_early_demux() and sk_lookup(), so there could be a small race
+window:
+
+  CPU1                                 CPU2
+  ----                                 ----
+  udp_v4_early_demux()                 udp_lib_get_port()
+  |                                    |- hlist_add_head_rcu()
+  |- sk = __udp4_lib_demux_lookup()    |
+  |- DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk));
+                                       `- sock_set_flag(sk, SOCK_RCU_FREE)
+
+We had the same bug in TCP and fixed it in commit 871019b22d1b ("net:
+set SOCK_RCU_FREE before inserting socket into hashtable").
+
+Let's apply the same fix for UDP.
+
+[0]:
+WARNING: CPU: 0 PID: 11198 at net/ipv4/udp.c:2599 udp_v4_early_demux+0x481/0xb70 net/ipv4/udp.c:2599
+Modules linked in:
+CPU: 0 PID: 11198 Comm: syz-executor.1 Not tainted 6.9.0-g93bda33046e7 #13
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+RIP: 0010:udp_v4_early_demux+0x481/0xb70 net/ipv4/udp.c:2599
+Code: c5 7a 15 fe bb 01 00 00 00 44 89 e9 31 ff d3 e3 81 e3 bf ef ff ff 89 de e8 2c 74 15 fe 85 db 0f 85 02 06 00 00 e8 9f 7a 15 fe <0f> 0b e8 98 7a 15 fe 49 8d 7e 60 e8 4f 39 2f fe 49 c7 46 60 20 52
+RSP: 0018:ffffc9000ce3fa58 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff8318c92c
+RDX: ffff888036ccde00 RSI: ffffffff8318c2f1 RDI: 0000000000000001
+RBP: ffff88805a2dd6e0 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0001ffffffffffff R12: ffff88805a2dd680
+R13: 0000000000000007 R14: ffff88800923f900 R15: ffff88805456004e
+FS:  00007fc449127640(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fc449126e38 CR3: 000000003de4b002 CR4: 0000000000770ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ip_rcv_finish_core.constprop.0+0xbdd/0xd20 net/ipv4/ip_input.c:349
+ ip_rcv_finish+0xda/0x150 net/ipv4/ip_input.c:447
+ NF_HOOK include/linux/netfilter.h:314 [inline]
+ NF_HOOK include/linux/netfilter.h:308 [inline]
+ ip_rcv+0x16c/0x180 net/ipv4/ip_input.c:569
+ __netif_receive_skb_one_core+0xb3/0xe0 net/core/dev.c:5624
+ __netif_receive_skb+0x21/0xd0 net/core/dev.c:5738
+ netif_receive_skb_internal net/core/dev.c:5824 [inline]
+ netif_receive_skb+0x271/0x300 net/core/dev.c:5884
+ tun_rx_batched drivers/net/tun.c:1549 [inline]
+ tun_get_user+0x24db/0x2c50 drivers/net/tun.c:2002
+ tun_chr_write_iter+0x107/0x1a0 drivers/net/tun.c:2048
+ new_sync_write fs/read_write.c:497 [inline]
+ vfs_write+0x76f/0x8d0 fs/read_write.c:590
+ ksys_write+0xbf/0x190 fs/read_write.c:643
+ __do_sys_write fs/read_write.c:655 [inline]
+ __se_sys_write fs/read_write.c:652 [inline]
+ __x64_sys_write+0x41/0x50 fs/read_write.c:652
+ x64_sys_call+0xe66/0x1990 arch/x86/include/generated/asm/syscalls_64.h:2
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x4b/0x110 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x4b/0x53
+RIP: 0033:0x7fc44a68bc1f
+Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 e9 cf f5 ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 3c d0 f5 ff 48
+RSP: 002b:00007fc449126c90 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 00000000004bc050 RCX: 00007fc44a68bc1f
+RDX: 0000000000000032 RSI: 00000000200000c0 RDI: 00000000000000c8
+RBP: 00000000004bc050 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000032 R11: 0000000000000293 R12: 0000000000000000
+R13: 000000000000000b R14: 00007fc44a5ec530 R15: 0000000000000000
+ </TASK>
+
+Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20240709191356.24010-1-kuniyu@amazon.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/udp.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index da9015efb45e4..6ad25dc9710c1 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -317,6 +317,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
+                       goto fail_unlock;
+               }
++              sock_set_flag(sk, SOCK_RCU_FREE);
++
+               sk_add_node_rcu(sk, &hslot->head);
+               hslot->count++;
+               sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+@@ -333,7 +335,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
+               hslot2->count++;
+               spin_unlock(&hslot2->lock);
+       }
+-      sock_set_flag(sk, SOCK_RCU_FREE);
++
+       error = 0;
+ fail_unlock:
+       spin_unlock_bh(&hslot->lock);
+-- 
+2.43.0
+
diff --git a/queue-5.10/vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch b/queue-5.10/vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch
new file mode 100644 (file)
index 0000000..fd37d69
--- /dev/null
@@ -0,0 +1,88 @@
+From a9dff7ab49db06e162c0c31e342906457070d417 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jul 2024 08:13:01 -0400
+Subject: vfs: don't mod negative dentry count when on shrinker list
+
+From: Brian Foster <bfoster@redhat.com>
+
+[ Upstream commit aabfe57ebaa75841db47ea59091ec3c5a06d2f52 ]
+
+The nr_dentry_negative counter is intended to only account negative
+dentries that are present on the superblock LRU. Therefore, the LRU
+add, remove and isolate helpers modify the counter based on whether
+the dentry is negative, but the shrinker list related helpers do not
+modify the counter, and the paths that change a dentry between
+positive and negative only do so if DCACHE_LRU_LIST is set.
+
+The problem with this is that a dentry on a shrinker list still has
+DCACHE_LRU_LIST set to indicate ->d_lru is in use. The additional
+DCACHE_SHRINK_LIST flag denotes whether the dentry is on LRU or a
+shrink related list. Therefore if a relevant operation (i.e. unlink)
+occurs while a dentry is present on a shrinker list, and the
+associated codepath only checks for DCACHE_LRU_LIST, then it is
+technically possible to modify the negative dentry count for a
+dentry that is off the LRU. Since the shrinker list related helpers
+do not modify the negative dentry count (because non-LRU dentries
+should not be included in the count) when the dentry is ultimately
+removed from the shrinker list, this can cause the negative dentry
+count to become permanently inaccurate.
+
+This problem can be reproduced via a heavy file create/unlink vs.
+drop_caches workload. On an 80xcpu system, I start 80 tasks each
+running a 1k file create/delete loop, and one task spinning on
+drop_caches. After 10 minutes or so of runtime, the idle/clean cache
+negative dentry count increases from somewhere in the range of 5-10
+entries to several hundred (and increasingly grows beyond
+nr_dentry_unused).
+
+Tweak the logic in the paths that turn a dentry negative or positive
+to filter out the case where the dentry is present on a shrink
+related list. This allows the above workload to maintain an accurate
+negative dentry count.
+
+Fixes: af0c9af1b3f6 ("fs/dcache: Track & report number of negative dentries")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Link: https://lore.kernel.org/r/20240703121301.247680-1-bfoster@redhat.com
+Acked-by: Ian Kent <ikent@redhat.com>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/dcache.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/fs/dcache.c b/fs/dcache.c
+index d2ebee3a120c4..406a71abb1b59 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -329,7 +329,11 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
+       flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
+       WRITE_ONCE(dentry->d_flags, flags);
+       dentry->d_inode = NULL;
+-      if (flags & DCACHE_LRU_LIST)
++      /*
++       * The negative counter only tracks dentries on the LRU. Don't inc if
++       * d_lru is on another list.
++       */
++      if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
+               this_cpu_inc(nr_dentry_negative);
+ }
+@@ -1940,9 +1944,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
+       spin_lock(&dentry->d_lock);
+       /*
+-       * Decrement negative dentry count if it was in the LRU list.
++       * The negative counter only tracks dentries on the LRU. Don't dec if
++       * d_lru is on another list.
+        */
+-      if (dentry->d_flags & DCACHE_LRU_LIST)
++      if ((dentry->d_flags &
++           (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
+               this_cpu_dec(nr_dentry_negative);
+       hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+       raw_write_seqcount_begin(&dentry->d_seq);
+-- 
+2.43.0
+