git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author Sasha Levin <sashal@kernel.org>
Sat, 13 Jul 2024 13:26:38 +0000 (09:26 -0400)
committer Sasha Levin <sashal@kernel.org>
Sat, 13 Jul 2024 13:26:38 +0000 (09:26 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
30 files changed:
queue-6.1/bpf-fix-order-of-args-in-call-to-bpf_map_kvcalloc.patch [new file with mode: 0644]
queue-6.1/bpf-reduce-smap-elem_size.patch [new file with mode: 0644]
queue-6.1/bpf-refactor-some-inode-task-sk-storage-functions-fo.patch [new file with mode: 0644]
queue-6.1/bpf-remove-__bpf_local_storage_map_alloc.patch [new file with mode: 0644]
queue-6.1/bpf-use-bpf_map_kvcalloc-in-bpf_local_storage.patch [new file with mode: 0644]
queue-6.1/cachefiles-add-missing-lock-protection-when-polling.patch [new file with mode: 0644]
queue-6.1/cachefiles-cancel-all-requests-for-the-object-that-i.patch [new file with mode: 0644]
queue-6.1/cachefiles-cyclic-allocation-of-msg_id-to-avoid-reus.patch [new file with mode: 0644]
queue-6.1/cachefiles-narrow-the-scope-of-triggering-epollin-ev.patch [new file with mode: 0644]
queue-6.1/cachefiles-propagate-errors-from-vfs_getxattr-to-avo.patch [new file with mode: 0644]
queue-6.1/cachefiles-stop-sending-new-request-when-dropping-ob.patch [new file with mode: 0644]
queue-6.1/cachefiles-wait-for-ondemand_object_worker-to-finish.patch [new file with mode: 0644]
queue-6.1/ethtool-netlink-do-not-return-sqi-value-if-link-is-d.patch [new file with mode: 0644]
queue-6.1/filelock-fix-potential-use-after-free-in-posix_lock_.patch [new file with mode: 0644]
queue-6.1/fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch [new file with mode: 0644]
queue-6.1/i40e-fix-xdp-program-unloading-while-removing-the-dr.patch [new file with mode: 0644]
queue-6.1/mm-prevent-derefencing-null-ptr-in-pfn_section_valid.patch [new file with mode: 0644]
queue-6.1/net-ethernet-lantiq_etop-fix-double-free-in-detach.patch [new file with mode: 0644]
queue-6.1/net-ethernet-mtk-star-emac-set-mac_managed_pm-when-p.patch [new file with mode: 0644]
queue-6.1/net-fix-rc7-s-__skb_datagram_iter.patch [new file with mode: 0644]
queue-6.1/net-phy-microchip-lan87xx-reinit-phy-after-cable-tes.patch [new file with mode: 0644]
queue-6.1/net-sched-fix-uaf-when-resolving-a-clash.patch [new file with mode: 0644]
queue-6.1/net-sunrpc-remap-eperm-in-case-of-connection-failure.patch [new file with mode: 0644]
queue-6.1/octeontx2-af-fix-incorrect-value-output-on-error-pat.patch [new file with mode: 0644]
queue-6.1/ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch [new file with mode: 0644]
queue-6.1/series [new file with mode: 0644]
queue-6.1/skmsg-skip-zero-length-skb-in-sk_msg_recvmsg.patch [new file with mode: 0644]
queue-6.1/tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch [new file with mode: 0644]
queue-6.1/udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch [new file with mode: 0644]
queue-6.1/vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch [new file with mode: 0644]

diff --git a/queue-6.1/bpf-fix-order-of-args-in-call-to-bpf_map_kvcalloc.patch b/queue-6.1/bpf-fix-order-of-args-in-call-to-bpf_map_kvcalloc.patch
new file mode 100644 (file)
index 0000000..87ae30b
--- /dev/null
@@ -0,0 +1,61 @@
+From 690a7c085596a46117e8a65d8e038c4afb3f1112 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Jul 2024 12:05:22 +0200
+Subject: bpf: fix order of args in call to bpf_map_kvcalloc
+
+From: Mohammad Shehar Yaar Tausif <sheharyaar48@gmail.com>
+
+[ Upstream commit af253aef183a31ce62d2e39fc520b0ebfb562bb9 ]
+
+The original function call passed the bucket size before the number of
+buckets, which raises the 'calloc-transposed-args' error on compilation.
+
+Vlastimil Babka added:
+
+The order of parameters can be traced back all the way to 6ac99e8f23d4
+("bpf: Introduce bpf sk local storage") accross several refactorings,
+and that's why the commit is used as a Fixes: tag.
+
+In v6.10-rc1, a different commit 2c321f3f70bc ("mm: change inlined
+allocation helpers to account at the call site") however exposed the
+order of args in a way that gcc-14 has enough visibility to start
+warning about it, because (in !CONFIG_MEMCG case) bpf_map_kvcalloc is
+then a macro alias for kvcalloc instead of a static inline wrapper.
+
+To sum up, the warning happens when the following conditions are all met:
+
+- gcc-14 is used (didn't see it with gcc-13)
+- commit 2c321f3f70bc is present
+- CONFIG_MEMCG is not enabled in .config
+- CONFIG_WERROR turns this from a compiler warning to error
+
+Fixes: 6ac99e8f23d4 ("bpf: Introduce bpf sk local storage")
+Reviewed-by: Andrii Nakryiko <andrii@kernel.org>
+Tested-by: Christian Kujau <lists@nerdbynature.de>
+Signed-off-by: Mohammad Shehar Yaar Tausif <sheharyaar48@gmail.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Link: https://lore.kernel.org/r/20240710100521.15061-2-vbabka@suse.cz
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/bpf_local_storage.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
+index 888b8e481083f..51a9f024c1829 100644
+--- a/kernel/bpf/bpf_local_storage.c
++++ b/kernel/bpf/bpf_local_storage.c
+@@ -620,8 +620,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
+       nbuckets = max_t(u32, 2, nbuckets);
+       smap->bucket_log = ilog2(nbuckets);
+-      smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
+-                                       nbuckets, GFP_USER | __GFP_NOWARN);
++      smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets,
++                                       sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN);
+       if (!smap->buckets) {
+               bpf_map_area_free(smap);
+               return ERR_PTR(-ENOMEM);
+-- 
+2.43.0
+
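A minimal userspace sketch (hypothetical, not part of any patch in this series) of the gcc-14 diagnostic that motivated the fix above; the struct and variable names are made up for illustration:

#include <stdlib.h>

struct bucket { long head, lock; };

int main(void)
{
        size_t nbuckets = 16;

        /* gcc-14 can warn here (-Wcalloc-transposed-args): the sizeof()
         * expression appears in the first (count) argument rather than
         * the second (element size) argument. */
        struct bucket *bad = calloc(sizeof(*bad), nbuckets);

        /* conventional order: element count first, element size second */
        struct bucket *good = calloc(nbuckets, sizeof(*good));

        free(bad);
        free(good);
        return 0;
}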
diff --git a/queue-6.1/bpf-reduce-smap-elem_size.patch b/queue-6.1/bpf-reduce-smap-elem_size.patch
new file mode 100644 (file)
index 0000000..139fe7d
--- /dev/null
@@ -0,0 +1,44 @@
+From dd95c48b0dd43cc09f096180ebd06fb702320d0f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Dec 2022 17:30:36 -0800
+Subject: bpf: Reduce smap->elem_size
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit 552d42a356ebf78df9d2f4b73e077d2459966fac ]
+
+'struct bpf_local_storage_elem' has 56 bytes of unused padding at the
+end due to struct's cache-line alignment requirement. This padding
+space is overlapped by storage value contents, so if we use sizeof()
+to calculate the total size, we overinflate it by 56 bytes. Use
+offsetof() instead to calculate more exact memory use.
+
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Yonghong Song <yhs@fb.com>
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20221221013036.3427431-1-martin.lau@linux.dev
+Stable-dep-of: af253aef183a ("bpf: fix order of args in call to bpf_map_kvcalloc")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/bpf_local_storage.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
+index b1090a2b02b34..f8dd7c516e320 100644
+--- a/kernel/bpf/bpf_local_storage.c
++++ b/kernel/bpf/bpf_local_storage.c
+@@ -580,8 +580,8 @@ static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_att
+               raw_spin_lock_init(&smap->buckets[i].lock);
+       }
+-      smap->elem_size =
+-              sizeof(struct bpf_local_storage_elem) + attr->value_size;
++      smap->elem_size = offsetof(struct bpf_local_storage_elem,
++                                 sdata.data[attr->value_size]);
+       return smap;
+ }
+-- 
+2.43.0
+
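A simplified sketch (hypothetical layout, not the real bpf structs) of why sizeof() over-counts in the patch above: a cache-line-aligned tail member drags alignment padding into sizeof(), padding that the stored value later overlaps, while offsetof() into the data area counts only the bytes actually used:

#include <stdio.h>
#include <stddef.h>

struct elem {
        long snode, map_node;
        long meta __attribute__((aligned(64))); /* forces 64-byte struct alignment */
        char data[];                            /* value bytes start here */
};

#define VALUE_SIZE 4

int main(void)
{
        /* sizeof() includes the padding needed to round the struct up
         * to a multiple of its 64-byte alignment. */
        printf("sizeof-based size  : %zu\n", sizeof(struct elem) + VALUE_SIZE);

        /* offsetof() to one past the last value byte gives the exact
         * number of bytes the element really needs. */
        printf("offsetof-based size: %zu\n",
               offsetof(struct elem, data[VALUE_SIZE]));
        return 0;
}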
diff --git a/queue-6.1/bpf-refactor-some-inode-task-sk-storage-functions-fo.patch b/queue-6.1/bpf-refactor-some-inode-task-sk-storage-functions-fo.patch
new file mode 100644 (file)
index 0000000..9896a58
--- /dev/null
@@ -0,0 +1,523 @@
+From b243ffd1225efb6fbb28431fe18afb6657e01725 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Oct 2022 21:28:45 -0700
+Subject: bpf: Refactor some inode/task/sk storage functions for reuse
+
+From: Yonghong Song <yhs@fb.com>
+
+[ Upstream commit c83597fa5dc6b322e9bdf929e5f4136a3f4aa4db ]
+
+Refactor codes so that inode/task/sk storage implementation
+can maximally share the same code. I also added some comments
+in new function bpf_local_storage_unlink_nolock() to make
+codes easy to understand. There is no functionality change.
+
+Acked-by: David Vernet <void@manifault.com>
+Signed-off-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/r/20221026042845.672944-1-yhs@fb.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: af253aef183a ("bpf: fix order of args in call to bpf_map_kvcalloc")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/bpf_local_storage.h |  17 ++-
+ kernel/bpf/bpf_inode_storage.c    |  38 +-----
+ kernel/bpf/bpf_local_storage.c    | 190 +++++++++++++++++++-----------
+ kernel/bpf/bpf_task_storage.c     |  38 +-----
+ net/core/bpf_sk_storage.c         |  35 +-----
+ 5 files changed, 137 insertions(+), 181 deletions(-)
+
+diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
+index 7ea18d4da84b8..6d37a40cd90e8 100644
+--- a/include/linux/bpf_local_storage.h
++++ b/include/linux/bpf_local_storage.h
+@@ -116,21 +116,22 @@ static struct bpf_local_storage_cache name = {                   \
+       .idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock),        \
+ }
+-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
+-void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
+-                                    u16 idx);
+-
+ /* Helper functions for bpf_local_storage */
+ int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
+-struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
++struct bpf_map *
++bpf_local_storage_map_alloc(union bpf_attr *attr,
++                          struct bpf_local_storage_cache *cache);
+ struct bpf_local_storage_data *
+ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
+                        struct bpf_local_storage_map *smap,
+                        bool cacheit_lockit);
+-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
++bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage);
++
++void bpf_local_storage_map_free(struct bpf_map *map,
++                              struct bpf_local_storage_cache *cache,
+                               int __percpu *busy_counter);
+ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+@@ -141,10 +142,6 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
+                                  struct bpf_local_storage_elem *selem);
+-bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
+-                                   struct bpf_local_storage_elem *selem,
+-                                   bool uncharge_omem, bool use_trace_rcu);
+-
+ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu);
+ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
+index 5f7683b191998..6a1d4d22816a3 100644
+--- a/kernel/bpf/bpf_inode_storage.c
++++ b/kernel/bpf/bpf_inode_storage.c
+@@ -56,11 +56,9 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
+ void bpf_inode_storage_free(struct inode *inode)
+ {
+-      struct bpf_local_storage_elem *selem;
+       struct bpf_local_storage *local_storage;
+       bool free_inode_storage = false;
+       struct bpf_storage_blob *bsb;
+-      struct hlist_node *n;
+       bsb = bpf_inode(inode);
+       if (!bsb)
+@@ -74,30 +72,11 @@ void bpf_inode_storage_free(struct inode *inode)
+               return;
+       }
+-      /* Neither the bpf_prog nor the bpf-map's syscall
+-       * could be modifying the local_storage->list now.
+-       * Thus, no elem can be added-to or deleted-from the
+-       * local_storage->list by the bpf_prog or by the bpf-map's syscall.
+-       *
+-       * It is racing with bpf_local_storage_map_free() alone
+-       * when unlinking elem from the local_storage->list and
+-       * the map's bucket->list.
+-       */
+       raw_spin_lock_bh(&local_storage->lock);
+-      hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+-              /* Always unlink from map before unlinking from
+-               * local_storage.
+-               */
+-              bpf_selem_unlink_map(selem);
+-              free_inode_storage = bpf_selem_unlink_storage_nolock(
+-                      local_storage, selem, false, false);
+-      }
++      free_inode_storage = bpf_local_storage_unlink_nolock(local_storage);
+       raw_spin_unlock_bh(&local_storage->lock);
+       rcu_read_unlock();
+-      /* free_inoode_storage should always be true as long as
+-       * local_storage->list was non-empty.
+-       */
+       if (free_inode_storage)
+               kfree_rcu(local_storage, rcu);
+ }
+@@ -226,23 +205,12 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
+ static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
+ {
+-      struct bpf_local_storage_map *smap;
+-
+-      smap = bpf_local_storage_map_alloc(attr);
+-      if (IS_ERR(smap))
+-              return ERR_CAST(smap);
+-
+-      smap->cache_idx = bpf_local_storage_cache_idx_get(&inode_cache);
+-      return &smap->map;
++      return bpf_local_storage_map_alloc(attr, &inode_cache);
+ }
+ static void inode_storage_map_free(struct bpf_map *map)
+ {
+-      struct bpf_local_storage_map *smap;
+-
+-      smap = (struct bpf_local_storage_map *)map;
+-      bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
+-      bpf_local_storage_map_free(smap, NULL);
++      bpf_local_storage_map_free(map, &inode_cache, NULL);
+ }
+ BTF_ID_LIST_SINGLE(inode_storage_map_btf_ids, struct,
+diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
+index d9d88a2cda5e5..b1090a2b02b34 100644
+--- a/kernel/bpf/bpf_local_storage.c
++++ b/kernel/bpf/bpf_local_storage.c
+@@ -114,9 +114,9 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
+  * The caller must ensure selem->smap is still valid to be
+  * dereferenced for its smap->elem_size and smap->cache_idx.
+  */
+-bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
+-                                   struct bpf_local_storage_elem *selem,
+-                                   bool uncharge_mem, bool use_trace_rcu)
++static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
++                                          struct bpf_local_storage_elem *selem,
++                                          bool uncharge_mem, bool use_trace_rcu)
+ {
+       struct bpf_local_storage_map *smap;
+       bool free_local_storage;
+@@ -501,7 +501,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+       return ERR_PTR(err);
+ }
+-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
++static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
+ {
+       u64 min_usage = U64_MAX;
+       u16 i, res = 0;
+@@ -525,76 +525,14 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
+       return res;
+ }
+-void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
+-                                    u16 idx)
++static void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
++                                           u16 idx)
+ {
+       spin_lock(&cache->idx_lock);
+       cache->idx_usage_counts[idx]--;
+       spin_unlock(&cache->idx_lock);
+ }
+-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+-                              int __percpu *busy_counter)
+-{
+-      struct bpf_local_storage_elem *selem;
+-      struct bpf_local_storage_map_bucket *b;
+-      unsigned int i;
+-
+-      /* Note that this map might be concurrently cloned from
+-       * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
+-       * RCU read section to finish before proceeding. New RCU
+-       * read sections should be prevented via bpf_map_inc_not_zero.
+-       */
+-      synchronize_rcu();
+-
+-      /* bpf prog and the userspace can no longer access this map
+-       * now.  No new selem (of this map) can be added
+-       * to the owner->storage or to the map bucket's list.
+-       *
+-       * The elem of this map can be cleaned up here
+-       * or when the storage is freed e.g.
+-       * by bpf_sk_storage_free() during __sk_destruct().
+-       */
+-      for (i = 0; i < (1U << smap->bucket_log); i++) {
+-              b = &smap->buckets[i];
+-
+-              rcu_read_lock();
+-              /* No one is adding to b->list now */
+-              while ((selem = hlist_entry_safe(
+-                              rcu_dereference_raw(hlist_first_rcu(&b->list)),
+-                              struct bpf_local_storage_elem, map_node))) {
+-                      if (busy_counter) {
+-                              migrate_disable();
+-                              this_cpu_inc(*busy_counter);
+-                      }
+-                      bpf_selem_unlink(selem, false);
+-                      if (busy_counter) {
+-                              this_cpu_dec(*busy_counter);
+-                              migrate_enable();
+-                      }
+-                      cond_resched_rcu();
+-              }
+-              rcu_read_unlock();
+-      }
+-
+-      /* While freeing the storage we may still need to access the map.
+-       *
+-       * e.g. when bpf_sk_storage_free() has unlinked selem from the map
+-       * which then made the above while((selem = ...)) loop
+-       * exit immediately.
+-       *
+-       * However, while freeing the storage one still needs to access the
+-       * smap->elem_size to do the uncharging in
+-       * bpf_selem_unlink_storage_nolock().
+-       *
+-       * Hence, wait another rcu grace period for the storage to be freed.
+-       */
+-      synchronize_rcu();
+-
+-      kvfree(smap->buckets);
+-      bpf_map_area_free(smap);
+-}
+-
+ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
+ {
+       if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
+@@ -614,7 +552,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
+       return 0;
+ }
+-struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
++static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr)
+ {
+       struct bpf_local_storage_map *smap;
+       unsigned int i;
+@@ -664,3 +602,117 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+       return 0;
+ }
++
++bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage)
++{
++      struct bpf_local_storage_elem *selem;
++      bool free_storage = false;
++      struct hlist_node *n;
++
++      /* Neither the bpf_prog nor the bpf_map's syscall
++       * could be modifying the local_storage->list now.
++       * Thus, no elem can be added to or deleted from the
++       * local_storage->list by the bpf_prog or by the bpf_map's syscall.
++       *
++       * It is racing with bpf_local_storage_map_free() alone
++       * when unlinking elem from the local_storage->list and
++       * the map's bucket->list.
++       */
++      hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
++              /* Always unlink from map before unlinking from
++               * local_storage.
++               */
++              bpf_selem_unlink_map(selem);
++              /* If local_storage list has only one element, the
++               * bpf_selem_unlink_storage_nolock() will return true.
++               * Otherwise, it will return false. The current loop iteration
++               * intends to remove all local storage. So the last iteration
++               * of the loop will set the free_cgroup_storage to true.
++               */
++              free_storage = bpf_selem_unlink_storage_nolock(
++                      local_storage, selem, false, false);
++      }
++
++      return free_storage;
++}
++
++struct bpf_map *
++bpf_local_storage_map_alloc(union bpf_attr *attr,
++                          struct bpf_local_storage_cache *cache)
++{
++      struct bpf_local_storage_map *smap;
++
++      smap = __bpf_local_storage_map_alloc(attr);
++      if (IS_ERR(smap))
++              return ERR_CAST(smap);
++
++      smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
++      return &smap->map;
++}
++
++void bpf_local_storage_map_free(struct bpf_map *map,
++                              struct bpf_local_storage_cache *cache,
++                              int __percpu *busy_counter)
++{
++      struct bpf_local_storage_map_bucket *b;
++      struct bpf_local_storage_elem *selem;
++      struct bpf_local_storage_map *smap;
++      unsigned int i;
++
++      smap = (struct bpf_local_storage_map *)map;
++      bpf_local_storage_cache_idx_free(cache, smap->cache_idx);
++
++      /* Note that this map might be concurrently cloned from
++       * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
++       * RCU read section to finish before proceeding. New RCU
++       * read sections should be prevented via bpf_map_inc_not_zero.
++       */
++      synchronize_rcu();
++
++      /* bpf prog and the userspace can no longer access this map
++       * now.  No new selem (of this map) can be added
++       * to the owner->storage or to the map bucket's list.
++       *
++       * The elem of this map can be cleaned up here
++       * or when the storage is freed e.g.
++       * by bpf_sk_storage_free() during __sk_destruct().
++       */
++      for (i = 0; i < (1U << smap->bucket_log); i++) {
++              b = &smap->buckets[i];
++
++              rcu_read_lock();
++              /* No one is adding to b->list now */
++              while ((selem = hlist_entry_safe(
++                              rcu_dereference_raw(hlist_first_rcu(&b->list)),
++                              struct bpf_local_storage_elem, map_node))) {
++                      if (busy_counter) {
++                              migrate_disable();
++                              this_cpu_inc(*busy_counter);
++                      }
++                      bpf_selem_unlink(selem, false);
++                      if (busy_counter) {
++                              this_cpu_dec(*busy_counter);
++                              migrate_enable();
++                      }
++                      cond_resched_rcu();
++              }
++              rcu_read_unlock();
++      }
++
++      /* While freeing the storage we may still need to access the map.
++       *
++       * e.g. when bpf_sk_storage_free() has unlinked selem from the map
++       * which then made the above while((selem = ...)) loop
++       * exit immediately.
++       *
++       * However, while freeing the storage one still needs to access the
++       * smap->elem_size to do the uncharging in
++       * bpf_selem_unlink_storage_nolock().
++       *
++       * Hence, wait another rcu grace period for the storage to be freed.
++       */
++      synchronize_rcu();
++
++      kvfree(smap->buckets);
++      bpf_map_area_free(smap);
++}
+diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
+index 6f290623347e0..40a92edd6f539 100644
+--- a/kernel/bpf/bpf_task_storage.c
++++ b/kernel/bpf/bpf_task_storage.c
+@@ -71,10 +71,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
+ void bpf_task_storage_free(struct task_struct *task)
+ {
+-      struct bpf_local_storage_elem *selem;
+       struct bpf_local_storage *local_storage;
+       bool free_task_storage = false;
+-      struct hlist_node *n;
+       unsigned long flags;
+       rcu_read_lock();
+@@ -85,32 +83,13 @@ void bpf_task_storage_free(struct task_struct *task)
+               return;
+       }
+-      /* Neither the bpf_prog nor the bpf-map's syscall
+-       * could be modifying the local_storage->list now.
+-       * Thus, no elem can be added-to or deleted-from the
+-       * local_storage->list by the bpf_prog or by the bpf-map's syscall.
+-       *
+-       * It is racing with bpf_local_storage_map_free() alone
+-       * when unlinking elem from the local_storage->list and
+-       * the map's bucket->list.
+-       */
+       bpf_task_storage_lock();
+       raw_spin_lock_irqsave(&local_storage->lock, flags);
+-      hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+-              /* Always unlink from map before unlinking from
+-               * local_storage.
+-               */
+-              bpf_selem_unlink_map(selem);
+-              free_task_storage = bpf_selem_unlink_storage_nolock(
+-                      local_storage, selem, false, false);
+-      }
++      free_task_storage = bpf_local_storage_unlink_nolock(local_storage);
+       raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+       bpf_task_storage_unlock();
+       rcu_read_unlock();
+-      /* free_task_storage should always be true as long as
+-       * local_storage->list was non-empty.
+-       */
+       if (free_task_storage)
+               kfree_rcu(local_storage, rcu);
+ }
+@@ -288,23 +267,12 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
+ static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
+ {
+-      struct bpf_local_storage_map *smap;
+-
+-      smap = bpf_local_storage_map_alloc(attr);
+-      if (IS_ERR(smap))
+-              return ERR_CAST(smap);
+-
+-      smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+-      return &smap->map;
++      return bpf_local_storage_map_alloc(attr, &task_cache);
+ }
+ static void task_storage_map_free(struct bpf_map *map)
+ {
+-      struct bpf_local_storage_map *smap;
+-
+-      smap = (struct bpf_local_storage_map *)map;
+-      bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+-      bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
++      bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy);
+ }
+ BTF_ID_LIST_SINGLE(task_storage_map_btf_ids, struct, bpf_local_storage_map)
+diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
+index ad01b1bea52e4..0124536e8a9db 100644
+--- a/net/core/bpf_sk_storage.c
++++ b/net/core/bpf_sk_storage.c
+@@ -48,10 +48,8 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
+ /* Called by __sk_destruct() & bpf_sk_storage_clone() */
+ void bpf_sk_storage_free(struct sock *sk)
+ {
+-      struct bpf_local_storage_elem *selem;
+       struct bpf_local_storage *sk_storage;
+       bool free_sk_storage = false;
+-      struct hlist_node *n;
+       rcu_read_lock();
+       sk_storage = rcu_dereference(sk->sk_bpf_storage);
+@@ -60,24 +58,8 @@ void bpf_sk_storage_free(struct sock *sk)
+               return;
+       }
+-      /* Netiher the bpf_prog nor the bpf-map's syscall
+-       * could be modifying the sk_storage->list now.
+-       * Thus, no elem can be added-to or deleted-from the
+-       * sk_storage->list by the bpf_prog or by the bpf-map's syscall.
+-       *
+-       * It is racing with bpf_local_storage_map_free() alone
+-       * when unlinking elem from the sk_storage->list and
+-       * the map's bucket->list.
+-       */
+       raw_spin_lock_bh(&sk_storage->lock);
+-      hlist_for_each_entry_safe(selem, n, &sk_storage->list, snode) {
+-              /* Always unlink from map before unlinking from
+-               * sk_storage.
+-               */
+-              bpf_selem_unlink_map(selem);
+-              free_sk_storage = bpf_selem_unlink_storage_nolock(
+-                      sk_storage, selem, true, false);
+-      }
++      free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage);
+       raw_spin_unlock_bh(&sk_storage->lock);
+       rcu_read_unlock();
+@@ -87,23 +69,12 @@ void bpf_sk_storage_free(struct sock *sk)
+ static void bpf_sk_storage_map_free(struct bpf_map *map)
+ {
+-      struct bpf_local_storage_map *smap;
+-
+-      smap = (struct bpf_local_storage_map *)map;
+-      bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+-      bpf_local_storage_map_free(smap, NULL);
++      bpf_local_storage_map_free(map, &sk_cache, NULL);
+ }
+ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
+ {
+-      struct bpf_local_storage_map *smap;
+-
+-      smap = bpf_local_storage_map_alloc(attr);
+-      if (IS_ERR(smap))
+-              return ERR_CAST(smap);
+-
+-      smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
+-      return &smap->map;
++      return bpf_local_storage_map_alloc(attr, &sk_cache);
+ }
+ static int notsupp_get_next_key(struct bpf_map *map, void *key,
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-remove-__bpf_local_storage_map_alloc.patch b/queue-6.1/bpf-remove-__bpf_local_storage_map_alloc.patch
new file mode 100644 (file)
index 0000000..41f361b
--- /dev/null
@@ -0,0 +1,110 @@
+From 24b661a9da3e3ae93686a9005c609a55b0c3f142 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Mar 2023 22:59:22 -0800
+Subject: bpf: Remove __bpf_local_storage_map_alloc
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit 62827d612ae525695799b3635a087cb49c55e977 ]
+
+bpf_local_storage_map_alloc() is the only caller of
+__bpf_local_storage_map_alloc().  The remaining logic in
+bpf_local_storage_map_alloc() is only a one liner setting
+the smap->cache_idx.
+
+Remove __bpf_local_storage_map_alloc() to simplify code.
+
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Link: https://lore.kernel.org/r/20230308065936.1550103-4-martin.lau@linux.dev
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: af253aef183a ("bpf: fix order of args in call to bpf_map_kvcalloc")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/bpf_local_storage.c | 63 ++++++++++++++--------------------
+ 1 file changed, 26 insertions(+), 37 deletions(-)
+
+diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
+index 8ea65973739e4..888b8e481083f 100644
+--- a/kernel/bpf/bpf_local_storage.c
++++ b/kernel/bpf/bpf_local_storage.c
+@@ -552,40 +552,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
+       return 0;
+ }
+-static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr)
+-{
+-      struct bpf_local_storage_map *smap;
+-      unsigned int i;
+-      u32 nbuckets;
+-
+-      smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
+-      if (!smap)
+-              return ERR_PTR(-ENOMEM);
+-      bpf_map_init_from_attr(&smap->map, attr);
+-
+-      nbuckets = roundup_pow_of_two(num_possible_cpus());
+-      /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
+-      nbuckets = max_t(u32, 2, nbuckets);
+-      smap->bucket_log = ilog2(nbuckets);
+-
+-      smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
+-                                       nbuckets, GFP_USER | __GFP_NOWARN);
+-      if (!smap->buckets) {
+-              bpf_map_area_free(smap);
+-              return ERR_PTR(-ENOMEM);
+-      }
+-
+-      for (i = 0; i < nbuckets; i++) {
+-              INIT_HLIST_HEAD(&smap->buckets[i].list);
+-              raw_spin_lock_init(&smap->buckets[i].lock);
+-      }
+-
+-      smap->elem_size = offsetof(struct bpf_local_storage_elem,
+-                                 sdata.data[attr->value_size]);
+-
+-      return smap;
+-}
+-
+ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+                                   const struct btf *btf,
+                                   const struct btf_type *key_type,
+@@ -641,10 +607,33 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
+                           struct bpf_local_storage_cache *cache)
+ {
+       struct bpf_local_storage_map *smap;
++      unsigned int i;
++      u32 nbuckets;
++
++      smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
++      if (!smap)
++              return ERR_PTR(-ENOMEM);
++      bpf_map_init_from_attr(&smap->map, attr);
++
++      nbuckets = roundup_pow_of_two(num_possible_cpus());
++      /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
++      nbuckets = max_t(u32, 2, nbuckets);
++      smap->bucket_log = ilog2(nbuckets);
+-      smap = __bpf_local_storage_map_alloc(attr);
+-      if (IS_ERR(smap))
+-              return ERR_CAST(smap);
++      smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
++                                       nbuckets, GFP_USER | __GFP_NOWARN);
++      if (!smap->buckets) {
++              bpf_map_area_free(smap);
++              return ERR_PTR(-ENOMEM);
++      }
++
++      for (i = 0; i < nbuckets; i++) {
++              INIT_HLIST_HEAD(&smap->buckets[i].list);
++              raw_spin_lock_init(&smap->buckets[i].lock);
++      }
++
++      smap->elem_size = offsetof(struct bpf_local_storage_elem,
++                                 sdata.data[attr->value_size]);
+       smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
+       return &smap->map;
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-use-bpf_map_kvcalloc-in-bpf_local_storage.patch b/queue-6.1/bpf-use-bpf_map_kvcalloc-in-bpf_local_storage.patch
new file mode 100644 (file)
index 0000000..f3b715c
--- /dev/null
@@ -0,0 +1,98 @@
+From daf3c08fc32bf497e5f7752034b59838dcbe0cad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Feb 2023 15:47:32 +0000
+Subject: bpf: use bpf_map_kvcalloc in bpf_local_storage
+
+From: Yafang Shao <laoar.shao@gmail.com>
+
+[ Upstream commit ddef81b5fd1da4d7c3cc8785d2043b73b72f38ef ]
+
+Introduce a new helper, bpf_map_kvcalloc(), for the memory allocation in
+bpf_local_storage. The allocation is then charged to the map instead of to
+current, though at the moment they are the same thing since the helper is
+only used on the map creation path. Charging the map's memory to the memcg
+via the map makes the accounting clearer.
+
+Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
+Link: https://lore.kernel.org/r/20230210154734.4416-3-laoar.shao@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Stable-dep-of: af253aef183a ("bpf: fix order of args in call to bpf_map_kvcalloc")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/bpf.h            |  8 ++++++++
+ kernel/bpf/bpf_local_storage.c |  4 ++--
+ kernel/bpf/syscall.c           | 15 +++++++++++++++
+ 3 files changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/bpf.h b/include/linux/bpf.h
+index 1ca1902af23e9..6b18b8da025f9 100644
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -1777,6 +1777,8 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
+ void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
+                          int node);
+ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags);
++void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
++                     gfp_t flags);
+ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
+                                   size_t align, gfp_t flags);
+ #else
+@@ -1793,6 +1795,12 @@ bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
+       return kzalloc(size, flags);
+ }
++static inline void *
++bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags)
++{
++      return kvcalloc(n, size, flags);
++}
++
+ static inline void __percpu *
+ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
+                    gfp_t flags)
+diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
+index f8dd7c516e320..8ea65973739e4 100644
+--- a/kernel/bpf/bpf_local_storage.c
++++ b/kernel/bpf/bpf_local_storage.c
+@@ -568,8 +568,8 @@ static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_att
+       nbuckets = max_t(u32, 2, nbuckets);
+       smap->bucket_log = ilog2(nbuckets);
+-      smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
+-                               GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
++      smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
++                                       nbuckets, GFP_USER | __GFP_NOWARN);
+       if (!smap->buckets) {
+               bpf_map_area_free(smap);
+               return ERR_PTR(-ENOMEM);
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index 1e46a84694b8a..d77597daa0022 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -470,6 +470,21 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
+       return ptr;
+ }
++void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
++                     gfp_t flags)
++{
++      struct mem_cgroup *memcg, *old_memcg;
++      void *ptr;
++
++      memcg = bpf_map_get_memcg(map);
++      old_memcg = set_active_memcg(memcg);
++      ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
++      set_active_memcg(old_memcg);
++      mem_cgroup_put(memcg);
++
++      return ptr;
++}
++
+ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
+                                   size_t align, gfp_t flags)
+ {
+-- 
+2.43.0
+
diff --git a/queue-6.1/cachefiles-add-missing-lock-protection-when-polling.patch b/queue-6.1/cachefiles-add-missing-lock-protection-when-polling.patch
new file mode 100644 (file)
index 0000000..5fe6b34
--- /dev/null
@@ -0,0 +1,56 @@
+From 7b278c2f2b77afa61fb5e3a7fbe2af01165f3e55 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jun 2024 14:29:30 +0800
+Subject: cachefiles: add missing lock protection when polling
+
+From: Jingbo Xu <jefflexu@linux.alibaba.com>
+
+[ Upstream commit cf5bb09e742a9cf6349127e868329a8f69b7a014 ]
+
+Add missing lock protection in the poll routine when iterating the
+xarray, otherwise:
+
+Even with the RCU read lock held, only the slot of the radix tree is
+guaranteed to be pinned there, while the data structure (e.g. struct
+cachefiles_req) stored in the slot has no such guarantee.  The poll
+routine will iterate the radix tree and dereference cachefiles_req
+accordingly.  Thus the RCU read lock is not adequate in this case and
+a spinlock is needed here.
+
+Fixes: b817e22b2e91 ("cachefiles: narrow the scope of triggering EPOLLIN events in ondemand mode")
+Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com>
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Link: https://lore.kernel.org/r/20240628062930.2467993-10-libaokun@huaweicloud.com
+Acked-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com>
+Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cachefiles/daemon.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
+index 06cdf1a8a16f6..89b11336a8369 100644
+--- a/fs/cachefiles/daemon.c
++++ b/fs/cachefiles/daemon.c
+@@ -366,14 +366,14 @@ static __poll_t cachefiles_daemon_poll(struct file *file,
+       if (cachefiles_in_ondemand_mode(cache)) {
+               if (!xa_empty(&cache->reqs)) {
+-                      rcu_read_lock();
++                      xas_lock(&xas);
+                       xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
+                               if (!cachefiles_ondemand_is_reopening_read(req)) {
+                                       mask |= EPOLLIN;
+                                       break;
+                               }
+                       }
+-                      rcu_read_unlock();
++                      xas_unlock(&xas);
+               }
+       } else {
+               if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
+-- 
+2.43.0
+
diff --git a/queue-6.1/cachefiles-cancel-all-requests-for-the-object-that-i.patch b/queue-6.1/cachefiles-cancel-all-requests-for-the-object-that-i.patch
new file mode 100644 (file)
index 0000000..e50dc8b
--- /dev/null
@@ -0,0 +1,67 @@
+From 3711211fd0d34743a5a3afc4a784be53de0ec0d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jun 2024 14:29:27 +0800
+Subject: cachefiles: cancel all requests for the object that is being dropped
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 751f524635a4f076117d714705eeddadaf6748ee ]
+
+After an object is dropped, requests for that object are useless, so
+cancel them to avoid causing other problems.
+
+This prepares for the later addition of cancel_work_sync(). After a
+reopen request is generated, cancel it to avoid cancel_work_sync()
+blocking while waiting for the daemon to complete the reopen request.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Link: https://lore.kernel.org/r/20240628062930.2467993-7-libaokun@huaweicloud.com
+Acked-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 12e009d60852 ("cachefiles: wait for ondemand_object_worker to finish when dropping object")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cachefiles/ondemand.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
+index cc2de0e3ee60f..acaecfce8aaa9 100644
+--- a/fs/cachefiles/ondemand.c
++++ b/fs/cachefiles/ondemand.c
+@@ -636,12 +636,31 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
+ void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
+ {
++      unsigned long index;
++      struct cachefiles_req *req;
++      struct cachefiles_cache *cache;
++
+       if (!object->ondemand)
+               return;
+       cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0,
+                       cachefiles_ondemand_init_close_req, NULL);
++
++      if (!object->ondemand->ondemand_id)
++              return;
++
++      /* Cancel all requests for the object that is being dropped. */
++      cache = object->volume->cache;
++      xa_lock(&cache->reqs);
+       cachefiles_ondemand_set_object_dropping(object);
++      xa_for_each(&cache->reqs, index, req) {
++              if (req->object == object) {
++                      req->error = -EIO;
++                      complete(&req->done);
++                      __xa_erase(&cache->reqs, index);
++              }
++      }
++      xa_unlock(&cache->reqs);
+ }
+ int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
+-- 
+2.43.0
+
diff --git a/queue-6.1/cachefiles-cyclic-allocation-of-msg_id-to-avoid-reus.patch b/queue-6.1/cachefiles-cyclic-allocation-of-msg_id-to-avoid-reus.patch
new file mode 100644 (file)
index 0000000..3305643
--- /dev/null
@@ -0,0 +1,123 @@
+From 601a171f1123cb278b169ed9a2f135eaa2a53920 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jun 2024 14:29:29 +0800
+Subject: cachefiles: cyclic allocation of msg_id to avoid reuse
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 19f4f399091478c95947f6bd7ad61622300c30d9 ]
+
+Reusing the msg_id after a maliciously completed reopen request may cause
+a read request to remain unprocessed and result in a hang, as shown below:
+
+       t1       |      t2       |      t3
+-------------------------------------------------
+cachefiles_ondemand_select_req
+ cachefiles_ondemand_object_is_close(A)
+ cachefiles_ondemand_set_object_reopening(A)
+ queue_work(fscache_object_wq, &info->work)
+                ondemand_object_worker
+                 cachefiles_ondemand_init_object(A)
+                  cachefiles_ondemand_send_req(OPEN)
+                    // get msg_id 6
+                    wait_for_completion(&req_A->done)
+cachefiles_ondemand_daemon_read
+ // read msg_id 6 req_A
+ cachefiles_ondemand_get_fd
+ copy_to_user
+                                // Malicious completion msg_id 6
+                                copen 6,-1
+                                cachefiles_ondemand_copen
+                                 complete(&req_A->done)
+                                 // will not set the object to close
+                                 // because ondemand_id && fd is valid.
+
+                // ondemand_object_worker() is done
+                // but the object is still reopening.
+
+                                // new open req_B
+                                cachefiles_ondemand_init_object(B)
+                                 cachefiles_ondemand_send_req(OPEN)
+                                 // reuse msg_id 6
+process_open_req
+ copen 6,A.size
+ // The expected failed copen was executed successfully
+
+The copen is expected to fail, and when it does, it closes the fd, which
+sets the object to close, and the close then triggers a reopen again.
+However, because the msg_id reuse results in a successful copen, the
+anonymous fd is not closed until the daemon exits. Therefore read requests
+waiting for the reopen to complete may trigger a hung task.
+
+To avoid this issue, allocate msg_ids cyclically so that a msg_id is not
+reused within a very short period of time.
+
+Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie")
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Link: https://lore.kernel.org/r/20240628062930.2467993-9-libaokun@huaweicloud.com
+Acked-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cachefiles/internal.h |  1 +
+ fs/cachefiles/ondemand.c | 20 ++++++++++++++++----
+ 2 files changed, 17 insertions(+), 4 deletions(-)
+
+diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
+index 94f59123726ca..111ad6ecd4baf 100644
+--- a/fs/cachefiles/internal.h
++++ b/fs/cachefiles/internal.h
+@@ -129,6 +129,7 @@ struct cachefiles_cache {
+       unsigned long                   req_id_next;
+       struct xarray                   ondemand_ids;   /* xarray for ondemand_id allocation */
+       u32                             ondemand_id_next;
++      u32                             msg_id_next;
+ };
+ static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache)
+diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
+index 1f6561814e702..51173ab6dbd84 100644
+--- a/fs/cachefiles/ondemand.c
++++ b/fs/cachefiles/ondemand.c
+@@ -505,20 +505,32 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
+               smp_mb();
+               if (opcode == CACHEFILES_OP_CLOSE &&
+-                      !cachefiles_ondemand_object_is_open(object)) {
++                  !cachefiles_ondemand_object_is_open(object)) {
+                       WARN_ON_ONCE(object->ondemand->ondemand_id == 0);
+                       xas_unlock(&xas);
+                       ret = -EIO;
+                       goto out;
+               }
+-              xas.xa_index = 0;
++              /*
++               * Cyclically find a free xas to avoid msg_id reuse that would
++               * cause the daemon to successfully copen a stale msg_id.
++               */
++              xas.xa_index = cache->msg_id_next;
+               xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK);
++              if (xas.xa_node == XAS_RESTART) {
++                      xas.xa_index = 0;
++                      xas_find_marked(&xas, cache->msg_id_next - 1, XA_FREE_MARK);
++              }
+               if (xas.xa_node == XAS_RESTART)
+                       xas_set_err(&xas, -EBUSY);
++
+               xas_store(&xas, req);
+-              xas_clear_mark(&xas, XA_FREE_MARK);
+-              xas_set_mark(&xas, CACHEFILES_REQ_NEW);
++              if (xas_valid(&xas)) {
++                      cache->msg_id_next = xas.xa_index + 1;
++                      xas_clear_mark(&xas, XA_FREE_MARK);
++                      xas_set_mark(&xas, CACHEFILES_REQ_NEW);
++              }
+               xas_unlock(&xas);
+       } while (xas_nomem(&xas, GFP_KERNEL));
+-- 
+2.43.0
+
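A minimal userspace sketch (hypothetical, not the kernel xarray code) of the cyclic allocation idea in the patch above: the search for a free id starts just after the last id handed out and wraps around, so an id that was released a moment ago is not picked again immediately:

#include <stdbool.h>
#include <stdio.h>

#define NR_IDS 8

static bool in_use[NR_IDS];
static unsigned int id_next;    /* where the next search starts */

static int alloc_id(void)
{
        unsigned int i;

        for (i = 0; i < NR_IDS; i++) {
                unsigned int id = (id_next + i) % NR_IDS;

                if (!in_use[id]) {
                        in_use[id] = true;
                        id_next = (id + 1) % NR_IDS;
                        return (int)id;
                }
        }
        return -1;              /* all ids busy */
}

static void free_id(int id)
{
        in_use[id] = false;
}

int main(void)
{
        int a = alloc_id();     /* gets id 0 */

        free_id(a);
        /* without the cyclic hint this would hand out id 0 again right away */
        printf("first id %d, next id %d\n", a, alloc_id());     /* prints 0, 1 */
        return 0;
}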
diff --git a/queue-6.1/cachefiles-narrow-the-scope-of-triggering-epollin-ev.patch b/queue-6.1/cachefiles-narrow-the-scope-of-triggering-epollin-ev.patch
new file mode 100644 (file)
index 0000000..f5fe47b
--- /dev/null
@@ -0,0 +1,90 @@
+From bba388b65ccb4c6943dffceaf58b5b059f4c27a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Nov 2023 12:14:21 +0800
+Subject: cachefiles: narrow the scope of triggering EPOLLIN events in ondemand
+ mode
+
+From: Jia Zhu <zhujia.zj@bytedance.com>
+
+[ Upstream commit b817e22b2e91257ace32a6768c3c003faeaa1c5c ]
+
+Don't trigger EPOLLIN when there are only reopening read requests in
+the xarray.
+
+Suggested-by: Xin Yin <yinxin.x@bytedance.com>
+Signed-off-by: Jia Zhu <zhujia.zj@bytedance.com>
+Link: https://lore.kernel.org/r/20231120041422.75170-5-zhujia.zj@bytedance.com
+Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com>
+Reviewed-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 12e009d60852 ("cachefiles: wait for ondemand_object_worker to finish when dropping object")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cachefiles/daemon.c   | 14 ++++++++++++--
+ fs/cachefiles/internal.h | 12 ++++++++++++
+ 2 files changed, 24 insertions(+), 2 deletions(-)
+
+diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
+index b9945e4f697be..06cdf1a8a16f6 100644
+--- a/fs/cachefiles/daemon.c
++++ b/fs/cachefiles/daemon.c
+@@ -357,14 +357,24 @@ static __poll_t cachefiles_daemon_poll(struct file *file,
+                                          struct poll_table_struct *poll)
+ {
+       struct cachefiles_cache *cache = file->private_data;
++      XA_STATE(xas, &cache->reqs, 0);
++      struct cachefiles_req *req;
+       __poll_t mask;
+       poll_wait(file, &cache->daemon_pollwq, poll);
+       mask = 0;
+       if (cachefiles_in_ondemand_mode(cache)) {
+-              if (!xa_empty(&cache->reqs))
+-                      mask |= EPOLLIN;
++              if (!xa_empty(&cache->reqs)) {
++                      rcu_read_lock();
++                      xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
++                              if (!cachefiles_ondemand_is_reopening_read(req)) {
++                                      mask |= EPOLLIN;
++                                      break;
++                              }
++                      }
++                      rcu_read_unlock();
++              }
+       } else {
+               if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
+                       mask |= EPOLLIN;
+diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
+index 3eea52462fc87..e0eac16e4741c 100644
+--- a/fs/cachefiles/internal.h
++++ b/fs/cachefiles/internal.h
+@@ -335,6 +335,13 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \
+ CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN);
+ CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE);
+ CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING);
++
++static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req)
++{
++      return cachefiles_ondemand_object_is_reopening(req->object) &&
++                      req->msg.opcode == CACHEFILES_OP_READ;
++}
++
+ #else
+ static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
+                                       char __user *_buffer, size_t buflen)
+@@ -365,6 +372,11 @@ static inline int cachefiles_ondemand_init_obj_info(struct cachefiles_object *ob
+ static inline void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj)
+ {
+ }
++
++static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req)
++{
++      return false;
++}
+ #endif
+ /*
+-- 
+2.43.0
+
diff --git a/queue-6.1/cachefiles-propagate-errors-from-vfs_getxattr-to-avo.patch b/queue-6.1/cachefiles-propagate-errors-from-vfs_getxattr-to-avo.patch
new file mode 100644 (file)
index 0000000..f779664
--- /dev/null
@@ -0,0 +1,70 @@
+From c21daeb059692c28f47f32a2ec401c964c63db26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jun 2024 14:29:25 +0800
+Subject: cachefiles: propagate errors from vfs_getxattr() to avoid infinite
+ loop
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 0ece614a52bc9d219b839a6a29282b30d10e0c48 ]
+
+In cachefiles_check_volume_xattr(), the error returned by vfs_getxattr()
+is not passed to ret, so it ends up returning -ESTALE, which leads to an
+endless loop as follows:
+
+cachefiles_acquire_volume
+retry:
+  ret = cachefiles_check_volume_xattr
+    ret = -ESTALE
+    xlen = vfs_getxattr // return -EIO
+    // The ret is not updated when xlen < 0, so -ESTALE is returned.
+    return ret
+  // Supposed to jump out of the loop at this judgement.
+  if (ret != -ESTALE)
+      goto error_dir;
+  cachefiles_bury_object
+    //  EIO causes rename failure
+  goto retry;
+
+Hence propagate the error returned by vfs_getxattr() to avoid the above
+issue. Do the same in cachefiles_check_auxdata().
+
+Fixes: 32e150037dce ("fscache, cachefiles: Store the volume coherency data")
+Fixes: 72b957856b0c ("cachefiles: Implement metadata/coherency data storage in xattrs")
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Link: https://lore.kernel.org/r/20240628062930.2467993-5-libaokun@huaweicloud.com
+Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cachefiles/xattr.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
+index 00b087c14995a..0ecfc9065047c 100644
+--- a/fs/cachefiles/xattr.c
++++ b/fs/cachefiles/xattr.c
+@@ -110,9 +110,11 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
+       if (xlen == 0)
+               xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, tlen);
+       if (xlen != tlen) {
+-              if (xlen < 0)
++              if (xlen < 0) {
++                      ret = xlen;
+                       trace_cachefiles_vfs_error(object, file_inode(file), xlen,
+                                                  cachefiles_trace_getxattr_error);
++              }
+               if (xlen == -EIO)
+                       cachefiles_io_error_obj(
+                               object,
+@@ -252,6 +254,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
+               xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, buf, len);
+       if (xlen != len) {
+               if (xlen < 0) {
++                      ret = xlen;
+                       trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen,
+                                                  cachefiles_trace_getxattr_error);
+                       if (xlen == -EIO)
+-- 
+2.43.0
+
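A minimal userspace sketch (hypothetical, not the cachefiles code) of the retry pattern the patch above is about: if the inner error is not propagated into ret, the caller keeps seeing -ESTALE and keeps retrying even though the underlying failure is -EIO (the loop is bounded here only for demonstration):

#include <stdio.h>

#define ESTALE 116
#define EIO      5

/* stand-in for vfs_getxattr(): always fails with -EIO here */
static int get_xattr(void)
{
        return -EIO;
}

static int check_xattr(int propagate)
{
        int ret = -ESTALE;
        int xlen = get_xattr();

        if (xlen < 0 && propagate)
                ret = xlen;     /* the fix: pass the real error up */

        return ret;             /* without the fix: always -ESTALE */
}

int main(void)
{
        int tries;

        for (tries = 0; tries < 5; tries++)
                if (check_xattr(0) != -ESTALE)
                        break;
        printf("without propagation: still -ESTALE after %d retries\n", tries);

        for (tries = 0; tries < 5; tries++)
                if (check_xattr(1) != -ESTALE)
                        break;
        printf("with propagation:    real error seen after %d retries\n", tries);
        return 0;
}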
diff --git a/queue-6.1/cachefiles-stop-sending-new-request-when-dropping-ob.patch b/queue-6.1/cachefiles-stop-sending-new-request-when-dropping-ob.patch
new file mode 100644 (file)
index 0000000..7cbc144
--- /dev/null
@@ -0,0 +1,92 @@
+From fc276fa28a5fac97abd03c720dccb0d9c9c2bb0f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jun 2024 14:29:26 +0800
+Subject: cachefiles: stop sending new request when dropping object
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit b2415d1f4566b6939acacc69637eaa57815829c1 ]
+
+Add CACHEFILES_ONDEMAND_OBJSTATE_DROPPING to indicate that the cachefiles
+object is being dropped. It is set after the close request for the dropped
+object completes, and no new requests are allowed to be sent once the
+object is in this state.
+
+This prepares for the later addition of cancel_work_sync(). It prevents
+leftover reopen requests from being sent, to avoid processing unnecessary
+requests and to avoid cancel_work_sync() blocking while waiting for the
+daemon to complete the reopen requests.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Link: https://lore.kernel.org/r/20240628062930.2467993-6-libaokun@huaweicloud.com
+Acked-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 12e009d60852 ("cachefiles: wait for ondemand_object_worker to finish when dropping object")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cachefiles/internal.h |  2 ++
+ fs/cachefiles/ondemand.c | 10 ++++++++--
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
+index e0eac16e4741c..94f59123726ca 100644
+--- a/fs/cachefiles/internal.h
++++ b/fs/cachefiles/internal.h
+@@ -48,6 +48,7 @@ enum cachefiles_object_state {
+       CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */
+       CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */
+       CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */
++      CACHEFILES_ONDEMAND_OBJSTATE_DROPPING, /* Object is being dropped. */
+ };
+ struct cachefiles_ondemand_info {
+@@ -335,6 +336,7 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \
+ CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN);
+ CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE);
+ CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING);
++CACHEFILES_OBJECT_STATE_FUNCS(dropping, DROPPING);
+ static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req)
+ {
+diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
+index 4b39f0422e590..cc2de0e3ee60f 100644
+--- a/fs/cachefiles/ondemand.c
++++ b/fs/cachefiles/ondemand.c
+@@ -494,7 +494,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
+                */
+               xas_lock(&xas);
+-              if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
++              if (test_bit(CACHEFILES_DEAD, &cache->flags) ||
++                  cachefiles_ondemand_object_is_dropping(object)) {
+                       xas_unlock(&xas);
+                       ret = -EIO;
+                       goto out;
+@@ -535,7 +536,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
+        * If error occurs after creating the anonymous fd,
+        * cachefiles_ondemand_fd_release() will set object to close.
+        */
+-      if (opcode == CACHEFILES_OP_OPEN)
++      if (opcode == CACHEFILES_OP_OPEN &&
++          !cachefiles_ondemand_object_is_dropping(object))
+               cachefiles_ondemand_set_object_close(object);
+       kfree(req);
+       return ret;
+@@ -634,8 +636,12 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
+ void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
+ {
++      if (!object->ondemand)
++              return;
++
+       cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0,
+                       cachefiles_ondemand_init_close_req, NULL);
++      cachefiles_ondemand_set_object_dropping(object);
+ }
+ int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
+-- 
+2.43.0
+
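A rough userspace C sketch of the gating idea (the enum values and send_req() helper are illustrative stand-ins, not the cachefiles implementation): once the object is marked as dropping, any further request is refused, so nothing new can queue up behind the final close:

/* Userspace sketch, illustrative only: refuse to queue new requests once an
 * object has entered a DROPPING state, so a later cancel_work_sync() cannot
 * be blocked by freshly queued reopen work. */
#include <errno.h>
#include <stdio.h>

enum obj_state { OBJ_CLOSE, OBJ_OPEN, OBJ_REOPENING, OBJ_DROPPING };

struct object { enum obj_state state; };

static int send_req(struct object *obj, const char *op)
{
    if (obj->state == OBJ_DROPPING) {
        printf("%s: rejected, object is being dropped\n", op);
        return -EIO;                    /* mirrors the -EIO path in the patch */
    }
    printf("%s: queued\n", op);
    return 0;
}

int main(void)
{
    struct object obj = { .state = OBJ_OPEN };

    send_req(&obj, "read");             /* allowed */
    send_req(&obj, "close");            /* the final close is still sent ... */
    obj.state = OBJ_DROPPING;           /* ... then the state flips */
    send_req(&obj, "reopen");           /* a leftover reopen is now refused */
    return 0;
}
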
diff --git a/queue-6.1/cachefiles-wait-for-ondemand_object_worker-to-finish.patch b/queue-6.1/cachefiles-wait-for-ondemand_object_worker-to-finish.patch
new file mode 100644 (file)
index 0000000..25b525e
--- /dev/null
@@ -0,0 +1,78 @@
+From 480dcd6b96a48aad7fd10aa9e50a3bd5896a66f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jun 2024 14:29:28 +0800
+Subject: cachefiles: wait for ondemand_object_worker to finish when dropping
+ object
+
+From: Hou Tao <houtao1@huawei.com>
+
+[ Upstream commit 12e009d60852f7bce0afc373ca0b320f14150418 ]
+
+When queuing ondemand_object_worker() to re-open the object,
+cachefiles_object is not pinned. The cachefiles_object may be freed when
+the pending read request is completed intentionally and the related
+erofs is umounted. If ondemand_object_worker() runs after the object is
+freed, it will incur a use-after-free problem as shown below.
+
+process A  process B  process C  process D
+
+cachefiles_ondemand_send_req()
+// send a read req X
+// wait for its completion
+
+           // close ondemand fd
+           cachefiles_ondemand_fd_release()
+           // set object as CLOSE
+
+                       cachefiles_ondemand_daemon_read()
+                       // set object as REOPENING
+                       queue_work(fscache_wq, &info->ondemand_work)
+
+                                // close /dev/cachefiles
+                                cachefiles_daemon_release
+                                cachefiles_flush_reqs
+                                complete(&req->done)
+
+// read req X is completed
+// umount the erofs fs
+cachefiles_put_object()
+// object will be freed
+cachefiles_ondemand_deinit_obj_info()
+kmem_cache_free(object)
+                       // both info and object are freed
+                       ondemand_object_worker()
+
+When dropping an object, it is no longer necessary to reopen the object,
+so use cancel_work_sync() to cancel or wait for ondemand_object_worker()
+to finish.
+
+Fixes: 0a7e54c1959c ("cachefiles: resend an open request if the read request's object is closed")
+Signed-off-by: Hou Tao <houtao1@huawei.com>
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Link: https://lore.kernel.org/r/20240628062930.2467993-8-libaokun@huaweicloud.com
+Acked-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com>
+Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cachefiles/ondemand.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
+index acaecfce8aaa9..1f6561814e702 100644
+--- a/fs/cachefiles/ondemand.c
++++ b/fs/cachefiles/ondemand.c
+@@ -661,6 +661,9 @@ void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
+               }
+       }
+       xa_unlock(&cache->reqs);
++
++      /* Wait for ondemand_object_worker() to finish to avoid UAF. */
++      cancel_work_sync(&object->ondemand->ondemand_work);
+ }
+ int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
+-- 
+2.43.0
+
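The shape of the race, and of the fix, can be sketched with plain pthreads (an analogy only; pthread_join() here plays the role that cancel_work_sync() plays in the patch, and the object layout is invented):

/* Userspace pthread sketch (assumption-laden analogy, not kernel code): if
 * the object is freed while a queued worker may still run, the worker
 * dereferences freed memory; waiting for the worker before the free (the
 * analogue of cancel_work_sync()) removes the race. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct object { char name[16]; };

static void *worker(void *arg)
{
    struct object *obj = arg;

    printf("worker sees: %s\n", obj->name);   /* UAF if obj were already freed */
    return NULL;
}

int main(void)
{
    struct object *obj = malloc(sizeof(*obj));
    pthread_t tid;

    strcpy(obj->name, "object");
    pthread_create(&tid, NULL, worker, obj);

    pthread_join(tid, NULL);   /* wait for the worker first (the fix) ... */
    free(obj);                 /* ... only then release the object */
    return 0;
}
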
diff --git a/queue-6.1/ethtool-netlink-do-not-return-sqi-value-if-link-is-d.patch b/queue-6.1/ethtool-netlink-do-not-return-sqi-value-if-link-is-d.patch
new file mode 100644 (file)
index 0000000..2fd3a0e
--- /dev/null
@@ -0,0 +1,122 @@
+From 756b80f53663f02d748723e7504c020cfc17bcda Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Jul 2024 08:19:43 +0200
+Subject: ethtool: netlink: do not return SQI value if link is down
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit c184cf94e73b04ff7048d045f5413899bc664788 ]
+
+Do not attach SQI value if link is down. "SQI values are only valid if
+link-up condition is present" per OpenAlliance specification of
+100Base-T1 Interoperability Test suite [1]. The same rule would apply
+for other link types.
+
+[1] https://opensig.org/automotive-ethernet-specifications/#
+
+Fixes: 806602191592 ("ethtool: provide UAPI for PHY Signal Quality Index (SQI)")
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Woojung Huh <woojung.huh@microchip.com>
+Link: https://patch.msgid.link/20240709061943.729381-1-o.rempel@pengutronix.de
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ethtool/linkstate.c | 41 ++++++++++++++++++++++++++++-------------
+ 1 file changed, 28 insertions(+), 13 deletions(-)
+
+diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
+index fb676f349455a..470582a70ccbe 100644
+--- a/net/ethtool/linkstate.c
++++ b/net/ethtool/linkstate.c
+@@ -36,6 +36,8 @@ static int linkstate_get_sqi(struct net_device *dev)
+       mutex_lock(&phydev->lock);
+       if (!phydev->drv || !phydev->drv->get_sqi)
+               ret = -EOPNOTSUPP;
++      else if (!phydev->link)
++              ret = -ENETDOWN;
+       else
+               ret = phydev->drv->get_sqi(phydev);
+       mutex_unlock(&phydev->lock);
+@@ -54,6 +56,8 @@ static int linkstate_get_sqi_max(struct net_device *dev)
+       mutex_lock(&phydev->lock);
+       if (!phydev->drv || !phydev->drv->get_sqi_max)
+               ret = -EOPNOTSUPP;
++      else if (!phydev->link)
++              ret = -ENETDOWN;
+       else
+               ret = phydev->drv->get_sqi_max(phydev);
+       mutex_unlock(&phydev->lock);
+@@ -61,6 +65,17 @@ static int linkstate_get_sqi_max(struct net_device *dev)
+       return ret;
+ };
++static bool linkstate_sqi_critical_error(int sqi)
++{
++      return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN;
++}
++
++static bool linkstate_sqi_valid(struct linkstate_reply_data *data)
++{
++      return data->sqi >= 0 && data->sqi_max >= 0 &&
++             data->sqi <= data->sqi_max;
++}
++
+ static int linkstate_get_link_ext_state(struct net_device *dev,
+                                       struct linkstate_reply_data *data)
+ {
+@@ -92,12 +107,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
+       data->link = __ethtool_get_link(dev);
+       ret = linkstate_get_sqi(dev);
+-      if (ret < 0 && ret != -EOPNOTSUPP)
++      if (linkstate_sqi_critical_error(ret))
+               goto out;
+       data->sqi = ret;
+       ret = linkstate_get_sqi_max(dev);
+-      if (ret < 0 && ret != -EOPNOTSUPP)
++      if (linkstate_sqi_critical_error(ret))
+               goto out;
+       data->sqi_max = ret;
+@@ -122,11 +137,10 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base,
+       len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+               + 0;
+-      if (data->sqi != -EOPNOTSUPP)
+-              len += nla_total_size(sizeof(u32));
+-
+-      if (data->sqi_max != -EOPNOTSUPP)
+-              len += nla_total_size(sizeof(u32));
++      if (linkstate_sqi_valid(data)) {
++              len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */
++              len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */
++      }
+       if (data->link_ext_state_provided)
+               len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */
+@@ -147,13 +161,14 @@ static int linkstate_fill_reply(struct sk_buff *skb,
+           nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
+               return -EMSGSIZE;
+-      if (data->sqi != -EOPNOTSUPP &&
+-          nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
+-              return -EMSGSIZE;
++      if (linkstate_sqi_valid(data)) {
++              if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
++                      return -EMSGSIZE;
+-      if (data->sqi_max != -EOPNOTSUPP &&
+-          nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
+-              return -EMSGSIZE;
++              if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX,
++                              data->sqi_max))
++                      return -EMSGSIZE;
++      }
+       if (data->link_ext_state_provided) {
+               if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE,
+-- 
+2.43.0
+
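A small userspace C sketch of the two helpers the patch introduces (simplified signatures; the sample values are invented): -EOPNOTSUPP and -ENETDOWN count as "nothing to report", any other negative value is a hard error, and the SQI pair is only reported when both values are sane:

/* Userspace sketch of the validity checks, illustrative values only. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool sqi_critical_error(int sqi)
{
    return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN;
}

static bool sqi_valid(int sqi, int sqi_max)
{
    return sqi >= 0 && sqi_max >= 0 && sqi <= sqi_max;
}

int main(void)
{
    int samples[][2] = { { 5, 7 }, { -ENETDOWN, 7 }, { -EIO, 7 } };

    for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        int sqi = samples[i][0], max = samples[i][1];

        printf("sqi=%d max=%d critical=%d report=%d\n",
               sqi, max, sqi_critical_error(sqi), sqi_valid(sqi, max));
    }
    return 0;
}
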
diff --git a/queue-6.1/filelock-fix-potential-use-after-free-in-posix_lock_.patch b/queue-6.1/filelock-fix-potential-use-after-free-in-posix_lock_.patch
new file mode 100644 (file)
index 0000000..91497eb
--- /dev/null
@@ -0,0 +1,50 @@
+From 6e27922202afc4a8a4ff04e6bbccd83c1d5bf81f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jul 2024 18:44:48 -0400
+Subject: filelock: fix potential use-after-free in posix_lock_inode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 1b3ec4f7c03d4b07bad70697d7e2f4088d2cfe92 ]
+
+Light Hsieh reported a KASAN UAF warning in trace_posix_lock_inode().
+The request pointer had been changed earlier to point to a lock entry
+that was added to the inode's list. However, before the tracepoint could
+fire, another task raced in and freed that lock.
+
+Fix this by moving the tracepoint inside the spinlock, which should
+ensure that this doesn't happen.
+
+Fixes: 74f6f5912693 ("locks: fix KASAN: use-after-free in trace_event_raw_event_filelock_lock")
+Link: https://lore.kernel.org/linux-fsdevel/724ffb0a2962e912ea62bb0515deadf39c325112.camel@kernel.org/
+Reported-by: Light Hsieh (謝明燈) <Light.Hsieh@mediatek.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Link: https://lore.kernel.org/r/20240702-filelock-6-10-v1-1-96e766aadc98@kernel.org
+Reviewed-by: Alexander Aring <aahringo@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/locks.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/locks.c b/fs/locks.c
+index 7d0918b8fe5d6..c23bcfe9b0fdd 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -1298,9 +1298,9 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+               locks_wake_up_blocks(left);
+       }
+  out:
++      trace_posix_lock_inode(inode, request, error);
+       spin_unlock(&ctx->flc_lock);
+       percpu_up_read(&file_rwsem);
+-      trace_posix_lock_inode(inode, request, error);
+       /*
+        * Free any unused locks.
+        */
+-- 
+2.43.0
+
diff --git a/queue-6.1/fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch b/queue-6.1/fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch
new file mode 100644 (file)
index 0000000..5606ffc
--- /dev/null
@@ -0,0 +1,44 @@
+From f523a86638773d00989770c97854689f6d7909f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Apr 2024 10:10:08 +0800
+Subject: fs/dcache: Re-use value stored to dentry->d_flags instead of
+ re-reading
+
+From: linke li <lilinke99@qq.com>
+
+[ Upstream commit 8bfb40be31ddea0cb4664b352e1797cfe6c91976 ]
+
+Currently, the __d_clear_type_and_inode() writes the value flags to
+dentry->d_flags, then immediately re-reads it in order to use it in a if
+statement. This re-read is useless because no other update to
+dentry->d_flags can occur at this point.
+
+This commit therefore re-use flags in the if statement instead of
+re-reading dentry->d_flags.
+
+Signed-off-by: linke li <lilinke99@qq.com>
+Link: https://lore.kernel.org/r/tencent_5E187BD0A61BA28605E85405F15228254D0A@qq.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: aabfe57ebaa7 ("vfs: don't mod negative dentry count when on shrinker list")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/dcache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/dcache.c b/fs/dcache.c
+index b09bc88dbbec7..9b10f1872f6c9 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -356,7 +356,7 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
+       flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
+       WRITE_ONCE(dentry->d_flags, flags);
+       dentry->d_inode = NULL;
+-      if (dentry->d_flags & DCACHE_LRU_LIST)
++      if (flags & DCACHE_LRU_LIST)
+               this_cpu_inc(nr_dentry_negative);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/i40e-fix-xdp-program-unloading-while-removing-the-dr.patch b/queue-6.1/i40e-fix-xdp-program-unloading-while-removing-the-dr.patch
new file mode 100644 (file)
index 0000000..6b3a19b
--- /dev/null
@@ -0,0 +1,119 @@
+From 80f0aafd75846964cf0f020cdbdbf999f337e571 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Jul 2024 16:07:49 -0700
+Subject: i40e: Fix XDP program unloading while removing the driver
+
+From: Michal Kubiak <michal.kubiak@intel.com>
+
+[ Upstream commit 01fc5142ae6b06b61ed51a624f2732d6525d8ea3 ]
+
+The commit 6533e558c650 ("i40e: Fix reset path while removing
+the driver") introduced a new PF state "__I40E_IN_REMOVE" to block
+modifying the XDP program while the driver is being removed.
+Unfortunately, such a change is useful only if the ".ndo_bpf()"
+callback is called outside the rmmod context, because unloading the
+existing XDP program is also part of the driver removal procedure.
+In other words, from the rmmod context the driver is expected to
+unload the XDP program without reporting any errors. Otherwise,
+a kernel warning with a call stack is printed to dmesg.
+
+Example failing scenario:
+ 1. Load the i40e driver.
+ 2. Load the XDP program.
+ 3. Unload the i40e driver (using "rmmod" command).
+
+The example kernel warning log:
+
+[  +0.004646] WARNING: CPU: 94 PID: 10395 at net/core/dev.c:9290 unregister_netdevice_many_notify+0x7a9/0x870
+[...]
+[  +0.010959] RIP: 0010:unregister_netdevice_many_notify+0x7a9/0x870
+[...]
+[  +0.002726] Call Trace:
+[  +0.002457]  <TASK>
+[  +0.002119]  ? __warn+0x80/0x120
+[  +0.003245]  ? unregister_netdevice_many_notify+0x7a9/0x870
+[  +0.005586]  ? report_bug+0x164/0x190
+[  +0.003678]  ? handle_bug+0x3c/0x80
+[  +0.003503]  ? exc_invalid_op+0x17/0x70
+[  +0.003846]  ? asm_exc_invalid_op+0x1a/0x20
+[  +0.004200]  ? unregister_netdevice_many_notify+0x7a9/0x870
+[  +0.005579]  ? unregister_netdevice_many_notify+0x3cc/0x870
+[  +0.005586]  unregister_netdevice_queue+0xf7/0x140
+[  +0.004806]  unregister_netdev+0x1c/0x30
+[  +0.003933]  i40e_vsi_release+0x87/0x2f0 [i40e]
+[  +0.004604]  i40e_remove+0x1a1/0x420 [i40e]
+[  +0.004220]  pci_device_remove+0x3f/0xb0
+[  +0.003943]  device_release_driver_internal+0x19f/0x200
+[  +0.005243]  driver_detach+0x48/0x90
+[  +0.003586]  bus_remove_driver+0x6d/0xf0
+[  +0.003939]  pci_unregister_driver+0x2e/0xb0
+[  +0.004278]  i40e_exit_module+0x10/0x5f0 [i40e]
+[  +0.004570]  __do_sys_delete_module.isra.0+0x197/0x310
+[  +0.005153]  do_syscall_64+0x85/0x170
+[  +0.003684]  ? syscall_exit_to_user_mode+0x69/0x220
+[  +0.004886]  ? do_syscall_64+0x95/0x170
+[  +0.003851]  ? exc_page_fault+0x7e/0x180
+[  +0.003932]  entry_SYSCALL_64_after_hwframe+0x71/0x79
+[  +0.005064] RIP: 0033:0x7f59dc9347cb
+[  +0.003648] Code: 73 01 c3 48 8b 0d 65 16 0c 00 f7 d8 64 89 01 48 83
+c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f
+05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 35 16 0c 00 f7 d8 64 89 01 48
+[  +0.018753] RSP: 002b:00007ffffac99048 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
+[  +0.007577] RAX: ffffffffffffffda RBX: 0000559b9bb2f6e0 RCX: 00007f59dc9347cb
+[  +0.007140] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 0000559b9bb2f748
+[  +0.007146] RBP: 00007ffffac99070 R08: 1999999999999999 R09: 0000000000000000
+[  +0.007133] R10: 00007f59dc9a5ac0 R11: 0000000000000206 R12: 0000000000000000
+[  +0.007141] R13: 00007ffffac992d8 R14: 0000559b9bb2f6e0 R15: 0000000000000000
+[  +0.007151]  </TASK>
+[  +0.002204] ---[ end trace 0000000000000000 ]---
+
+Fix this by checking if the XDP program is being loaded or unloaded.
+Then, block only loading a new program while "__I40E_IN_REMOVE" is set.
+Also, move the "__I40E_IN_REMOVE" flag test to the beginning of the
+XDP_SETUP callback to avoid unnecessary operations and checks.
+
+Fixes: 6533e558c650 ("i40e: Fix reset path while removing the driver")
+Signed-off-by: Michal Kubiak <michal.kubiak@intel.com>
+Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Link: https://patch.msgid.link/20240708230750.625986-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index 9efd4b962dce2..1194dcacbd29e 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -13315,6 +13315,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
+       bool need_reset;
+       int i;
++      /* VSI shall be deleted in a moment, block loading new programs */
++      if (prog && test_bit(__I40E_IN_REMOVE, pf->state))
++              return -EINVAL;
++
+       /* Don't allow frames that span over multiple buffers */
+       if (frame_size > i40e_calculate_vsi_rx_buf_len(vsi)) {
+               NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
+@@ -13323,14 +13327,9 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
+       /* When turning XDP on->off/off->on we reset and rebuild the rings. */
+       need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
+-
+       if (need_reset)
+               i40e_prep_for_reset(pf);
+-      /* VSI shall be deleted in a moment, just return EINVAL */
+-      if (test_bit(__I40E_IN_REMOVE, pf->state))
+-              return -EINVAL;
+-
+       old_prog = xchg(&vsi->xdp_prog, prog);
+       if (need_reset) {
+-- 
+2.43.0
+
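The reordered check reduces to a small decision that can be sketched in userspace C (the boolean in_remove/prog parameters are simplifications of the real pf->state bit and bpf_prog pointer): only loading a new program is refused during removal, while unloading still succeeds:

/* Userspace sketch of the reordered check, illustrative only. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int xdp_setup(bool in_remove, bool prog /* non-NULL program? */)
{
    if (prog && in_remove)
        return -EINVAL;          /* refuse new programs during removal */
    /* ... reset/rebuild rings, swap the program pointer ... */
    return 0;
}

int main(void)
{
    printf("load   during remove -> %d\n", xdp_setup(true, true));   /* -EINVAL */
    printf("unload during remove -> %d\n", xdp_setup(true, false));  /* 0 */
    return 0;
}
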
diff --git a/queue-6.1/mm-prevent-derefencing-null-ptr-in-pfn_section_valid.patch b/queue-6.1/mm-prevent-derefencing-null-ptr-in-pfn_section_valid.patch
new file mode 100644 (file)
index 0000000..2fccf06
--- /dev/null
@@ -0,0 +1,44 @@
+From a2b56f162ac998de20a082bc3c7748e2f1c8b7a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jun 2024 20:16:39 -0400
+Subject: mm: prevent derefencing NULL ptr in pfn_section_valid()
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit 82f0b6f041fad768c28b4ad05a683065412c226e ]
+
+Commit 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing
+memory_section->usage") changed pfn_section_valid() to add a READ_ONCE()
+call around "ms->usage" to fix a race with section_deactivate() where
+ms->usage can be cleared.  The READ_ONCE() call, by itself, is not enough
+to prevent a NULL pointer dereference.  We need to check its value before
+dereferencing it.
+
+Link: https://lkml.kernel.org/r/20240626001639.1350646-1-longman@redhat.com
+Fixes: 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: Charan Teja Kalla <quic_charante@quicinc.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mmzone.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index 93d2003091222..61906244c14d6 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -1814,8 +1814,9 @@ static inline int subsection_map_index(unsigned long pfn)
+ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+ {
+       int idx = subsection_map_index(pfn);
++      struct mem_section_usage *usage = READ_ONCE(ms->usage);
+-      return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
++      return usage ? test_bit(idx, usage->subsection_map) : 0;
+ }
+ #else
+ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+-- 
+2.43.0
+
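The pattern being fixed is the classic "read once into a local, then NULL-check the local" idiom. A userspace C sketch (the GCC/Clang __atomic_load_n builtin stands in for READ_ONCE(), and the structures are invented, not the kernel's mem_section):

/* Userspace sketch: read the possibly-cleared pointer once into a local,
 * then NULL-check that local before dereferencing it. */
#include <stdio.h>

struct usage { unsigned long map; };

static struct usage *shared_usage;     /* may be set to NULL concurrently */

static int section_valid(int idx)
{
    struct usage *usage = __atomic_load_n(&shared_usage, __ATOMIC_RELAXED);

    return usage ? !!(usage->map & (1UL << idx)) : 0;   /* guard the deref */
}

int main(void)
{
    struct usage u = { .map = 0x2 };

    shared_usage = &u;
    printf("valid(1) = %d\n", section_valid(1));  /* 1 */
    shared_usage = NULL;                          /* models section_deactivate() */
    printf("valid(1) = %d\n", section_valid(1));  /* 0, no crash */
    return 0;
}
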
diff --git a/queue-6.1/net-ethernet-lantiq_etop-fix-double-free-in-detach.patch b/queue-6.1/net-ethernet-lantiq_etop-fix-double-free-in-detach.patch
new file mode 100644 (file)
index 0000000..945741b
--- /dev/null
@@ -0,0 +1,43 @@
+From cfec6d893b5e7b82ed597f8d141a9b1cf2c4746d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Jul 2024 22:58:26 +0200
+Subject: net: ethernet: lantiq_etop: fix double free in detach
+
+From: Aleksander Jan Bajkowski <olek2@wp.pl>
+
+[ Upstream commit e1533b6319ab9c3a97dad314dd88b3783bc41b69 ]
+
+The number of the currently released descriptor is never incremented,
+which results in the same skb being released multiple times.
+
+Fixes: 504d4721ee8e ("MIPS: Lantiq: Add ethernet driver")
+Reported-by: Joe Perches <joe@perches.com>
+Closes: https://lore.kernel.org/all/fc1bf93d92bb5b2f99c6c62745507cc22f3a7b2d.camel@perches.com/
+Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://patch.msgid.link/20240708205826.5176-1-olek2@wp.pl
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/lantiq_etop.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
+index f5961bdcc4809..61baf1da76eea 100644
+--- a/drivers/net/ethernet/lantiq_etop.c
++++ b/drivers/net/ethernet/lantiq_etop.c
+@@ -217,9 +217,9 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch)
+       if (ch->dma.irq)
+               free_irq(ch->dma.irq, priv);
+       if (IS_RX(ch->idx)) {
+-              int desc;
++              struct ltq_dma_channel *dma = &ch->dma;
+-              for (desc = 0; desc < LTQ_DESC_NUM; desc++)
++              for (dma->desc = 0; dma->desc < LTQ_DESC_NUM; dma->desc++)
+                       dev_kfree_skb_any(ch->skb[ch->dma.desc]);
+       }
+ }
+-- 
+2.43.0
+
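The bug class here is simply a loop whose lookup index never advances. A userspace C sketch (fake skb names and an invented ring size) of the buggy and fixed shapes:

/* Userspace sketch: the free path keeps consulting the same, never-advanced
 * index, so the same buffer is released on every iteration; advancing the
 * counter that the lookup actually uses fixes it. */
#include <stdio.h>

#define NUM 4

int main(void)
{
    const char *skb[NUM] = { "skb0", "skb1", "skb2", "skb3" };
    int cur = 0;            /* the index the free path really consults */

    /* buggy shape: a separate counter advances, 'cur' does not */
    for (int desc = 0; desc < NUM; desc++)
        printf("buggy frees %s\n", skb[cur]);       /* always skb0 */

    /* fixed shape: advance the index that is actually dereferenced */
    for (cur = 0; cur < NUM; cur++)
        printf("fixed frees %s\n", skb[cur]);
    return 0;
}
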
diff --git a/queue-6.1/net-ethernet-mtk-star-emac-set-mac_managed_pm-when-p.patch b/queue-6.1/net-ethernet-mtk-star-emac-set-mac_managed_pm-when-p.patch
new file mode 100644 (file)
index 0000000..66cd0eb
--- /dev/null
@@ -0,0 +1,55 @@
+From 7fa7c84c102ed338a2d455e52aa9941b0087df81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Jul 2024 14:52:09 +0800
+Subject: net: ethernet: mtk-star-emac: set mac_managed_pm when probing
+
+From: Jian Hui Lee <jianhui.lee@canonical.com>
+
+[ Upstream commit 8c6790b5c25dfac11b589cc37346bcf9e23ad468 ]
+
+The commit below introduced a warning message when the PHY state is not
+one of PHY_HALTED, PHY_READY, or PHY_UP:
+commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state")
+
+mtk-star-emac doesn't need mdiobus suspend/resume. To fix the warning
+message during resume, indicate at probe time that PHY suspend/resume is
+managed by the MAC.
+
+Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state")
+Signed-off-by: Jian Hui Lee <jianhui.lee@canonical.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://patch.msgid.link/20240708065210.4178980-1-jianhui.lee@canonical.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mediatek/mtk_star_emac.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
+index 7050351250b7a..ad27749c0931c 100644
+--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
++++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
+@@ -1531,6 +1531,7 @@ static int mtk_star_probe(struct platform_device *pdev)
+ {
+       struct device_node *of_node;
+       struct mtk_star_priv *priv;
++      struct phy_device *phydev;
+       struct net_device *ndev;
+       struct device *dev;
+       void __iomem *base;
+@@ -1656,6 +1657,12 @@ static int mtk_star_probe(struct platform_device *pdev)
+       netif_napi_add(ndev, &priv->rx_napi, mtk_star_rx_poll);
+       netif_napi_add_tx(ndev, &priv->tx_napi, mtk_star_tx_poll);
++      phydev = of_phy_find_device(priv->phy_node);
++      if (phydev) {
++              phydev->mac_managed_pm = true;
++              put_device(&phydev->mdio.dev);
++      }
++
+       return devm_register_netdev(dev, ndev);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-fix-rc7-s-__skb_datagram_iter.patch b/queue-6.1/net-fix-rc7-s-__skb_datagram_iter.patch
new file mode 100644 (file)
index 0000000..2ae319e
--- /dev/null
@@ -0,0 +1,45 @@
+From b10d47dcc440d3c66b9eb216f61a3458570494b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Jul 2024 07:46:00 -0700
+Subject: net: fix rc7's __skb_datagram_iter()
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit f153831097b4435f963e385304cc0f1acba1c657 ]
+
+X would not start in my old 32-bit partition (and the "n"-handling looks
+just as wrong on 64-bit, but for whatever reason did not show up there):
+"n" must be accumulated over all pages before it's added to "offset" and
+compared with "copy", immediately after the skb_frag_foreach_page() loop.
+
+Fixes: d2d30a376d9c ("net: allow skb_datagram_iter to be called from any context")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Link: https://patch.msgid.link/fef352e8-b89a-da51-f8ce-04bc39ee6481@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/datagram.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/datagram.c b/net/core/datagram.c
+index cdd65ca3124a4..87c39cc12327f 100644
+--- a/net/core/datagram.c
++++ b/net/core/datagram.c
+@@ -441,11 +441,12 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
+                       if (copy > len)
+                               copy = len;
++                      n = 0;
+                       skb_frag_foreach_page(frag,
+                                             skb_frag_off(frag) + offset - start,
+                                             copy, p, p_off, p_len, copied) {
+                               vaddr = kmap_local_page(p);
+-                              n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
++                              n += INDIRECT_CALL_1(cb, simple_copy_to_iter,
+                                       vaddr + p_off, p_len, data, to);
+                               kunmap_local(vaddr);
+                       }
+-- 
+2.43.0
+
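The accounting difference is easy to see in a userspace C sketch (invented chunk sizes standing in for the per-page copies inside skb_frag_foreach_page()): assigning records only the last chunk, accumulating records the whole copy:

/* Userspace sketch of the accounting fix: per-page byte counts must be
 * summed before being compared against the expected total. */
#include <stdio.h>

int main(void)
{
    int chunks[] = { 3, 4, 5 };      /* bytes copied from each page */
    int copy = 12;                   /* bytes we expected to copy */
    int n;

    n = 0;
    for (unsigned i = 0; i < 3; i++)
        n = chunks[i];               /* buggy: overwrite, n ends up 5 */
    printf("assign:     n=%d, short-copy=%d\n", n, n != copy);

    n = 0;
    for (unsigned i = 0; i < 3; i++)
        n += chunks[i];              /* fixed: accumulate, n ends up 12 */
    printf("accumulate: n=%d, short-copy=%d\n", n, n != copy);
    return 0;
}
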
diff --git a/queue-6.1/net-phy-microchip-lan87xx-reinit-phy-after-cable-tes.patch b/queue-6.1/net-phy-microchip-lan87xx-reinit-phy-after-cable-tes.patch
new file mode 100644 (file)
index 0000000..73816b1
--- /dev/null
@@ -0,0 +1,41 @@
+From f8b4ed67efc4725fa5fde6d4424a93e26e68490a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Jul 2024 10:49:54 +0200
+Subject: net: phy: microchip: lan87xx: reinit PHY after cable test
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit 30f747b8d53bc73555f268d0f48f56174fa5bf10 ]
+
+Reinit the PHY after a cable test, otherwise a link can't be established
+on the tested port. This issue is reproducible on LAN9372 switches with
+integrated 100BaseT1 PHYs.
+
+Fixes: 788050256c411 ("net: phy: microchip_t1: add cable test support for lan87xx phy")
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Link: https://patch.msgid.link/20240705084954.83048-1-o.rempel@pengutronix.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/microchip_t1.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c
+index 8569a545e0a3f..9517243e3051e 100644
+--- a/drivers/net/phy/microchip_t1.c
++++ b/drivers/net/phy/microchip_t1.c
+@@ -711,7 +711,7 @@ static int lan87xx_cable_test_report(struct phy_device *phydev)
+       ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+                               lan87xx_cable_test_report_trans(detect));
+-      return 0;
++      return phy_init_hw(phydev);
+ }
+ static int lan87xx_cable_test_get_status(struct phy_device *phydev,
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-sched-fix-uaf-when-resolving-a-clash.patch b/queue-6.1/net-sched-fix-uaf-when-resolving-a-clash.patch
new file mode 100644 (file)
index 0000000..5806932
--- /dev/null
@@ -0,0 +1,131 @@
+From 7de8447ba1b93d5698b921b9ef49c66bed1b74fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Jul 2024 13:37:47 +0800
+Subject: net/sched: Fix UAF when resolving a clash
+
+From: Chengen Du <chengen.du@canonical.com>
+
+[ Upstream commit 26488172b0292bed837b95a006a3f3431d1898c3 ]
+
+KASAN reports the following UAF:
+
+ BUG: KASAN: slab-use-after-free in tcf_ct_flow_table_process_conn+0x12b/0x380 [act_ct]
+ Read of size 1 at addr ffff888c07603600 by task handler130/6469
+
+ Call Trace:
+  <IRQ>
+  dump_stack_lvl+0x48/0x70
+  print_address_description.constprop.0+0x33/0x3d0
+  print_report+0xc0/0x2b0
+  kasan_report+0xd0/0x120
+  __asan_load1+0x6c/0x80
+  tcf_ct_flow_table_process_conn+0x12b/0x380 [act_ct]
+  tcf_ct_act+0x886/0x1350 [act_ct]
+  tcf_action_exec+0xf8/0x1f0
+  fl_classify+0x355/0x360 [cls_flower]
+  __tcf_classify+0x1fd/0x330
+  tcf_classify+0x21c/0x3c0
+  sch_handle_ingress.constprop.0+0x2c5/0x500
+  __netif_receive_skb_core.constprop.0+0xb25/0x1510
+  __netif_receive_skb_list_core+0x220/0x4c0
+  netif_receive_skb_list_internal+0x446/0x620
+  napi_complete_done+0x157/0x3d0
+  gro_cell_poll+0xcf/0x100
+  __napi_poll+0x65/0x310
+  net_rx_action+0x30c/0x5c0
+  __do_softirq+0x14f/0x491
+  __irq_exit_rcu+0x82/0xc0
+  irq_exit_rcu+0xe/0x20
+  common_interrupt+0xa1/0xb0
+  </IRQ>
+  <TASK>
+  asm_common_interrupt+0x27/0x40
+
+ Allocated by task 6469:
+  kasan_save_stack+0x38/0x70
+  kasan_set_track+0x25/0x40
+  kasan_save_alloc_info+0x1e/0x40
+  __kasan_krealloc+0x133/0x190
+  krealloc+0xaa/0x130
+  nf_ct_ext_add+0xed/0x230 [nf_conntrack]
+  tcf_ct_act+0x1095/0x1350 [act_ct]
+  tcf_action_exec+0xf8/0x1f0
+  fl_classify+0x355/0x360 [cls_flower]
+  __tcf_classify+0x1fd/0x330
+  tcf_classify+0x21c/0x3c0
+  sch_handle_ingress.constprop.0+0x2c5/0x500
+  __netif_receive_skb_core.constprop.0+0xb25/0x1510
+  __netif_receive_skb_list_core+0x220/0x4c0
+  netif_receive_skb_list_internal+0x446/0x620
+  napi_complete_done+0x157/0x3d0
+  gro_cell_poll+0xcf/0x100
+  __napi_poll+0x65/0x310
+  net_rx_action+0x30c/0x5c0
+  __do_softirq+0x14f/0x491
+
+ Freed by task 6469:
+  kasan_save_stack+0x38/0x70
+  kasan_set_track+0x25/0x40
+  kasan_save_free_info+0x2b/0x60
+  ____kasan_slab_free+0x180/0x1f0
+  __kasan_slab_free+0x12/0x30
+  slab_free_freelist_hook+0xd2/0x1a0
+  __kmem_cache_free+0x1a2/0x2f0
+  kfree+0x78/0x120
+  nf_conntrack_free+0x74/0x130 [nf_conntrack]
+  nf_ct_destroy+0xb2/0x140 [nf_conntrack]
+  __nf_ct_resolve_clash+0x529/0x5d0 [nf_conntrack]
+  nf_ct_resolve_clash+0xf6/0x490 [nf_conntrack]
+  __nf_conntrack_confirm+0x2c6/0x770 [nf_conntrack]
+  tcf_ct_act+0x12ad/0x1350 [act_ct]
+  tcf_action_exec+0xf8/0x1f0
+  fl_classify+0x355/0x360 [cls_flower]
+  __tcf_classify+0x1fd/0x330
+  tcf_classify+0x21c/0x3c0
+  sch_handle_ingress.constprop.0+0x2c5/0x500
+  __netif_receive_skb_core.constprop.0+0xb25/0x1510
+  __netif_receive_skb_list_core+0x220/0x4c0
+  netif_receive_skb_list_internal+0x446/0x620
+  napi_complete_done+0x157/0x3d0
+  gro_cell_poll+0xcf/0x100
+  __napi_poll+0x65/0x310
+  net_rx_action+0x30c/0x5c0
+  __do_softirq+0x14f/0x491
+
+The ct may be dropped when a clash is resolved, yet it is still passed to
+the tcf_ct_flow_table_process_conn function for further use. This issue
+can be fixed by retrieving the ct from the skb again after the conntrack
+has been confirmed.
+
+Fixes: 0cc254e5aa37 ("net/sched: act_ct: Offload connections with commit action")
+Co-developed-by: Gerald Yang <gerald.yang@canonical.com>
+Signed-off-by: Gerald Yang <gerald.yang@canonical.com>
+Signed-off-by: Chengen Du <chengen.du@canonical.com>
+Link: https://patch.msgid.link/20240710053747.13223-1-chengen.du@canonical.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/act_ct.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
+index cd95a315fde82..44ff7f356ec15 100644
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -1212,6 +1212,14 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
+                */
+               if (nf_conntrack_confirm(skb) != NF_ACCEPT)
+                       goto drop;
++
++              /* The ct may be dropped if a clash has been resolved,
++               * so it's necessary to retrieve it from skb again to
++               * prevent UAF.
++               */
++              ct = nf_ct_get(skb, &ctinfo);
++              if (!ct)
++                      skip_add = true;
+       }
+       if (!skip_add)
+-- 
+2.43.0
+
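A userspace C sketch of the "re-fetch after confirm" idea (the skb/ct structs and the confirm() stand-in are invented; real clash resolution happens inside nf_conntrack): keep using the pointer re-read from the skb, not the copy cached before the call, and cope with it being gone:

/* Userspace sketch: after an operation that may free and replace the object
 * behind a cached pointer, re-fetch the pointer from its authoritative
 * holder and handle NULL instead of reusing the stale copy. */
#include <stdio.h>
#include <stdlib.h>

struct ct  { int id; };
struct skb { struct ct *ct; };

/* models nf_conntrack_confirm() resolving a clash: frees the old ct */
static void confirm(struct skb *skb)
{
    free(skb->ct);
    skb->ct = NULL;
}

int main(void)
{
    struct skb skb = { .ct = malloc(sizeof(struct ct)) };
    struct ct *ct = skb.ct;   /* cached copy taken before confirm */

    printf("cached ct %p before confirm\n", (void *)ct);
    confirm(&skb);

    ct = skb.ct;              /* the fix: re-read from the skb ... */
    if (!ct)                  /* ... and skip further use if it is gone */
        printf("ct dropped during confirm, skipping offload\n");
    return 0;
}
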
diff --git a/queue-6.1/net-sunrpc-remap-eperm-in-case-of-connection-failure.patch b/queue-6.1/net-sunrpc-remap-eperm-in-case-of-connection-failure.patch
new file mode 100644 (file)
index 0000000..038540a
--- /dev/null
@@ -0,0 +1,64 @@
+From 47cf0624e2d33d14d0dcc3e8ff3627e21ea0339b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Jul 2024 08:41:57 +0200
+Subject: net, sunrpc: Remap EPERM in case of connection failure in
+ xs_tcp_setup_socket
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 626dfed5fa3bfb41e0dffd796032b555b69f9cde ]
+
+When using a BPF program on kernel_connect(), the call can return -EPERM. This
+causes xs_tcp_setup_socket() to loop forever, filling up the syslog and causing
+the kernel to potentially freeze up.
+
+Neil suggested:
+
+  This will propagate -EPERM up into other layers which might not be ready
+  to handle it. It might be safer to map EPERM to an error we would be more
+  likely to expect from the network system - such as ECONNREFUSED or ENETDOWN.
+
+ECONNREFUSED as the error seems reasonable. For BPF programs, setting a
+different error can be out of reach (see the handling in 4fbac77d2d09), in
+particular on kernels which do not have f10d05966196 ("bpf: Make
+BPF_PROG_RUN_ARRAY return -err instead of allow boolean"), so it is better
+to simply remap the error for consistent behavior. UDP already handles
+EPERM in xs_udp_send_request().
+
+Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect")
+Fixes: 4fbac77d2d09 ("bpf: Hooks for sys_bind")
+Co-developed-by: Lex Siegel <usiegl00@gmail.com>
+Signed-off-by: Lex Siegel <usiegl00@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Neil Brown <neilb@suse.de>
+Cc: Trond Myklebust <trondmy@kernel.org>
+Cc: Anna Schumaker <anna@kernel.org>
+Link: https://github.com/cilium/cilium/issues/33395
+Link: https://lore.kernel.org/bpf/171374175513.12877.8993642908082014881@noble.neil.brown.name
+Link: https://patch.msgid.link/9069ec1d59e4b2129fc23433349fd5580ad43921.1720075070.git.daniel@iogearbox.net
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sunrpc/xprtsock.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
+index 05aa32696e7c2..02f651f85e739 100644
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -2333,6 +2333,13 @@ static void xs_tcp_setup_socket(struct work_struct *work)
+               transport->srcport = 0;
+               status = -EAGAIN;
+               break;
++      case -EPERM:
++              /* Happens, for instance, if a BPF program is preventing
++               * the connect. Remap the error so upper layers can better
++               * deal with it.
++               */
++              status = -ECONNREFUSED;
++              fallthrough;
+       case -EINVAL:
+               /* Happens, for instance, if the user specified a link
+                * local IPv6 address without a scope-id.
+-- 
+2.43.0
+
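The remap itself is a one-case addition to the connect status handling; a userspace C sketch of the shape (map_connect_status() is an invented name, not the sunrpc function):

/* Userspace sketch: translate -EPERM from connect() (e.g. denied by a BPF
 * program) into -ECONNREFUSED before handing it to layers that only expect
 * network-style errors. */
#include <errno.h>
#include <stdio.h>

static int map_connect_status(int status)
{
    switch (status) {
    case -EPERM:
        status = -ECONNREFUSED;   /* remap, then treat like a refusal */
        /* fallthrough */
    case -ECONNREFUSED:
    case -ENETDOWN:
        return status;            /* retryable network-style errors */
    default:
        return status;
    }
}

int main(void)
{
    printf("-EPERM        -> %d\n", map_connect_status(-EPERM));
    printf("-ECONNREFUSED -> %d\n", map_connect_status(-ECONNREFUSED));
    return 0;
}
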
diff --git a/queue-6.1/octeontx2-af-fix-incorrect-value-output-on-error-pat.patch b/queue-6.1/octeontx2-af-fix-incorrect-value-output-on-error-pat.patch
new file mode 100644 (file)
index 0000000..6139bcb
--- /dev/null
@@ -0,0 +1,44 @@
+From 5a964e46e1aaa12cfb118a93529cc56dfe66349b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Jul 2024 12:53:17 +0300
+Subject: octeontx2-af: Fix incorrect value output on error path in
+ rvu_check_rsrc_availability()
+
+From: Aleksandr Mishin <amishin@t-argos.ru>
+
+[ Upstream commit 442e26af9aa8115c96541026cbfeaaa76c85d178 ]
+
+In rvu_check_rsrc_availability(), in the case of an invalid SSOW req,
+incorrect data is printed to the error log: the 'req->sso' value is printed
+instead of 'req->ssow'. This looks like a copy-paste mistake.
+
+Fix this mistake by replacing 'req->sso' with 'req->ssow'.
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: 746ea74241fa ("octeontx2-af: Add RVU block LF provisioning support")
+Signed-off-by: Aleksandr Mishin <amishin@t-argos.ru>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20240705095317.12640-1-amishin@t-argos.ru
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+index a7034b47ed6c9..c7829265eade9 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+@@ -1638,7 +1638,7 @@ static int rvu_check_rsrc_availability(struct rvu *rvu,
+               if (req->ssow > block->lf.max) {
+                       dev_err(&rvu->pdev->dev,
+                               "Func 0x%x: Invalid SSOW req, %d > max %d\n",
+-                               pcifunc, req->sso, block->lf.max);
++                               pcifunc, req->ssow, block->lf.max);
+                       return -EINVAL;
+               }
+               mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr);
+-- 
+2.43.0
+
diff --git a/queue-6.1/ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch b/queue-6.1/ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch
new file mode 100644 (file)
index 0000000..5735201
--- /dev/null
@@ -0,0 +1,67 @@
+From 3277d7c93db469c71caed9dc9e7be624c1fc42eb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Jul 2024 14:56:15 +0300
+Subject: ppp: reject claimed-as-LCP but actually malformed packets
+
+From: Dmitry Antipov <dmantipov@yandex.ru>
+
+[ Upstream commit f2aeb7306a898e1cbd03963d376f4b6656ca2b55 ]
+
+Since 'ppp_async_encode()' assumes valid LCP packets (with code
+from 1 to 7 inclusive), add 'ppp_check_packet()' to ensure that
+an LCP packet has an actual body beyond the PPP_LCP header bytes, and
+reject claimed-as-LCP but actually malformed data otherwise.
+
+Reported-by: syzbot+ec0723ba9605678b14bf@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=ec0723ba9605678b14bf
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ppp/ppp_generic.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
+index 1d71f5276241c..5a6fa566e722f 100644
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -70,6 +70,7 @@
+ #define MPHDRLEN_SSN  4       /* ditto with short sequence numbers */
+ #define PPP_PROTO_LEN 2
++#define PPP_LCP_HDRLEN        4
+ /*
+  * An instance of /dev/ppp can be associated with either a ppp
+@@ -491,6 +492,15 @@ static ssize_t ppp_read(struct file *file, char __user *buf,
+       return ret;
+ }
++static bool ppp_check_packet(struct sk_buff *skb, size_t count)
++{
++      /* LCP packets must include LCP header which 4 bytes long:
++       * 1-byte code, 1-byte identifier, and 2-byte length.
++       */
++      return get_unaligned_be16(skb->data) != PPP_LCP ||
++              count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN;
++}
++
+ static ssize_t ppp_write(struct file *file, const char __user *buf,
+                        size_t count, loff_t *ppos)
+ {
+@@ -513,6 +523,11 @@ static ssize_t ppp_write(struct file *file, const char __user *buf,
+               kfree_skb(skb);
+               goto out;
+       }
++      ret = -EINVAL;
++      if (unlikely(!ppp_check_packet(skb, count))) {
++              kfree_skb(skb);
++              goto out;
++      }
+       switch (pf->kind) {
+       case INTERFACE:
+-- 
+2.43.0
+
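The check reduces to a length comparison that is easy to exercise in userspace (the helper below mirrors the logic of the new ppp_check_packet(), but takes the protocol as a plain argument instead of reading skb->data):

/* Userspace sketch of the sanity check: a packet claiming protocol PPP_LCP
 * must carry at least the 2-byte protocol field plus the 4-byte LCP header
 * (code, identifier, 16-bit length); anything shorter is rejected. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PPP_LCP        0xc021
#define PPP_PROTO_LEN  2
#define PPP_LCP_HDRLEN 4

static bool packet_ok(unsigned int proto, size_t count)
{
    return proto != PPP_LCP || count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN;
}

int main(void)
{
    printf("LCP, 3 bytes -> %s\n", packet_ok(PPP_LCP, 3) ? "accept" : "reject");
    printf("LCP, 8 bytes -> %s\n", packet_ok(PPP_LCP, 8) ? "accept" : "reject");
    printf("IP,  3 bytes -> %s\n", packet_ok(0x0021, 3) ? "accept" : "reject");
    return 0;
}
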
diff --git a/queue-6.1/series b/queue-6.1/series
new file mode 100644 (file)
index 0000000..5d2462a
--- /dev/null
@@ -0,0 +1,29 @@
+mm-prevent-derefencing-null-ptr-in-pfn_section_valid.patch
+cachefiles-propagate-errors-from-vfs_getxattr-to-avo.patch
+cachefiles-narrow-the-scope-of-triggering-epollin-ev.patch
+cachefiles-stop-sending-new-request-when-dropping-ob.patch
+cachefiles-cancel-all-requests-for-the-object-that-i.patch
+cachefiles-wait-for-ondemand_object_worker-to-finish.patch
+cachefiles-cyclic-allocation-of-msg_id-to-avoid-reus.patch
+cachefiles-add-missing-lock-protection-when-polling.patch
+filelock-fix-potential-use-after-free-in-posix_lock_.patch
+fs-dcache-re-use-value-stored-to-dentry-d_flags-inst.patch
+vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch
+tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch
+net-phy-microchip-lan87xx-reinit-phy-after-cable-tes.patch
+skmsg-skip-zero-length-skb-in-sk_msg_recvmsg.patch
+octeontx2-af-fix-incorrect-value-output-on-error-pat.patch
+net-fix-rc7-s-__skb_datagram_iter.patch
+i40e-fix-xdp-program-unloading-while-removing-the-dr.patch
+net-ethernet-lantiq_etop-fix-double-free-in-detach.patch
+bpf-refactor-some-inode-task-sk-storage-functions-fo.patch
+bpf-reduce-smap-elem_size.patch
+bpf-use-bpf_map_kvcalloc-in-bpf_local_storage.patch
+bpf-remove-__bpf_local_storage_map_alloc.patch
+bpf-fix-order-of-args-in-call-to-bpf_map_kvcalloc.patch
+net-ethernet-mtk-star-emac-set-mac_managed_pm-when-p.patch
+ppp-reject-claimed-as-lcp-but-actually-malformed-pac.patch
+ethtool-netlink-do-not-return-sqi-value-if-link-is-d.patch
+udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch
+net-sched-fix-uaf-when-resolving-a-clash.patch
+net-sunrpc-remap-eperm-in-case-of-connection-failure.patch
diff --git a/queue-6.1/skmsg-skip-zero-length-skb-in-sk_msg_recvmsg.patch b/queue-6.1/skmsg-skip-zero-length-skb-in-sk_msg_recvmsg.patch
new file mode 100644 (file)
index 0000000..f8767be
--- /dev/null
@@ -0,0 +1,105 @@
+From a56e65a2b1e3978341f865019dc4fa5cbfaba2e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jul 2024 16:39:31 +0800
+Subject: skmsg: Skip zero length skb in sk_msg_recvmsg
+
+From: Geliang Tang <tanggeliang@kylinos.cn>
+
+[ Upstream commit f0c18025693707ec344a70b6887f7450bf4c826b ]
+
+When running BPF selftests (./test_progs -t sockmap_basic) on a Loongarch
+platform, the following kernel panic occurs:
+
+  [...]
+  Oops[#1]:
+  CPU: 22 PID: 2824 Comm: test_progs Tainted: G           OE  6.10.0-rc2+ #18
+  Hardware name: LOONGSON Dabieshan/Loongson-TC542F0, BIOS Loongson-UDK2018
+     ... ...
+     ra: 90000000048bf6c0 sk_msg_recvmsg+0x120/0x560
+    ERA: 9000000004162774 copy_page_to_iter+0x74/0x1c0
+   CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE)
+   PRMD: 0000000c (PPLV0 +PIE +PWE)
+   EUEN: 00000007 (+FPE +SXE +ASXE -BTE)
+   ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7)
+  ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0)
+   BADV: 0000000000000040
+   PRID: 0014c011 (Loongson-64bit, Loongson-3C5000)
+  Modules linked in: bpf_testmod(OE) xt_CHECKSUM xt_MASQUERADE xt_conntrack
+  Process test_progs (pid: 2824, threadinfo=0000000000863a31, task=...)
+  Stack : ...
+  Call Trace:
+  [<9000000004162774>] copy_page_to_iter+0x74/0x1c0
+  [<90000000048bf6c0>] sk_msg_recvmsg+0x120/0x560
+  [<90000000049f2b90>] tcp_bpf_recvmsg_parser+0x170/0x4e0
+  [<90000000049aae34>] inet_recvmsg+0x54/0x100
+  [<900000000481ad5c>] sock_recvmsg+0x7c/0xe0
+  [<900000000481e1a8>] __sys_recvfrom+0x108/0x1c0
+  [<900000000481e27c>] sys_recvfrom+0x1c/0x40
+  [<9000000004c076ec>] do_syscall+0x8c/0xc0
+  [<9000000003731da4>] handle_syscall+0xc4/0x160
+  Code: ...
+  ---[ end trace 0000000000000000 ]---
+  Kernel panic - not syncing: Fatal exception
+  Kernel relocated by 0x3510000
+   .text @ 0x9000000003710000
+   .data @ 0x9000000004d70000
+   .bss  @ 0x9000000006469400
+  ---[ end Kernel panic - not syncing: Fatal exception ]---
+  [...]
+
+This crash happens every time when running sockmap_skb_verdict_shutdown
+subtest in sockmap_basic.
+
+This crash happens because a NULL pointer is passed to page_address() in
+sk_msg_recvmsg(). Since the implementation differs by architecture,
+page_address(NULL) will trigger a panic on the Loongarch platform but not
+on x86. So this bug was hidden on x86 for a while, but it is now exposed
+on the Loongarch platform. The root cause is that a zero-length skb
+(skb->len == 0) was put on the queue.
+
+This zero length skb is a TCP FIN packet, which was sent by shutdown(),
+invoked in test_sockmap_skb_verdict_shutdown():
+
+       shutdown(p1, SHUT_WR);
+
+In this case, in sk_psock_skb_ingress_enqueue(), num_sge is zero, and no
+page is put to this sge (see sg_set_page in sg_set_page), but this empty
+sge is queued into ingress_msg list.
+
+And in sk_msg_recvmsg(), this empty sge is used, and a NULL page is got by
+sg_page(sge). Pass this NULL page to copy_page_to_iter(), which passes it
+to kmap_local_page() and to page_address(), then kernel panics.
+
+To solve this, we should skip this zero-length skb. So in sk_msg_recvmsg(),
+if copy is zero, which means it's a zero-length skb, skip invoking
+copy_page_to_iter(). We are using the EFAULT return triggered by
+copy_page_to_iter to check for is_fin in tcp_bpf.c.
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Suggested-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/bpf/e3a16eacdc6740658ee02a33489b1b9d4912f378.1719992715.git.tanggeliang@kylinos.cn
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/skmsg.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index 8b0459a6b629f..746d950de0e14 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -433,7 +433,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+                       page = sg_page(sge);
+                       if (copied + copy > len)
+                               copy = len - copied;
+-                      copy = copy_page_to_iter(page, sge->offset, copy, iter);
++                      if (copy)
++                              copy = copy_page_to_iter(page, sge->offset, copy, iter);
+                       if (!copy) {
+                               copied = copied ? copied : -EFAULT;
+                               goto out;
+-- 
+2.43.0
+
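A userspace C sketch of the guard (copy_page_to_buf() is an invented stand-in for copy_page_to_iter(); it touches the page unconditionally, which is where a NULL page would crash): the zero-length element is simply skipped before the helper is called:

/* Userspace sketch: a zero-length element (a FIN with no payload) must not
 * be handed to the page-copy helper at all. */
#include <stdio.h>

static int copy_page_to_buf(const char *page, char *buf, int copy)
{
    char first = page[0];           /* unconditional access, like mapping the page */

    for (int i = 0; i < copy; i++)
        buf[i] = page[i];
    (void)first;
    return copy;
}

int main(void)
{
    struct { const char *page; int len; } sge[] = {
        { "data", 4 },
        { NULL,   0 },              /* zero-length entry queued for a FIN */
    };
    char buf[8];

    for (unsigned i = 0; i < 2; i++) {
        int copy = sge[i].len;

        if (copy)                                        /* the fix: skip if zero */
            copy = copy_page_to_buf(sge[i].page, buf, copy);
        printf("sge[%u]: copied %d bytes\n", i, copy);
    }
    return 0;
}
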
diff --git a/queue-6.1/tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch b/queue-6.1/tcp-fix-incorrect-undo-caused-by-dsack-of-tlp-retran.patch
new file mode 100644 (file)
index 0000000..b4116e4
--- /dev/null
@@ -0,0 +1,107 @@
+From 688a773b8047b9d343714911fd4fb702b6d50fca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jul 2024 13:12:46 -0400
+Subject: tcp: fix incorrect undo caused by DSACK of TLP retransmit
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit 0ec986ed7bab6801faed1440e8839dcc710331ff ]
+
+Loss recovery undo_retrans bookkeeping had a long-standing bug where a
+DSACK from a spurious TLP retransmit packet could cause an erroneous
+undo of a fast recovery or RTO recovery that repaired a single
+really-lost packet (in a sequence range outside that of the TLP
+retransmit). Basically, because the loss recovery state machine didn't
+account for the fact that it sent a TLP retransmit, the DSACK for the
+TLP retransmit could erroneously be implicitly interpreted as
+corresponding to the normal fast recovery or RTO recovery retransmit
+that plugged a real hole, thus resulting in an improper undo.
+
+For example, consider the following buggy scenario where there is a
+real packet loss but the congestion control response is improperly
+undone because of this bug:
+
++ send packets P1, P2, P3, P4
++ P1 is really lost
++ send TLP retransmit of P4
++ receive SACK for original P2, P3, P4
++ enter fast recovery, fast-retransmit P1, increment undo_retrans to 1
++ receive DSACK for TLP P4, decrement undo_retrans to 0, undo (bug!)
++ receive cumulative ACK for P1-P4 (fast retransmit plugged real hole)
+
+The fix: when we initialize undo machinery in tcp_init_undo(), if
+there is a TLP retransmit in flight, then increment tp->undo_retrans
+so that we make sure that we receive a DSACK corresponding to the TLP
+retransmit, as well as DSACKs for all later normal retransmits, before
+triggering a loss recovery undo. Note that we also have to move the
+line that clears tp->tlp_high_seq for RTO recovery, so that upon RTO
+we remember the tp->tlp_high_seq value until tcp_init_undo() and clear
+it only afterward.
+
+Also note that the bug dates back to the original 2013 TLP
+implementation, commit 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)").
+
+However, this patch will only compile and work correctly with kernels
+that have tp->tlp_retrans, which was added only in v5.8 in 2020 in
+commit 76be93fc0702 ("tcp: allow at most one TLP probe per flight").
+So we associate this fix with that later commit.
+
+Fixes: 76be93fc0702 ("tcp: allow at most one TLP probe per flight")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Kevin Yang <yyd@google.com>
+Link: https://patch.msgid.link/20240703171246.1739561-1-ncardwell.sw@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 11 ++++++++++-
+ net/ipv4/tcp_timer.c |  2 --
+ 2 files changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 317cb90d77102..359ffda9b736b 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2101,8 +2101,16 @@ void tcp_clear_retrans(struct tcp_sock *tp)
+ static inline void tcp_init_undo(struct tcp_sock *tp)
+ {
+       tp->undo_marker = tp->snd_una;
++
+       /* Retransmission still in flight may cause DSACKs later. */
+-      tp->undo_retrans = tp->retrans_out ? : -1;
++      /* First, account for regular retransmits in flight: */
++      tp->undo_retrans = tp->retrans_out;
++      /* Next, account for TLP retransmits in flight: */
++      if (tp->tlp_high_seq && tp->tlp_retrans)
++              tp->undo_retrans++;
++      /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */
++      if (!tp->undo_retrans)
++              tp->undo_retrans = -1;
+ }
+ static bool tcp_is_rack(const struct sock *sk)
+@@ -2181,6 +2189,7 @@ void tcp_enter_loss(struct sock *sk)
+       tcp_set_ca_state(sk, TCP_CA_Loss);
+       tp->high_seq = tp->snd_nxt;
++      tp->tlp_high_seq = 0;
+       tcp_ecn_queue_cwr(tp);
+       /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
+diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
+index 44b49f7d1a9e6..f36492331ef0b 100644
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -496,8 +496,6 @@ void tcp_retransmit_timer(struct sock *sk)
+       if (WARN_ON_ONCE(!skb))
+               return;
+-      tp->tlp_high_seq = 0;
+-
+       if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
+           !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
+               /* Receiver dastardly shrinks window. Our retransmits
+-- 
+2.43.0
+
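The new undo_retrans initialization can be captured as a tiny pure function; the userspace C sketch below (invented parameter names mirroring tp->retrans_out, tp->tlp_high_seq and tp->tlp_retrans) shows the three cases:

/* Userspace sketch of the new bookkeeping: an outstanding TLP retransmit
 * also counts toward undo_retrans, and 0 is still mapped to -1 so "can undo
 * now" does not trigger immediately. */
#include <stdio.h>

static int init_undo_retrans(int retrans_out, int tlp_high_seq, int tlp_retrans)
{
    int undo_retrans = retrans_out;          /* regular retransmits in flight */

    if (tlp_high_seq && tlp_retrans)
        undo_retrans++;                      /* plus one TLP retransmit */
    if (!undo_retrans)
        undo_retrans = -1;                   /* avoid "undo immediately" */
    return undo_retrans;
}

int main(void)
{
    printf("no rexmits, no TLP : %d\n", init_undo_retrans(0, 0, 0));   /* -1 */
    printf("no rexmits, TLP out: %d\n", init_undo_retrans(0, 1, 1));   /*  1 */
    printf("2 rexmits,  TLP out: %d\n", init_undo_retrans(2, 1, 1));   /*  3 */
    return 0;
}
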
diff --git a/queue-6.1/udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch b/queue-6.1/udp-set-sock_rcu_free-earlier-in-udp_lib_get_port.patch
new file mode 100644 (file)
index 0000000..c8b9f78
--- /dev/null
@@ -0,0 +1,123 @@
+From 139f02dcb0bb8d37a5229c7a8b5c2b206eb18227 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Jul 2024 12:13:56 -0700
+Subject: udp: Set SOCK_RCU_FREE earlier in udp_lib_get_port().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 5c0b485a8c6116516f33925b9ce5b6104a6eadfd ]
+
+syzkaller triggered the warning [0] in udp_v4_early_demux().
+
+In udp_v[46]_early_demux() and sk_lookup(), we do not touch the refcount
+of the looked-up sk and use sock_pfree() as skb->destructor, so we check
+SOCK_RCU_FREE to ensure that the sk is safe to access during the RCU grace
+period.
+
+Currently, SOCK_RCU_FREE is flagged for a bound socket after being put
+into the hash table.  Moreover, the SOCK_RCU_FREE check is done too early
+in udp_v[46]_early_demux() and sk_lookup(), so there could be a small race
+window:
+
+  CPU1                                 CPU2
+  ----                                 ----
+  udp_v4_early_demux()                 udp_lib_get_port()
+  |                                    |- hlist_add_head_rcu()
+  |- sk = __udp4_lib_demux_lookup()    |
+  |- DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk));
+                                       `- sock_set_flag(sk, SOCK_RCU_FREE)
+
+We had the same bug in TCP and fixed it in commit 871019b22d1b ("net:
+set SOCK_RCU_FREE before inserting socket into hashtable").
+
+Let's apply the same fix for UDP.
+
+[0]:
+WARNING: CPU: 0 PID: 11198 at net/ipv4/udp.c:2599 udp_v4_early_demux+0x481/0xb70 net/ipv4/udp.c:2599
+Modules linked in:
+CPU: 0 PID: 11198 Comm: syz-executor.1 Not tainted 6.9.0-g93bda33046e7 #13
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+RIP: 0010:udp_v4_early_demux+0x481/0xb70 net/ipv4/udp.c:2599
+Code: c5 7a 15 fe bb 01 00 00 00 44 89 e9 31 ff d3 e3 81 e3 bf ef ff ff 89 de e8 2c 74 15 fe 85 db 0f 85 02 06 00 00 e8 9f 7a 15 fe <0f> 0b e8 98 7a 15 fe 49 8d 7e 60 e8 4f 39 2f fe 49 c7 46 60 20 52
+RSP: 0018:ffffc9000ce3fa58 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff8318c92c
+RDX: ffff888036ccde00 RSI: ffffffff8318c2f1 RDI: 0000000000000001
+RBP: ffff88805a2dd6e0 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0001ffffffffffff R12: ffff88805a2dd680
+R13: 0000000000000007 R14: ffff88800923f900 R15: ffff88805456004e
+FS:  00007fc449127640(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fc449126e38 CR3: 000000003de4b002 CR4: 0000000000770ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ip_rcv_finish_core.constprop.0+0xbdd/0xd20 net/ipv4/ip_input.c:349
+ ip_rcv_finish+0xda/0x150 net/ipv4/ip_input.c:447
+ NF_HOOK include/linux/netfilter.h:314 [inline]
+ NF_HOOK include/linux/netfilter.h:308 [inline]
+ ip_rcv+0x16c/0x180 net/ipv4/ip_input.c:569
+ __netif_receive_skb_one_core+0xb3/0xe0 net/core/dev.c:5624
+ __netif_receive_skb+0x21/0xd0 net/core/dev.c:5738
+ netif_receive_skb_internal net/core/dev.c:5824 [inline]
+ netif_receive_skb+0x271/0x300 net/core/dev.c:5884
+ tun_rx_batched drivers/net/tun.c:1549 [inline]
+ tun_get_user+0x24db/0x2c50 drivers/net/tun.c:2002
+ tun_chr_write_iter+0x107/0x1a0 drivers/net/tun.c:2048
+ new_sync_write fs/read_write.c:497 [inline]
+ vfs_write+0x76f/0x8d0 fs/read_write.c:590
+ ksys_write+0xbf/0x190 fs/read_write.c:643
+ __do_sys_write fs/read_write.c:655 [inline]
+ __se_sys_write fs/read_write.c:652 [inline]
+ __x64_sys_write+0x41/0x50 fs/read_write.c:652
+ x64_sys_call+0xe66/0x1990 arch/x86/include/generated/asm/syscalls_64.h:2
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x4b/0x110 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x4b/0x53
+RIP: 0033:0x7fc44a68bc1f
+Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 e9 cf f5 ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 3c d0 f5 ff 48
+RSP: 002b:00007fc449126c90 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 00000000004bc050 RCX: 00007fc44a68bc1f
+RDX: 0000000000000032 RSI: 00000000200000c0 RDI: 00000000000000c8
+RBP: 00000000004bc050 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000032 R11: 0000000000000293 R12: 0000000000000000
+R13: 000000000000000b R14: 00007fc44a5ec530 R15: 0000000000000000
+ </TASK>
+
+Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20240709191356.24010-1-kuniyu@amazon.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/udp.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index b8f93c1479ae1..53267566808c1 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -319,6 +319,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
+                       goto fail_unlock;
+               }
++              sock_set_flag(sk, SOCK_RCU_FREE);
++
+               sk_add_node_rcu(sk, &hslot->head);
+               hslot->count++;
+               sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+@@ -335,7 +337,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
+               hslot2->count++;
+               spin_unlock(&hslot2->lock);
+       }
+-      sock_set_flag(sk, SOCK_RCU_FREE);
++
+       error = 0;
+ fail_unlock:
+       spin_unlock_bh(&hslot->lock);
+-- 
+2.43.0
+
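A small sketch of the ordering invariant the patch above enforces: the
RCU-free flag must be set before the socket becomes visible to lockless
lookup. The toy types and helpers below are invented for the example and
deliberately single-threaded; they model the window, not the kernel APIs.

/*
 * Illustrative sketch only: the publication-order invariant behind the
 * patch above. A toy socket must carry its "RCU free" flag before it is
 * made visible to lockless lookup; otherwise an early-demux style reader
 * can observe a published entry without the flag.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_sock {
	bool rcu_free;    /* safe to access under RCU without refcounting */
	bool published;   /* visible to lockless lookup (hash insert done) */
};

/* Fixed ordering: flag first, then publish (as in the patch). */
static void bind_fixed(struct toy_sock *sk)
{
	sk->rcu_free = true;
	sk->published = true;
}

/* Old ordering: publish first, flag afterwards (the racy window). */
static void bind_racy_step1(struct toy_sock *sk) { sk->published = true; }
static void bind_racy_step2(struct toy_sock *sk) { sk->rcu_free = true; }

/* Models the early-demux check: any visible socket must carry the flag. */
static bool lookup_is_safe(const struct toy_sock *sk)
{
	return !sk->published || sk->rcu_free;
}

int main(void)
{
	struct toy_sock a = { 0 }, b = { 0 };

	bind_fixed(&a);
	assert(lookup_is_safe(&a));   /* always holds with the fix */

	bind_racy_step1(&b);          /* a reader may run right here... */
	printf("racy window safe? %s\n", lookup_is_safe(&b) ? "yes" : "no");
	bind_racy_step2(&b);
	return 0;
}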
diff --git a/queue-6.1/vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch b/queue-6.1/vfs-don-t-mod-negative-dentry-count-when-on-shrinker.patch
new file mode 100644 (file)
index 0000000..695f9bf
--- /dev/null
@@ -0,0 +1,88 @@
+From 4bcc40214587f2d1bda953fb620d9c97f16a02a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jul 2024 08:13:01 -0400
+Subject: vfs: don't mod negative dentry count when on shrinker list
+
+From: Brian Foster <bfoster@redhat.com>
+
+[ Upstream commit aabfe57ebaa75841db47ea59091ec3c5a06d2f52 ]
+
+The nr_dentry_negative counter is intended to only account negative
+dentries that are present on the superblock LRU. Therefore, the LRU
+add, remove and isolate helpers modify the counter based on whether
+the dentry is negative, but the shrinker list related helpers do not
+modify the counter, and the paths that change a dentry between
+positive and negative only do so if DCACHE_LRU_LIST is set.
+
+The problem with this is that a dentry on a shrinker list still has
+DCACHE_LRU_LIST set to indicate ->d_lru is in use. The additional
+DCACHE_SHRINK_LIST flag denotes whether the dentry is on the LRU or on a
+shrink-related list. Therefore, if a relevant operation (e.g. unlink)
+occurs while a dentry is present on a shrinker list, and the
+associated codepath only checks for DCACHE_LRU_LIST, then it is
+technically possible to modify the negative dentry count for a
+dentry that is off the LRU. Since the shrinker list related helpers
+do not modify the negative dentry count (because non-LRU dentries
+should not be included in the count) when the dentry is ultimately
+removed from the shrinker list, this can cause the negative dentry
+count to become permanently inaccurate.
+
+This problem can be reproduced via a heavy file create/unlink vs.
+drop_caches workload. On an 80xcpu system, I start 80 tasks each
+running a 1k file create/delete loop, and one task spinning on
+drop_caches. After 10 minutes or so of runtime, the idle/clean cache
+negative dentry count increases from somewhere in the range of 5-10
+entries to several hundred (and increasingly grows beyond
+nr_dentry_unused).
+
+Tweak the logic in the paths that turn a dentry negative or positive
+to filter out the case where the dentry is present on a shrink
+related list. This allows the above workload to maintain an accurate
+negative dentry count.
+
+Fixes: af0c9af1b3f6 ("fs/dcache: Track & report number of negative dentries")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Link: https://lore.kernel.org/r/20240703121301.247680-1-bfoster@redhat.com
+Acked-by: Ian Kent <ikent@redhat.com>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/dcache.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/fs/dcache.c b/fs/dcache.c
+index 9b10f1872f6c9..04f32dc8d1ad8 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -356,7 +356,11 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
+       flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
+       WRITE_ONCE(dentry->d_flags, flags);
+       dentry->d_inode = NULL;
+-      if (flags & DCACHE_LRU_LIST)
++      /*
++       * The negative counter only tracks dentries on the LRU. Don't inc if
++       * d_lru is on another list.
++       */
++      if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
+               this_cpu_inc(nr_dentry_negative);
+ }
+@@ -2001,9 +2005,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
+       spin_lock(&dentry->d_lock);
+       /*
+-       * Decrement negative dentry count if it was in the LRU list.
++       * The negative counter only tracks dentries on the LRU. Don't dec if
++       * d_lru is on another list.
+        */
+-      if (dentry->d_flags & DCACHE_LRU_LIST)
++      if ((dentry->d_flags &
++           (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
+               this_cpu_dec(nr_dentry_negative);
+       hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+       raw_write_seqcount_begin(&dentry->d_seq);
+-- 
+2.43.0
+
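A short sketch of the flag test the patch above switches to when deciding
whether a dentry contributes to the negative-dentry counter. The mask
values and helpers below are invented stand-ins for DCACHE_LRU_LIST and
DCACHE_SHRINK_LIST, not the kernel definitions.

/*
 * Illustrative sketch only: old vs. new accounting predicate from the
 * patch above. Only dentries on the LRU proper should be counted.
 */
#include <stdbool.h>
#include <stdio.h>

#define TOY_LRU_LIST    0x1u  /* d_lru is in use */
#define TOY_SHRINK_LIST 0x2u  /* d_lru is on a shrinker list, not the LRU */

/* Old check: counts any dentry with the LRU bit, even on a shrink list. */
static bool counted_old(unsigned int flags)
{
	return flags & TOY_LRU_LIST;
}

/* New check: counts only dentries that are on the LRU proper. */
static bool counted_new(unsigned int flags)
{
	return (flags & (TOY_LRU_LIST | TOY_SHRINK_LIST)) == TOY_LRU_LIST;
}

int main(void)
{
	unsigned int on_lru = TOY_LRU_LIST;
	unsigned int on_shrinker = TOY_LRU_LIST | TOY_SHRINK_LIST;

	printf("on LRU:      old=%d new=%d\n",
	       counted_old(on_lru), counted_new(on_lru));
	/* The shrinker case is where the old check skewed nr_dentry_negative. */
	printf("on shrinker: old=%d new=%d\n",
	       counted_old(on_shrinker), counted_new(on_shrinker));
	return 0;
}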