5.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 24 Aug 2025 08:53:40 +0000 (10:53 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 24 Aug 2025 08:53:40 +0000 (10:53 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 24 Aug 2025 08:53:40 +0000 (10:53 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 24 Aug 2025 08:53:40 +0000 (10:53 +0200)
diff --git a/queue-5.4/act_mirred-use-the-backlog-for-nested-calls-to-mirred-ingress.patch b/queue-5.4/act_mirred-use-the-backlog-for-nested-calls-to-mirred-ingress.patch

new file mode 100644 (file)

index 0000000..ce92f33
--- /dev/null
+++ b/queue-5.4/act_mirred-use-the-backlog-for-nested-calls-to-mirred-ingress.patch
@@ -0,0 +1,136 @@
+From stable+bounces-164693-greg=kroah.com@vger.kernel.org Thu Jul 24 21:27:39 2025
+From: skulkarni@mvista.com
+Date: Fri, 25 Jul 2025 00:56:19 +0530
+Subject: act_mirred: use the backlog for nested calls to mirred ingress
+To: stable@vger.kernel.org
+Cc: akuster@mvista.com, cminyard@mvista.com, Davide Caratti <dcaratti@redhat.com>, William Zhao <wizhao@redhat.com>, Xin Long <lucien.xin@gmail.com>, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>, Jamal Hadi Salim <jhs@mojatatu.com>, Paolo Abeni <pabeni@redhat.com>, Shubham Kulkarni <skulkarni@mvista.com>
+Message-ID: <20250724192619.217203-9-skulkarni@mvista.com>
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit ca22da2fbd693b54dc8e3b7b54ccc9f7e9ba3640 ]
+
+William reports kernel soft-lockups on some OVS topologies when TC mirred
+egress->ingress action is hit by local TCP traffic [1].
+The same can also be reproduced with SCTP (thanks Xin for verifying), when
+client and server reach themselves through mirred egress to ingress, and
+one of the two peers sends a "heartbeat" packet (from within a timer).
+
+Enqueueing to backlog proved to fix this soft lockup; however, as Cong
+noticed [2], we should preserve - when possible - the current mirred
+behavior that counts as "overlimits" any eventual packet drop subsequent to
+the mirred forwarding action [3]. A compromise solution might use the
+backlog only when tcf_mirred_act() has a nest level greater than one:
+change tcf_mirred_forward() accordingly.
+
+Also, add a kselftest that can reproduce the lockup and verifies TC mirred
+ability to account for further packet drops after TC mirred egress->ingress
+(when the nest level is 1).
+
+ [1] https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/
+ [2] https://lore.kernel.org/netdev/Y0w%2FWWY60gqrtGLp@pop-os.localdomain/
+ [3] such behavior is not guaranteed: for example, if RPS or skb RX
+     timestamping is enabled on the mirred target device, the kernel
+     can defer receiving the skb and return NET_RX_SUCCESS inside
+     tcf_mirred_forward().
+
+Reported-by: William Zhao <wizhao@redhat.com>
+CC: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+[ skulkarni: Adjusted patch for file 'tc_actions.sh' wrt the mainline commit ]
+Signed-off-by: Shubham Kulkarni <skulkarni@mvista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_mirred.c                               |    7 ++
+ tools/testing/selftests/net/forwarding/tc_actions.sh |   48 ++++++++++++++++++-
+ 2 files changed, 54 insertions(+), 1 deletion(-)
+
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -206,12 +206,19 @@ release_idr:
+       return err;
+ }
+ 
++static bool is_mirred_nested(void)
++{
++      return unlikely(__this_cpu_read(mirred_nest_level) > 1);
++}
++
+ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+ {
+       int err;
+ 
+       if (!want_ingress)
+               err = dev_queue_xmit(skb);
++      else if (is_mirred_nested())
++              err = netif_rx(skb);
+       else
+               err = netif_receive_skb(skb);
+ 
+--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
++++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
+@@ -3,7 +3,7 @@
+ 
+ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+       mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
+-      gact_trap_test"
++      gact_trap_test mirred_egress_to_ingress_tcp_test"
+ NUM_NETIFS=4
+ source tc_common.sh
+ source lib.sh
+@@ -153,6 +153,52 @@ gact_trap_test()
+       log_test "trap ($tcflags)"
+ }
+ 
++mirred_egress_to_ingress_tcp_test()
++{
++      local tmpfile=$(mktemp) tmpfile1=$(mktemp)
++
++      RET=0
++      dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$tmpfile
++      tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
++              $tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \
++                      action ct commit nat src addr 192.0.2.2 pipe \
++                      action ct clear pipe \
++                      action ct commit nat dst addr 192.0.2.1 pipe \
++                      action ct clear pipe \
++                      action skbedit ptype host pipe \
++                      action mirred ingress redirect dev $h1
++      tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \
++              $tcflags ip_proto icmp \
++                      action mirred ingress redirect dev $h1
++      tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \
++              ip_proto icmp \
++                      action drop
++
++      ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $tmpfile1  &
++      local rpid=$!
++      ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$tmpfile
++      wait -n $rpid
++      cmp -s $tmpfile $tmpfile1
++      check_err $? "server output check failed"
++
++      $MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \
++              -t icmp "ping,id=42,seq=5" -q
++      tc_check_packets "dev $h1 egress" 101 10
++      check_err $? "didn't mirred redirect ICMP"
++      tc_check_packets "dev $h1 ingress" 102 10
++      check_err $? "didn't drop mirred ICMP"
++      local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
++      test ${overlimits} = 10
++      check_err $? "wrong overlimits, expected 10 got ${overlimits}"
++
++      tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
++      tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
++      tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower
++
++      rm -f $tmpfile $tmpfile1
++      log_test "mirred_egress_to_ingress_tcp ($tcflags)"
++}
++
+ setup_prepare()
+ {
+       h1=${NETIFS[p1]}
diff --git a/queue-5.4/codel-remove-sch-q.qlen-check-before-qdisc_tree_reduce_backlog.patch b/queue-5.4/codel-remove-sch-q.qlen-check-before-qdisc_tree_reduce_backlog.patch

new file mode 100644 (file)

index 0000000..1829c5c
--- /dev/null
+++ b/queue-5.4/codel-remove-sch-q.qlen-check-before-qdisc_tree_reduce_backlog.patch
@@ -0,0 +1,57 @@
+From 342debc12183b51773b3345ba267e9263bdfaaef Mon Sep 17 00:00:00 2001
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Thu, 3 Apr 2025 14:16:31 -0700
+Subject: codel: remove sch->q.qlen check before qdisc_tree_reduce_backlog()
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+commit 342debc12183b51773b3345ba267e9263bdfaaef upstream.
+
+After making all ->qlen_notify() callbacks idempotent, now it is safe to
+remove the check of qlen!=0 from both fq_codel_dequeue() and
+codel_qdisc_dequeue().
+
+Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
+Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM")
+Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM")
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250403211636.166257-1-xiyou.wangcong@gmail.com
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Siddh Raman Pant <siddh.raman.pant@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_codel.c    |    5 +----
+ net/sched/sch_fq_codel.c |    6 ++----
+ 2 files changed, 3 insertions(+), 8 deletions(-)
+
+--- a/net/sched/sch_codel.c
++++ b/net/sched/sch_codel.c
+@@ -95,10 +95,7 @@ static struct sk_buff *codel_qdisc_deque
+                           &q->stats, qdisc_pkt_len, codel_get_enqueue_time,
+                           drop_func, dequeue_func);
+ 
+-      /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
+-       * or HTB crashes. Defer it for next round.
+-       */
+-      if (q->stats.drop_count && sch->q.qlen) {
++      if (q->stats.drop_count) {
+               qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
+               q->stats.drop_count = 0;
+               q->stats.drop_len = 0;
+--- a/net/sched/sch_fq_codel.c
++++ b/net/sched/sch_fq_codel.c
+@@ -315,10 +315,8 @@ begin:
+       }
+       qdisc_bstats_update(sch, skb);
+       flow->deficit -= qdisc_pkt_len(skb);
+-      /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
+-       * or HTB crashes. Defer it for next round.
+-       */
+-      if (q->cstats.drop_count && sch->q.qlen) {
++
++      if (q->cstats.drop_count) {
+               qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
+                                         q->cstats.drop_len);
+               q->cstats.drop_count = 0;
diff --git a/queue-5.4/mm-drop-the-assumption-that-vm_shared-always-implies-writable.patch b/queue-5.4/mm-drop-the-assumption-that-vm_shared-always-implies-writable.patch

new file mode 100644 (file)

index 0000000..b0d5291
--- /dev/null
+++ b/queue-5.4/mm-drop-the-assumption-that-vm_shared-always-implies-writable.patch
@@ -0,0 +1,210 @@
+From stable+bounces-165153-greg=kroah.com@vger.kernel.org Wed Jul 30 02:58:50 2025
+From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
+Date: Tue, 29 Jul 2025 17:58:06 -0700
+Subject: mm: drop the assumption that VM_SHARED always implies writable
+To: lorenzo.stoakes@oracle.com, gregkh@linuxfoundation.org,  Muchun Song <muchun.song@linux.dev>, Oscar Salvador <osalvador@suse.de>,  David Hildenbrand <david@redhat.com>, Alexander Viro <viro@zeniv.linux.org.uk>,  Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,  Andrew Morton <akpm@linux-foundation.org>, "Liam R. Howlett" <Liam.Howlett@oracle.com>,  Vlastimil Babka <vbabka@suse.cz>, Mike Rapoport <rppt@kernel.org>, Suren Baghdasaryan <surenb@google.com>,  Michal Hocko <mhocko@suse.com>, Kees Cook <kees@kernel.org>, Ingo Molnar <mingo@redhat.com>,  Peter Zijlstra <peterz@infradead.org>, Juri Lelli <juri.lelli@redhat.com>,  Vincent Guittot <vincent.guittot@linaro.org>, Dietmar Eggemann <dietmar.eggemann@arm.com>,  Steven Rostedt <rostedt@goodmis.org>, Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,  Valentin Schneider <vschneid@redhat.com>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Jann Horn <jannh@google.com>,  Pedro Falcato <pfalcato@suse.de>, Hugh Dickins <hughd@google.com>,  Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: aliceryhl@google.com, stable@vger.kernel.org,  "Isaac J. Manjarres" <isaacmanjarres@google.com>, kernel-team@android.com, linux-mm@kvack.org,  linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,  Lorenzo Stoakes <lstoakes@gmail.com>, Andy Lutomirski <luto@kernel.org>,  Mike Kravetz <mike.kravetz@oracle.com>
+Message-ID: <20250730005818.2793577-2-isaacmanjarres@google.com>
+
+From: Lorenzo Stoakes <lstoakes@gmail.com>
+
+[ Upstream commit e8e17ee90eaf650c855adb0a3e5e965fd6692ff1 ]
+
+Patch series "permit write-sealed memfd read-only shared mappings", v4.
+
+The man page for fcntl() describing memfd file seals states the following
+about F_SEAL_WRITE:-
+
+    Furthermore, trying to create new shared, writable memory-mappings via
+    mmap(2) will also fail with EPERM.
+
+With emphasis on 'writable'.  It turns out in fact that currently the
+kernel simply disallows all new shared memory mappings for a memfd with
+F_SEAL_WRITE applied, rendering this documentation inaccurate.
+
+This matters because users are therefore unable to obtain a shared mapping
+to a memfd after write sealing altogether, which limits their usefulness.
+This was reported in the discussion thread [1] originating from a bug
+report [2].
+
+This is a product of both using the struct address_space->i_mmap_writable
+atomic counter to determine whether writing may be permitted, and the
+kernel adjusting this counter when any VM_SHARED mapping is performed and
+more generally implicitly assuming VM_SHARED implies writable.
+
+It seems sensible that we should only update this mapping if VM_MAYWRITE
+is specified, i.e.  whether it is possible that this mapping could at any
+point be written to.
+
+If we do so then all we need to do to permit write seals to function as
+documented is to clear VM_MAYWRITE when mapping read-only.  It turns out
+this functionality already exists for F_SEAL_FUTURE_WRITE - we can
+therefore simply adapt this logic to do the same for F_SEAL_WRITE.
+
+We then hit a chicken and egg situation in mmap_region() where the check
+for VM_MAYWRITE occurs before we are able to clear this flag.  To work
+around this, perform this check after we invoke call_mmap(), with careful
+consideration of error paths.
+
+Thanks to Andy Lutomirski for the suggestion!
+
+[1]:https://lore.kernel.org/all/20230324133646.16101dfa666f253c4715d965@linux-foundation.org/
+[2]:https://bugzilla.kernel.org/show_bug.cgi?id=217238
+
+This patch (of 3):
+
+There is a general assumption that VMAs with the VM_SHARED flag set are
+writable.  If the VM_MAYWRITE flag is not set, then this is simply not the
+case.
+
+Update those checks which affect the struct address_space->i_mmap_writable
+field to explicitly test for this by introducing
+[vma_]is_shared_maywrite() helper functions.
+
+This remains entirely conservative, as the lack of VM_MAYWRITE guarantees
+that the VMA cannot be written to.
+
+Link: https://lkml.kernel.org/r/cover.1697116581.git.lstoakes@gmail.com
+Link: https://lkml.kernel.org/r/d978aefefa83ec42d18dfa964ad180dbcde34795.1697116581.git.lstoakes@gmail.com
+Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
+Suggested-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/fs.h |    4 ++--
+ include/linux/mm.h |   11 +++++++++++
+ kernel/fork.c      |    2 +-
+ mm/filemap.c       |    2 +-
+ mm/madvise.c       |    2 +-
+ mm/mmap.c          |   10 +++++-----
+ 6 files changed, 21 insertions(+), 10 deletions(-)
+
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -430,7 +430,7 @@ int pagecache_write_end(struct file *, s
+  * @host: Owner, either the inode or the block_device.
+  * @i_pages: Cached pages.
+  * @gfp_mask: Memory allocation flags to use for allocating pages.
+- * @i_mmap_writable: Number of VM_SHARED mappings.
++ * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings.
+  * @nr_thps: Number of THPs in the pagecache (non-shmem only).
+  * @i_mmap: Tree of private and shared mappings.
+  * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
+@@ -553,7 +553,7 @@ static inline int mapping_mapped(struct
+ 
+ /*
+  * Might pages of this file have been modified in userspace?
+- * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
++ * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap_pgoff
+  * marks vma as VM_SHARED if it is shared, and the file was opened for
+  * writing i.e. vma may be mprotected writable even if now readonly.
+  *
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -549,6 +549,17 @@ static inline bool vma_is_anonymous(stru
+       return !vma->vm_ops;
+ }
+ 
++static inline bool is_shared_maywrite(vm_flags_t vm_flags)
++{
++      return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
++              (VM_SHARED | VM_MAYWRITE);
++}
++
++static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
++{
++      return is_shared_maywrite(vma->vm_flags);
++}
++
+ #ifdef CONFIG_SHMEM
+ /*
+  * The vma_is_shmem is not inline because it is used only by slow
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -566,7 +566,7 @@ static __latent_entropy int dup_mmap(str
+                       if (tmp->vm_flags & VM_DENYWRITE)
+                               atomic_dec(&inode->i_writecount);
+                       i_mmap_lock_write(mapping);
+-                      if (tmp->vm_flags & VM_SHARED)
++                      if (vma_is_shared_maywrite(tmp))
+                               atomic_inc(&mapping->i_mmap_writable);
+                       flush_dcache_mmap_lock(mapping);
+                       /* insert tmp into the share list, just after mpnt */
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -2876,7 +2876,7 @@ int generic_file_mmap(struct file * file
+  */
+ int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
+ {
+-      if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
++      if (vma_is_shared_maywrite(vma))
+               return -EINVAL;
+       return generic_file_mmap(file, vma);
+ }
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -839,7 +839,7 @@ static long madvise_remove(struct vm_are
+                       return -EINVAL;
+       }
+ 
+-      if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
++      if (!vma_is_shared_maywrite(vma))
+               return -EACCES;
+ 
+       offset = (loff_t)(start - vma->vm_start)
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -141,7 +141,7 @@ static void __remove_shared_vm_struct(st
+ {
+       if (vma->vm_flags & VM_DENYWRITE)
+               atomic_inc(&file_inode(file)->i_writecount);
+-      if (vma->vm_flags & VM_SHARED)
++      if (vma_is_shared_maywrite(vma))
+               mapping_unmap_writable(mapping);
+ 
+       flush_dcache_mmap_lock(mapping);
+@@ -619,7 +619,7 @@ static void __vma_link_file(struct vm_ar
+ 
+               if (vma->vm_flags & VM_DENYWRITE)
+                       atomic_dec(&file_inode(file)->i_writecount);
+-              if (vma->vm_flags & VM_SHARED)
++              if (vma_is_shared_maywrite(vma))
+                       atomic_inc(&mapping->i_mmap_writable);
+ 
+               flush_dcache_mmap_lock(mapping);
+@@ -1785,7 +1785,7 @@ unsigned long mmap_region(struct file *f
+                       if (error)
+                               goto free_vma;
+               }
+-              if (vm_flags & VM_SHARED) {
++              if (is_shared_maywrite(vm_flags)) {
+                       error = mapping_map_writable(file->f_mapping);
+                       if (error)
+                               goto allow_write_and_free_vma;
+@@ -1823,7 +1823,7 @@ unsigned long mmap_region(struct file *f
+       vma_link(mm, vma, prev, rb_link, rb_parent);
+       /* Once vma denies write, undo our temporary denial count */
+       if (file) {
+-              if (vm_flags & VM_SHARED)
++              if (is_shared_maywrite(vm_flags))
+                       mapping_unmap_writable(file->f_mapping);
+               if (vm_flags & VM_DENYWRITE)
+                       allow_write_access(file);
+@@ -1864,7 +1864,7 @@ unmap_and_free_vma:
+ 
+       /* Undo any partial mapping done by a device driver. */
+       unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+-      if (vm_flags & VM_SHARED)
++      if (is_shared_maywrite(vm_flags))
+               mapping_unmap_writable(file->f_mapping);
+ allow_write_and_free_vma:
+       if (vm_flags & VM_DENYWRITE)
diff --git a/queue-5.4/mm-perform-the-mapping_map_writable-check-after-call_mmap.patch b/queue-5.4/mm-perform-the-mapping_map_writable-check-after-call_mmap.patch

new file mode 100644 (file)

index 0000000..3bf3327
--- /dev/null
+++ b/queue-5.4/mm-perform-the-mapping_map_writable-check-after-call_mmap.patch
@@ -0,0 +1,119 @@
+From stable+bounces-165155-greg=kroah.com@vger.kernel.org Wed Jul 30 02:59:23 2025
+From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
+Date: Tue, 29 Jul 2025 17:58:08 -0700
+Subject: mm: perform the mapping_map_writable() check after call_mmap()
+To: lorenzo.stoakes@oracle.com, gregkh@linuxfoundation.org,  Muchun Song <muchun.song@linux.dev>, Oscar Salvador <osalvador@suse.de>,  David Hildenbrand <david@redhat.com>, Alexander Viro <viro@zeniv.linux.org.uk>,  Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,  Andrew Morton <akpm@linux-foundation.org>, "Liam R. Howlett" <Liam.Howlett@oracle.com>,  Vlastimil Babka <vbabka@suse.cz>, Mike Rapoport <rppt@kernel.org>, Suren Baghdasaryan <surenb@google.com>,  Michal Hocko <mhocko@suse.com>, Kees Cook <kees@kernel.org>, Ingo Molnar <mingo@redhat.com>,  Peter Zijlstra <peterz@infradead.org>, Juri Lelli <juri.lelli@redhat.com>,  Vincent Guittot <vincent.guittot@linaro.org>, Dietmar Eggemann <dietmar.eggemann@arm.com>,  Steven Rostedt <rostedt@goodmis.org>, Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,  Valentin Schneider <vschneid@redhat.com>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Jann Horn <jannh@google.com>,  Pedro Falcato <pfalcato@suse.de>, Hugh Dickins <hughd@google.com>,  Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: aliceryhl@google.com, stable@vger.kernel.org,  "Isaac J. Manjarres" <isaacmanjarres@google.com>, kernel-team@android.com, linux-mm@kvack.org,  linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,  Lorenzo Stoakes <lstoakes@gmail.com>, Andy Lutomirski <luto@kernel.org>,  Mike Kravetz <mike.kravetz@oracle.com>
+Message-ID: <20250730005818.2793577-4-isaacmanjarres@google.com>
+
+From: Lorenzo Stoakes <lstoakes@gmail.com>
+
+[ Upstream commit 158978945f3173b8c1a88f8c5684a629736a57ac ]
+
+In order for a F_SEAL_WRITE sealed memfd mapping to have an opportunity to
+clear VM_MAYWRITE, we must be able to invoke the appropriate
+vm_ops->mmap() handler to do so.  We would otherwise fail the
+mapping_map_writable() check before we had the opportunity to avoid it.
+
+This patch moves this check after the call_mmap() invocation.  Only memfd
+actively denies write access causing a potential failure here (in
+memfd_add_seals()), so there should be no impact on non-memfd cases.
+
+This patch makes the userland-visible change that MAP_SHARED, PROT_READ
+mappings of an F_SEAL_WRITE sealed memfd mapping will now succeed.
+
+There is a delicate situation with cleanup paths assuming that a writable
+mapping must have occurred in circumstances where it may now not have.  In
+order to ensure we do not accidentally mark a writable file unwritable by
+mistake, we explicitly track whether we have a writable mapping and unmap
+only if we do.
+
+[lstoakes@gmail.com: do not set writable_file_mapping in inappropriate case]
+  Link: https://lkml.kernel.org/r/c9eb4cc6-7db4-4c2b-838d-43a0b319a4f0@lucifer.local
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217238
+Link: https://lkml.kernel.org/r/55e413d20678a1bb4c7cce889062bbb07b0df892.1697116581.git.lstoakes@gmail.com
+Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+[isaacmanjarres: added error handling to cleanup the work done by the
+mmap() callback and removed unused label.]
+Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mmap.c |   22 ++++++++++++++--------
+ 1 file changed, 14 insertions(+), 8 deletions(-)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1718,6 +1718,7 @@ unsigned long mmap_region(struct file *f
+ {
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma, *prev;
++      bool writable_file_mapping = false;
+       int error;
+       struct rb_node **rb_link, *rb_parent;
+       unsigned long charged = 0;
+@@ -1785,11 +1786,6 @@ unsigned long mmap_region(struct file *f
+                       if (error)
+                               goto free_vma;
+               }
+-              if (is_shared_maywrite(vm_flags)) {
+-                      error = mapping_map_writable(file->f_mapping);
+-                      if (error)
+-                              goto allow_write_and_free_vma;
+-              }
+ 
+               /* ->mmap() can change vma->vm_file, but must guarantee that
+                * vma_link() below can deny write-access if VM_DENYWRITE is set
+@@ -1801,6 +1797,14 @@ unsigned long mmap_region(struct file *f
+               if (error)
+                       goto unmap_and_free_vma;
+ 
++              if (vma_is_shared_maywrite(vma)) {
++                      error = mapping_map_writable(file->f_mapping);
++                      if (error)
++                              goto close_and_free_vma;
++
++                      writable_file_mapping = true;
++              }
++
+               /* Can addr have changed??
+                *
+                * Answer: Yes, several device drivers can do it in their
+@@ -1823,7 +1827,7 @@ unsigned long mmap_region(struct file *f
+       vma_link(mm, vma, prev, rb_link, rb_parent);
+       /* Once vma denies write, undo our temporary denial count */
+       if (file) {
+-              if (is_shared_maywrite(vm_flags))
++              if (writable_file_mapping)
+                       mapping_unmap_writable(file->f_mapping);
+               if (vm_flags & VM_DENYWRITE)
+                       allow_write_access(file);
+@@ -1858,15 +1862,17 @@ out:
+ 
+       return addr;
+ 
++close_and_free_vma:
++      if (vma->vm_ops && vma->vm_ops->close)
++              vma->vm_ops->close(vma);
+ unmap_and_free_vma:
+       vma->vm_file = NULL;
+       fput(file);
+ 
+       /* Undo any partial mapping done by a device driver. */
+       unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+-      if (is_shared_maywrite(vm_flags))
++      if (writable_file_mapping)
+               mapping_unmap_writable(file->f_mapping);
+-allow_write_and_free_vma:
+       if (vm_flags & VM_DENYWRITE)
+               allow_write_access(file);
+ free_vma:
diff --git a/queue-5.4/mm-update-memfd-seal-write-check-to-include-f_seal_write.patch b/queue-5.4/mm-update-memfd-seal-write-check-to-include-f_seal_write.patch

new file mode 100644 (file)

index 0000000..478db8f
--- /dev/null
+++ b/queue-5.4/mm-update-memfd-seal-write-check-to-include-f_seal_write.patch
@@ -0,0 +1,104 @@
+From stable+bounces-165154-greg=kroah.com@vger.kernel.org Wed Jul 30 02:59:10 2025
+From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
+Date: Tue, 29 Jul 2025 17:58:07 -0700
+Subject: mm: update memfd seal write check to include F_SEAL_WRITE
+To: lorenzo.stoakes@oracle.com, gregkh@linuxfoundation.org,  Muchun Song <muchun.song@linux.dev>, Oscar Salvador <osalvador@suse.de>,  David Hildenbrand <david@redhat.com>, Alexander Viro <viro@zeniv.linux.org.uk>,  Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,  Andrew Morton <akpm@linux-foundation.org>, "Liam R. Howlett" <Liam.Howlett@oracle.com>,  Vlastimil Babka <vbabka@suse.cz>, Mike Rapoport <rppt@kernel.org>, Suren Baghdasaryan <surenb@google.com>,  Michal Hocko <mhocko@suse.com>, Kees Cook <kees@kernel.org>, Ingo Molnar <mingo@redhat.com>,  Peter Zijlstra <peterz@infradead.org>, Juri Lelli <juri.lelli@redhat.com>,  Vincent Guittot <vincent.guittot@linaro.org>, Dietmar Eggemann <dietmar.eggemann@arm.com>,  Steven Rostedt <rostedt@goodmis.org>, Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,  Valentin Schneider <vschneid@redhat.com>, "Matthew Wilcox (Oracle)" <willy@infradead.org>, Jann Horn <jannh@google.com>,  Pedro Falcato <pfalcato@suse.de>, Hugh Dickins <hughd@google.com>,  Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: aliceryhl@google.com, stable@vger.kernel.org,  "Isaac J. Manjarres" <isaacmanjarres@google.com>, kernel-team@android.com, linux-mm@kvack.org,  linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,  Lorenzo Stoakes <lstoakes@gmail.com>, Andy Lutomirski <luto@kernel.org>,  Mike Kravetz <mike.kravetz@oracle.com>
+Message-ID: <20250730005818.2793577-3-isaacmanjarres@google.com>
+
+From: Lorenzo Stoakes <lstoakes@gmail.com>
+
+[ Upstream commit 28464bbb2ddc199433383994bcb9600c8034afa1 ]
+
+The seal_check_future_write() function is called by shmem_mmap() or
+hugetlbfs_file_mmap() to disallow any future writable mappings of a memfd
+sealed this way.
+
+The F_SEAL_WRITE flag is not checked here, as that is handled via the
+mapping->i_mmap_writable mechanism and so any attempt at a mapping would
+fail before this could be run.
+
+However we intend to change this, meaning this check can be performed for
+F_SEAL_WRITE mappings also.
+
+The logic here is equally applicable to both flags, so update this
+function to accommodate both and rename it accordingly.
+
+Link: https://lkml.kernel.org/r/913628168ce6cce77df7d13a63970bae06a526e0.1697116581.git.lstoakes@gmail.com
+Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hugetlbfs/inode.c |    2 +-
+ include/linux/mm.h   |   15 ++++++++-------
+ mm/shmem.c           |    2 +-
+ 3 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -152,7 +152,7 @@ static int hugetlbfs_file_mmap(struct fi
+       vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
+       vma->vm_ops = &hugetlb_vm_ops;
+ 
+-      ret = seal_check_future_write(info->seals, vma);
++      ret = seal_check_write(info->seals, vma);
+       if (ret)
+               return ret;
+ 
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -2946,25 +2946,26 @@ static inline int pages_identical(struct
+ }
+ 
+ /**
+- * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it
++ * seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and
++ *                    handle them.
+  * @seals: the seals to check
+  * @vma: the vma to operate on
+  *
+- * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on
+- * the vma flags.  Return 0 if check pass, or <0 for errors.
++ * Check whether F_SEAL_WRITE or F_SEAL_FUTURE_WRITE are set; if so, do proper
++ * check/handling on the vma flags.  Return 0 if check pass, or <0 for errors.
+  */
+-static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
++static inline int seal_check_write(int seals, struct vm_area_struct *vma)
+ {
+-      if (seals & F_SEAL_FUTURE_WRITE) {
++      if (seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
+               /*
+                * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+-               * "future write" seal active.
++               * write seals are active.
+                */
+               if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+                       return -EPERM;
+ 
+               /*
+-               * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
++               * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as
+                * MAP_SHARED and read-only, take care to not allow mprotect to
+                * revert protections on such mappings. Do this only for shared
+                * mappings. For private mappings, don't need to mask
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2215,7 +2215,7 @@ static int shmem_mmap(struct file *file,
+       struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+       int ret;
+ 
+-      ret = seal_check_future_write(info->seals, vma);
++      ret = seal_check_write(info->seals, vma);
+       if (ret)
+               return ret;
+ 
diff --git a/queue-5.4/net-sched-act_mirred-better-wording-on-protection-against-excessive-stack-growth.patch b/queue-5.4/net-sched-act_mirred-better-wording-on-protection-against-excessive-stack-growth.patch

new file mode 100644 (file)

index 0000000..da9f712
--- /dev/null
+++ b/queue-5.4/net-sched-act_mirred-better-wording-on-protection-against-excessive-stack-growth.patch
@@ -0,0 +1,88 @@
+From stable+bounces-164692-greg=kroah.com@vger.kernel.org Thu Jul 24 21:27:38 2025
+From: skulkarni@mvista.com
+Date: Fri, 25 Jul 2025 00:56:18 +0530
+Subject: net/sched: act_mirred: better wording on protection against excessive stack growth
+To: stable@vger.kernel.org
+Cc: akuster@mvista.com, cminyard@mvista.com, Davide Caratti <dcaratti@redhat.com>, Jamal Hadi Salim <jhs@mojatatu.com>, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>, Paolo Abeni <pabeni@redhat.com>, Shubham Kulkarni <skulkarni@mvista.com>
+Message-ID: <20250724192619.217203-8-skulkarni@mvista.com>
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit 78dcdffe0418ac8f3f057f26fe71ccf4d8ed851f ]
+
+With commit e2ca070f89ec ("net: sched: protect against stack overflow in
+TC act_mirred"), act_mirred protected itself against excessive stack growth
+using per_cpu counter of nested calls to tcf_mirred_act(), and capping it
+to MIRRED_RECURSION_LIMIT. However, such protection does not detect
+recursion/loops in case the packet is enqueued to the backlog (for example,
+when the mirred target device has RPS or skb timestamping enabled). Change
+the wording from "recursion" to "nesting" to make it more clear to readers.
+
+CC: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+[ skulkarni: Adjusted patch for file 'act_mirred.c' - hunk #4/4 wrt the mainline commit ]
+Stable-dep-of: ca22da2fbd69 ("act_mirred: use the backlog for nested calls to mirred ingress")
+Signed-off-by: Shubham Kulkarni <skulkarni@mvista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_mirred.c |   16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -28,8 +28,8 @@
+ static LIST_HEAD(mirred_list);
+ static DEFINE_SPINLOCK(mirred_list_lock);
+ 
+-#define MIRRED_RECURSION_LIMIT    4
+-static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
++#define MIRRED_NEST_LIMIT    4
++static DEFINE_PER_CPU(unsigned int, mirred_nest_level);
+ 
+ static bool tcf_mirred_is_act_redirect(int action)
+ {
+@@ -225,7 +225,7 @@ static int tcf_mirred_act(struct sk_buff
+       struct sk_buff *skb2 = skb;
+       bool m_mac_header_xmit;
+       struct net_device *dev;
+-      unsigned int rec_level;
++      unsigned int nest_level;
+       int retval, err = 0;
+       bool use_reinsert;
+       bool want_ingress;
+@@ -236,11 +236,11 @@ static int tcf_mirred_act(struct sk_buff
+       int mac_len;
+       bool at_nh;
+ 
+-      rec_level = __this_cpu_inc_return(mirred_rec_level);
+-      if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
++      nest_level = __this_cpu_inc_return(mirred_nest_level);
++      if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
+               net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
+                                    netdev_name(skb->dev));
+-              __this_cpu_dec(mirred_rec_level);
++              __this_cpu_dec(mirred_nest_level);
+               return TC_ACT_SHOT;
+       }
+ 
+@@ -310,7 +310,7 @@ static int tcf_mirred_act(struct sk_buff
+                       err = tcf_mirred_forward(res->ingress, skb);
+                       if (err)
+                               tcf_action_inc_overlimit_qstats(&m->common);
+-                      __this_cpu_dec(mirred_rec_level);
++                      __this_cpu_dec(mirred_nest_level);
+                       return TC_ACT_CONSUMED;
+               }
+       }
+@@ -322,7 +322,7 @@ out:
+               if (tcf_mirred_is_act_redirect(m_eaction))
+                       retval = TC_ACT_SHOT;
+       }
+-      __this_cpu_dec(mirred_rec_level);
++      __this_cpu_dec(mirred_nest_level);
+ 
+       return retval;
+ }
diff --git a/queue-5.4/net-sched-act_mirred-refactor-the-handle-of-xmit.patch b/queue-5.4/net-sched-act_mirred-refactor-the-handle-of-xmit.patch

new file mode 100644 (file)

index 0000000..859a8b5
--- /dev/null
+++ b/queue-5.4/net-sched-act_mirred-refactor-the-handle-of-xmit.patch
@@ -0,0 +1,82 @@
+From stable+bounces-164691-greg=kroah.com@vger.kernel.org Thu Jul 24 21:27:26 2025
+From: skulkarni@mvista.com
+Date: Fri, 25 Jul 2025 00:56:17 +0530
+Subject: net/sched: act_mirred: refactor the handle of xmit
+To: stable@vger.kernel.org
+Cc: akuster@mvista.com, cminyard@mvista.com, wenxu <wenxu@ucloud.cn>, Jakub Kicinski <kuba@kernel.org>, Shubham Kulkarni <skulkarni@mvista.com>
+Message-ID: <20250724192619.217203-7-skulkarni@mvista.com>
+
+From: wenxu <wenxu@ucloud.cn>
+
+[ Upstream commit fa6d639930ee5cd3f932cc314f3407f07a06582d ]
+
+This is preparation for the next patch.
+
+Signed-off-by: wenxu <wenxu@ucloud.cn>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ skulkarni: Adjusted patch for file 'sch_generic.h' wrt the mainline commit ]
+Stable-dep-of: ca22da2fbd69 ("act_mirred: use the backlog for nested calls to mirred ingress")
+Signed-off-by: Shubham Kulkarni <skulkarni@mvista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sch_generic.h |    5 -----
+ net/sched/act_mirred.c    |   21 +++++++++++++++------
+ 2 files changed, 15 insertions(+), 11 deletions(-)
+
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -1320,11 +1320,6 @@ void mini_qdisc_pair_swap(struct mini_Qd
+ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
+                         struct mini_Qdisc __rcu **p_miniq);
+ 
+-static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
+-{
+-      return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb);
+-}
+-
+ /* Make sure qdisc is no longer in SCHED state. */
+ static inline void qdisc_synchronize(const struct Qdisc *q)
+ {
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -206,6 +206,18 @@ release_idr:
+       return err;
+ }
+ 
++static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
++{
++      int err;
++
++      if (!want_ingress)
++              err = dev_queue_xmit(skb);
++      else
++              err = netif_receive_skb(skb);
++
++      return err;
++}
++
+ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
+                         struct tcf_result *res)
+ {
+@@ -295,18 +307,15 @@ static int tcf_mirred_act(struct sk_buff
+               /* let's the caller reinsert the packet, if possible */
+               if (use_reinsert) {
+                       res->ingress = want_ingress;
+-                      if (skb_tc_reinsert(skb, res))
++                      err = tcf_mirred_forward(res->ingress, skb);
++                      if (err)
+                               tcf_action_inc_overlimit_qstats(&m->common);
+                       __this_cpu_dec(mirred_rec_level);
+                       return TC_ACT_CONSUMED;
+               }
+       }
+ 
+-      if (!want_ingress)
+-              err = dev_queue_xmit(skb2);
+-      else
+-              err = netif_receive_skb(skb2);
+-
++      err = tcf_mirred_forward(want_ingress, skb2);
+       if (err) {
+ out:
+               tcf_action_inc_overlimit_qstats(&m->common);
diff --git a/queue-5.4/net-sched-don-t-expose-action-qstats-to-skb_tc_reinsert.patch b/queue-5.4/net-sched-don-t-expose-action-qstats-to-skb_tc_reinsert.patch

new file mode 100644 (file)

index 0000000..f889f1b
--- /dev/null
+++ b/queue-5.4/net-sched-don-t-expose-action-qstats-to-skb_tc_reinsert.patch
@@ -0,0 +1,69 @@
+From stable+bounces-164689-greg=kroah.com@vger.kernel.org Thu Jul 24 21:27:33 2025
+From: skulkarni@mvista.com
+Date: Fri, 25 Jul 2025 00:56:15 +0530
+Subject: net: sched: don't expose action qstats to skb_tc_reinsert()
+To: stable@vger.kernel.org
+Cc: akuster@mvista.com, cminyard@mvista.com, Vlad Buslov <vladbu@mellanox.com>, Jiri Pirko <jiri@mellanox.com>, "David S . Miller" <davem@davemloft.net>, Shubham Kulkarni <skulkarni@mvista.com>
+Message-ID: <20250724192619.217203-5-skulkarni@mvista.com>
+
+From: Vlad Buslov <vladbu@mellanox.com>
+
+[ Upstream commit ef816f3c49c1c404ababc50e10d4cbe5109da678 ]
+
+Previous commit introduced helper function for updating qstats and
+refactored set of actions to use the helpers, instead of modifying qstats
+directly. However, one of the affected actions exposes its qstats to
+skb_tc_reinsert(), which then modifies it.
+
+Refactor skb_tc_reinsert() to return integer error code and don't increment
+overlimit qstats in case of error, and use the returned error code in
+tcf_mirred_act() to manually increment the overlimit counter with new
+helper function.
+
+Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[ skulkarni: Adjusted patch for file 'sch_generic.h' wrt the mainline commit ]
+Stable-dep-of: ca22da2fbd69 ("act_mirred: use the backlog for nested calls to mirred ingress")
+Signed-off-by: Shubham Kulkarni <skulkarni@mvista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sch_generic.h |   12 ++----------
+ net/sched/act_mirred.c    |    4 ++--
+ 2 files changed, 4 insertions(+), 12 deletions(-)
+
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -1320,17 +1320,9 @@ void mini_qdisc_pair_swap(struct mini_Qd
+ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
+                         struct mini_Qdisc __rcu **p_miniq);
+ 
+-static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
++static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
+ {
+-      struct gnet_stats_queue *stats = res->qstats;
+-      int ret;
+-
+-      if (res->ingress)
+-              ret = netif_receive_skb(skb);
+-      else
+-              ret = dev_queue_xmit(skb);
+-      if (ret && stats)
+-              qstats_overlimit_inc(res->qstats);
++      return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb);
+ }
+ 
+ /* Make sure qdisc is no longer in SCHED state. */
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -295,8 +295,8 @@ static int tcf_mirred_act(struct sk_buff
+               /* let's the caller reinsert the packet, if possible */
+               if (use_reinsert) {
+                       res->ingress = want_ingress;
+-                      res->qstats = this_cpu_ptr(m->common.cpu_qstats);
+-                      skb_tc_reinsert(skb, res);
++                      if (skb_tc_reinsert(skb, res))
++                              tcf_action_inc_overlimit_qstats(&m->common);
+                       __this_cpu_dec(mirred_rec_level);
+                       return TC_ACT_CONSUMED;
+               }
diff --git a/queue-5.4/net-sched-extract-bstats-update-code-into-function.patch b/queue-5.4/net-sched-extract-bstats-update-code-into-function.patch

new file mode 100644 (file)

index 0000000..099e73f
--- /dev/null
+++ b/queue-5.4/net-sched-extract-bstats-update-code-into-function.patch
@@ -0,0 +1,117 @@
+From stable+bounces-164687-greg=kroah.com@vger.kernel.org Thu Jul 24 21:27:58 2025
+From: skulkarni@mvista.com
+Date: Fri, 25 Jul 2025 00:56:13 +0530
+Subject: net: sched: extract bstats update code into function
+To: stable@vger.kernel.org
+Cc: akuster@mvista.com, cminyard@mvista.com, Vlad Buslov <vladbu@mellanox.com>, Jiri Pirko <jiri@mellanox.com>, "David S . Miller" <davem@davemloft.net>, Shubham Kulkarni <skulkarni@mvista.com>
+Message-ID: <20250724192619.217203-3-skulkarni@mvista.com>
+
+From: Vlad Buslov <vladbu@mellanox.com>
+
+[ Upstream commit 5e1ad95b630e652d3467d1fd1f0b5e5ea2c441e2 ]
+
+Extract common code that increments cpu_bstats counter into standalone act
+API function. Change hardware offloaded actions that use percpu counter
+allocation to use the new function instead of incrementing cpu_bstats
+directly.
+
+This commit doesn't change functionality.
+
+Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: ca22da2fbd69 ("act_mirred: use the backlog for nested calls to mirred ingress")
+Signed-off-by: Shubham Kulkarni <skulkarni@mvista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/act_api.h      |    7 +++++++
+ net/sched/act_csum.c       |    2 +-
+ net/sched/act_ct.c         |    2 +-
+ net/sched/act_gact.c       |    2 +-
+ net/sched/act_mirred.c     |    2 +-
+ net/sched/act_tunnel_key.c |    2 +-
+ net/sched/act_vlan.c       |    2 +-
+ 7 files changed, 13 insertions(+), 6 deletions(-)
+
+--- a/include/net/act_api.h
++++ b/include/net/act_api.h
+@@ -186,6 +186,13 @@ int tcf_action_dump(struct sk_buff *skb,
+                   int ref);
+ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
+ int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
++
++static inline void tcf_action_update_bstats(struct tc_action *a,
++                                          struct sk_buff *skb)
++{
++      bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
++}
++
+ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
+                            bool drop, bool hw);
+ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
+--- a/net/sched/act_csum.c
++++ b/net/sched/act_csum.c
+@@ -577,7 +577,7 @@ static int tcf_csum_act(struct sk_buff *
+       params = rcu_dereference_bh(p->params);
+ 
+       tcf_lastuse_update(&p->tcf_tm);
+-      bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
++      tcf_action_update_bstats(&p->common, skb);
+ 
+       action = READ_ONCE(p->tcf_action);
+       if (unlikely(action == TC_ACT_SHOT))
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -482,7 +482,7 @@ out_push:
+       skb_push_rcsum(skb, nh_ofs);
+ 
+ out:
+-      bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
++      tcf_action_update_bstats(&c->common, skb);
+       return retval;
+ 
+ drop:
+--- a/net/sched/act_gact.c
++++ b/net/sched/act_gact.c
+@@ -159,7 +159,7 @@ static int tcf_gact_act(struct sk_buff *
+               action = gact_rand[ptype](gact);
+       }
+ #endif
+-      bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb);
++      tcf_action_update_bstats(&gact->common, skb);
+       if (action == TC_ACT_SHOT)
+               qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats));
+ 
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -233,7 +233,7 @@ static int tcf_mirred_act(struct sk_buff
+       }
+ 
+       tcf_lastuse_update(&m->tcf_tm);
+-      bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
++      tcf_action_update_bstats(&m->common, skb);
+ 
+       m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
+       m_eaction = READ_ONCE(m->tcfm_eaction);
+--- a/net/sched/act_tunnel_key.c
++++ b/net/sched/act_tunnel_key.c
+@@ -31,7 +31,7 @@ static int tunnel_key_act(struct sk_buff
+       params = rcu_dereference_bh(t->params);
+ 
+       tcf_lastuse_update(&t->tcf_tm);
+-      bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
++      tcf_action_update_bstats(&t->common, skb);
+       action = READ_ONCE(t->tcf_action);
+ 
+       switch (params->tcft_action) {
+--- a/net/sched/act_vlan.c
++++ b/net/sched/act_vlan.c
+@@ -29,7 +29,7 @@ static int tcf_vlan_act(struct sk_buff *
+       u16 tci;
+ 
+       tcf_lastuse_update(&v->tcf_tm);
+-      bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
++      tcf_action_update_bstats(&v->common, skb);
+ 
+       /* Ensure 'data' points at mac_header prior calling vlan manipulating
+        * functions.
diff --git a/queue-5.4/net-sched-extract-common-action-counters-update-code-into-function.patch b/queue-5.4/net-sched-extract-common-action-counters-update-code-into-function.patch

new file mode 100644 (file)

index 0000000..cdb6713
--- /dev/null
+++ b/queue-5.4/net-sched-extract-common-action-counters-update-code-into-function.patch
@@ -0,0 +1,147 @@
+From stable+bounces-164686-greg=kroah.com@vger.kernel.org Thu Jul 24 21:27:51 2025
+From: skulkarni@mvista.com
+Date: Fri, 25 Jul 2025 00:56:12 +0530
+Subject: net: sched: extract common action counters update code into function
+To: stable@vger.kernel.org
+Cc: akuster@mvista.com, cminyard@mvista.com, Vlad Buslov <vladbu@mellanox.com>, Jiri Pirko <jiri@mellanox.com>, "David S . Miller" <davem@davemloft.net>, Shubham Kulkarni <skulkarni@mvista.com>
+Message-ID: <20250724192619.217203-2-skulkarni@mvista.com>
+
+From: Vlad Buslov <vladbu@mellanox.com>
+
+[ Upstream commit c8ecebd04cbb6badb46d42fe54282e7883ed63cc ]
+
+Currently, all implementations of tc_action_ops->stats_update() callback
+have almost exactly the same implementation of counters update
+code (besides gact which also updates drop counter). In order to simplify
+support for using both percpu-allocated and regular action counters
+depending on run-time flag in following patches, extract action counters
+update code into standalone function in act API.
+
+This commit doesn't change functionality.
+
+Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: ca22da2fbd69 ("act_mirred: use the backlog for nested calls to mirred ingress")
+Signed-off-by: Shubham Kulkarni <skulkarni@mvista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/act_api.h  |    2 ++
+ net/sched/act_api.c    |   14 ++++++++++++++
+ net/sched/act_ct.c     |    6 +-----
+ net/sched/act_gact.c   |   10 +---------
+ net/sched/act_mirred.c |    5 +----
+ net/sched/act_police.c |    5 +----
+ net/sched/act_vlan.c   |    5 +----
+ 7 files changed, 21 insertions(+), 26 deletions(-)
+
+--- a/include/net/act_api.h
++++ b/include/net/act_api.h
+@@ -186,6 +186,8 @@ int tcf_action_dump(struct sk_buff *skb,
+                   int ref);
+ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
+ int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
++void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
++                           bool drop, bool hw);
+ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
+ 
+ int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -1032,6 +1032,20 @@ err:
+       return err;
+ }
+ 
++void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
++                           bool drop, bool hw)
++{
++      _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
++
++      if (drop)
++              this_cpu_ptr(a->cpu_qstats)->drops += packets;
++
++      if (hw)
++              _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
++                                 bytes, packets);
++}
++EXPORT_SYMBOL(tcf_action_update_stats);
++
+ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
+                         int compat_mode)
+ {
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -917,11 +917,7 @@ static void tcf_stats_update(struct tc_a
+ {
+       struct tcf_ct *c = to_ct(a);
+ 
+-      _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+-
+-      if (hw)
+-              _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+-                                 bytes, packets);
++      tcf_action_update_stats(a, bytes, packets, false, hw);
+       c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
+ }
+ 
+--- a/net/sched/act_gact.c
++++ b/net/sched/act_gact.c
+@@ -175,15 +175,7 @@ static void tcf_gact_stats_update(struct
+       int action = READ_ONCE(gact->tcf_action);
+       struct tcf_t *tm = &gact->tcf_tm;
+ 
+-      _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes,
+-                         packets);
+-      if (action == TC_ACT_SHOT)
+-              this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
+-
+-      if (hw)
+-              _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats_hw),
+-                                 bytes, packets);
+-
++      tcf_action_update_stats(a, bytes, packets, action == TC_ACT_SHOT, hw);
+       tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+ }
+ 
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -324,10 +324,7 @@ static void tcf_stats_update(struct tc_a
+       struct tcf_mirred *m = to_mirred(a);
+       struct tcf_t *tm = &m->tcf_tm;
+ 
+-      _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+-      if (hw)
+-              _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+-                                 bytes, packets);
++      tcf_action_update_stats(a, bytes, packets, false, hw);
+       tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+ }
+ 
+--- a/net/sched/act_police.c
++++ b/net/sched/act_police.c
+@@ -306,10 +306,7 @@ static void tcf_police_stats_update(stru
+       struct tcf_police *police = to_police(a);
+       struct tcf_t *tm = &police->tcf_tm;
+ 
+-      _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+-      if (hw)
+-              _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+-                                 bytes, packets);
++      tcf_action_update_stats(a, bytes, packets, false, hw);
+       tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+ }
+ 
+--- a/net/sched/act_vlan.c
++++ b/net/sched/act_vlan.c
+@@ -308,10 +308,7 @@ static void tcf_vlan_stats_update(struct
+       struct tcf_vlan *v = to_vlan(a);
+       struct tcf_t *tm = &v->tcf_tm;
+ 
+-      _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+-      if (hw)
+-              _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+-                                 bytes, packets);
++      tcf_action_update_stats(a, bytes, packets, false, hw);
+       tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+ }
+ 
diff --git a/queue-5.4/net-sched-extract-qstats-update-code-into-functions.patch b/queue-5.4/net-sched-extract-qstats-update-code-into-functions.patch

new file mode 100644 (file)

index 0000000..3d133f2
--- /dev/null
+++ b/queue-5.4/net-sched-extract-qstats-update-code-into-functions.patch
@@ -0,0 +1,114 @@
+From stable+bounces-164688-greg=kroah.com@vger.kernel.org Thu Jul 24 21:27:54 2025
+From: skulkarni@mvista.com
+Date: Fri, 25 Jul 2025 00:56:14 +0530
+Subject: net: sched: extract qstats update code into functions
+To: stable@vger.kernel.org
+Cc: akuster@mvista.com, cminyard@mvista.com, Vlad Buslov <vladbu@mellanox.com>, Jiri Pirko <jiri@mellanox.com>, "David S . Miller" <davem@davemloft.net>, Shubham Kulkarni <skulkarni@mvista.com>
+Message-ID: <20250724192619.217203-4-skulkarni@mvista.com>
+
+From: Vlad Buslov <vladbu@mellanox.com>
+
+[ Upstream commit 26b537a88ca5b7399c7ab0656e06dbd9da9513c1 ]
+
+Extract common code that increments cpu_qstats counters into standalone act
+API functions. Change hardware offloaded actions that use percpu counter
+allocation to use the new functions instead of accessing cpu_qstats
+directly.
+
+This commit doesn't change functionality.
+
+Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: ca22da2fbd69 ("act_mirred: use the backlog for nested calls to mirred ingress")
+Signed-off-by: Shubham Kulkarni <skulkarni@mvista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/act_api.h  |   16 ++++++++++++++++
+ net/sched/act_csum.c   |    2 +-
+ net/sched/act_ct.c     |    2 +-
+ net/sched/act_gact.c   |    2 +-
+ net/sched/act_mirred.c |    2 +-
+ net/sched/act_vlan.c   |    2 +-
+ 6 files changed, 21 insertions(+), 5 deletions(-)
+
+--- a/include/net/act_api.h
++++ b/include/net/act_api.h
+@@ -193,6 +193,22 @@ static inline void tcf_action_update_bst
+       bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+ }
+ 
++static inline struct gnet_stats_queue *
++tcf_action_get_qstats(struct tc_action *a)
++{
++      return this_cpu_ptr(a->cpu_qstats);
++}
++
++static inline void tcf_action_inc_drop_qstats(struct tc_action *a)
++{
++      qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
++}
++
++static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a)
++{
++      qstats_overlimit_inc(this_cpu_ptr(a->cpu_qstats));
++}
++
+ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
+                            bool drop, bool hw);
+ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
+--- a/net/sched/act_csum.c
++++ b/net/sched/act_csum.c
+@@ -621,7 +621,7 @@ out:
+       return action;
+ 
+ drop:
+-      qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
++      tcf_action_inc_drop_qstats(&p->common);
+       action = TC_ACT_SHOT;
+       goto out;
+ }
+--- a/net/sched/act_ct.c
++++ b/net/sched/act_ct.c
+@@ -486,7 +486,7 @@ out:
+       return retval;
+ 
+ drop:
+-      qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
++      tcf_action_inc_drop_qstats(&c->common);
+       return TC_ACT_SHOT;
+ }
+ 
+--- a/net/sched/act_gact.c
++++ b/net/sched/act_gact.c
+@@ -161,7 +161,7 @@ static int tcf_gact_act(struct sk_buff *
+ #endif
+       tcf_action_update_bstats(&gact->common, skb);
+       if (action == TC_ACT_SHOT)
+-              qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats));
++              tcf_action_inc_drop_qstats(&gact->common);
+ 
+       tcf_lastuse_update(&gact->tcf_tm);
+ 
+--- a/net/sched/act_mirred.c
++++ b/net/sched/act_mirred.c
+@@ -309,7 +309,7 @@ static int tcf_mirred_act(struct sk_buff
+ 
+       if (err) {
+ out:
+-              qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
++              tcf_action_inc_overlimit_qstats(&m->common);
+               if (tcf_mirred_is_act_redirect(m_eaction))
+                       retval = TC_ACT_SHOT;
+       }
+--- a/net/sched/act_vlan.c
++++ b/net/sched/act_vlan.c
+@@ -88,7 +88,7 @@ out:
+       return action;
+ 
+ drop:
+-      qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
++      tcf_action_inc_drop_qstats(&v->common);
+       return TC_ACT_SHOT;
+ }
+ 
diff --git a/queue-5.4/sch_drr-make-drr_qlen_notify-idempotent.patch b/queue-5.4/sch_drr-make-drr_qlen_notify-idempotent.patch

new file mode 100644 (file)

index 0000000..cb56767
--- /dev/null
+++ b/queue-5.4/sch_drr-make-drr_qlen_notify-idempotent.patch
@@ -0,0 +1,66 @@
+From df008598b3a00be02a8051fde89ca0fbc416bd55 Mon Sep 17 00:00:00 2001
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Thu, 3 Apr 2025 14:10:24 -0700
+Subject: sch_drr: make drr_qlen_notify() idempotent
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+commit df008598b3a00be02a8051fde89ca0fbc416bd55 upstream.
+
+drr_qlen_notify() always deletes the DRR class from its active list
+with list_del(), therefore, it is not idempotent and not friendly
+to its callers, like fq_codel_dequeue().
+
+Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers'
+life. Also change other list_del()'s to list_del_init() just to be
+extra safe.
+
+Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250403211033.166059-3-xiyou.wangcong@gmail.com
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Siddh Raman Pant <siddh.raman.pant@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_drr.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -111,6 +111,7 @@ static int drr_change_class(struct Qdisc
+       if (cl == NULL)
+               return -ENOBUFS;
+ 
++      INIT_LIST_HEAD(&cl->alist);
+       cl->common.classid = classid;
+       cl->quantum        = quantum;
+       cl->qdisc          = qdisc_create_dflt(sch->dev_queue,
+@@ -234,7 +235,7 @@ static void drr_qlen_notify(struct Qdisc
+ {
+       struct drr_class *cl = (struct drr_class *)arg;
+ 
+-      list_del(&cl->alist);
++      list_del_init(&cl->alist);
+ }
+ 
+ static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
+@@ -401,7 +402,7 @@ static struct sk_buff *drr_dequeue(struc
+                       if (unlikely(skb == NULL))
+                               goto out;
+                       if (cl->qdisc->q.qlen == 0)
+-                              list_del(&cl->alist);
++                              list_del_init(&cl->alist);
+ 
+                       bstats_update(&cl->bstats, skb);
+                       qdisc_bstats_update(sch, skb);
+@@ -442,7 +443,7 @@ static void drr_reset_qdisc(struct Qdisc
+       for (i = 0; i < q->clhash.hashsize; i++) {
+               hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
+                       if (cl->qdisc->q.qlen)
+-                              list_del(&cl->alist);
++                              list_del_init(&cl->alist);
+                       qdisc_reset(cl->qdisc);
+               }
+       }
diff --git a/queue-5.4/sch_hfsc-make-hfsc_qlen_notify-idempotent.patch b/queue-5.4/sch_hfsc-make-hfsc_qlen_notify-idempotent.patch

new file mode 100644 (file)

index 0000000..d7896d3
--- /dev/null
+++ b/queue-5.4/sch_hfsc-make-hfsc_qlen_notify-idempotent.patch
@@ -0,0 +1,55 @@
+From 51eb3b65544c9efd6a1026889ee5fb5aa62da3bb Mon Sep 17 00:00:00 2001
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Thu, 3 Apr 2025 14:10:25 -0700
+Subject: sch_hfsc: make hfsc_qlen_notify() idempotent
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+commit 51eb3b65544c9efd6a1026889ee5fb5aa62da3bb upstream.
+
+hfsc_qlen_notify() is not idempotent either and not friendly
+to its callers, like fq_codel_dequeue(). Let's make it idempotent
+to ease qdisc_tree_reduce_backlog() callers' life:
+
+1. update_vf() decreases cl->cl_nactive, so we can check whether it is
+non-zero before calling it.
+
+2. eltree_remove() always removes RB node cl->el_node, but we can use
+   RB_EMPTY_NODE() + RB_CLEAR_NODE() to make it safe.
+
+Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250403211033.166059-4-xiyou.wangcong@gmail.com
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Siddh Raman Pant <siddh.raman.pant@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_hfsc.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -209,7 +209,10 @@ eltree_insert(struct hfsc_class *cl)
+ static inline void
+ eltree_remove(struct hfsc_class *cl)
+ {
+-      rb_erase(&cl->el_node, &cl->sched->eligible);
++      if (!RB_EMPTY_NODE(&cl->el_node)) {
++              rb_erase(&cl->el_node, &cl->sched->eligible);
++              RB_CLEAR_NODE(&cl->el_node);
++      }
+ }
+ 
+ static inline void
+@@ -1230,7 +1233,8 @@ hfsc_qlen_notify(struct Qdisc *sch, unsi
+       /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
+        * needs to be called explicitly to remove a class from vttree.
+        */
+-      update_vf(cl, 0, 0);
++      if (cl->cl_nactive)
++              update_vf(cl, 0, 0);
+       if (cl->cl_flags & HFSC_RSC)
+               eltree_remove(cl);
+ }
diff --git a/queue-5.4/sch_qfq-make-qfq_qlen_notify-idempotent.patch b/queue-5.4/sch_qfq-make-qfq_qlen_notify-idempotent.patch

new file mode 100644 (file)

index 0000000..e4feba2
--- /dev/null
+++ b/queue-5.4/sch_qfq-make-qfq_qlen_notify-idempotent.patch
@@ -0,0 +1,67 @@
+From 55f9eca4bfe30a15d8656f915922e8c98b7f0728 Mon Sep 17 00:00:00 2001
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Thu, 3 Apr 2025 14:10:26 -0700
+Subject: sch_qfq: make qfq_qlen_notify() idempotent
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+commit 55f9eca4bfe30a15d8656f915922e8c98b7f0728 upstream.
+
+qfq_qlen_notify() always deletes its class from its active list
+with list_del_init() _and_ calls qfq_deactivate_agg() when the whole list
+becomes empty.
+
+To make it idempotent, just skip everything when it is not in the active
+list.
+
+Also change other list_del()'s to list_del_init() just to be extra safe.
+
+Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250403211033.166059-5-xiyou.wangcong@gmail.com
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Siddh Raman Pant <siddh.raman.pant@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_qfq.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -348,7 +348,7 @@ static void qfq_deactivate_class(struct
+       struct qfq_aggregate *agg = cl->agg;
+ 
+ 
+-      list_del(&cl->alist); /* remove from RR queue of the aggregate */
++      list_del_init(&cl->alist); /* remove from RR queue of the aggregate */
+       if (list_empty(&agg->active)) /* agg is now inactive */
+               qfq_deactivate_agg(q, agg);
+ }
+@@ -482,6 +482,7 @@ static int qfq_change_class(struct Qdisc
+ 
+       cl->common.classid = classid;
+       cl->deficit = lmax;
++      INIT_LIST_HEAD(&cl->alist);
+ 
+       cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+                                     classid, NULL);
+@@ -1001,7 +1002,7 @@ static struct sk_buff *agg_dequeue(struc
+       cl->deficit -= (int) len;
+ 
+       if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */
+-              list_del(&cl->alist);
++              list_del_init(&cl->alist);
+       else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
+               cl->deficit += agg->lmax;
+               list_move_tail(&cl->alist, &agg->active);
+@@ -1433,6 +1434,8 @@ static void qfq_qlen_notify(struct Qdisc
+       struct qfq_sched *q = qdisc_priv(sch);
+       struct qfq_class *cl = (struct qfq_class *)arg;
+ 
++      if (list_empty(&cl->alist))
++              return;
+       qfq_deactivate_class(q, cl);
+ }
+ 
diff --git a/queue-5.4/selftests-forwarding-tc_actions.sh-add-matchall-mirror-test.patch b/queue-5.4/selftests-forwarding-tc_actions.sh-add-matchall-mirror-test.patch

new file mode 100644 (file)

index 0000000..306a904
--- /dev/null
+++ b/queue-5.4/selftests-forwarding-tc_actions.sh-add-matchall-mirror-test.patch
@@ -0,0 +1,93 @@
+From stable+bounces-164690-greg=kroah.com@vger.kernel.org Thu Jul 24 21:27:35 2025
+From: skulkarni@mvista.com
+Date: Fri, 25 Jul 2025 00:56:16 +0530
+Subject: selftests: forwarding: tc_actions.sh: add matchall mirror test
+To: stable@vger.kernel.org
+Cc: akuster@mvista.com, cminyard@mvista.com, Jiri Pirko <jiri@mellanox.com>, Ido Schimmel <idosch@mellanox.com>, "David S . Miller" <davem@davemloft.net>, Shubham Kulkarni <skulkarni@mvista.com>
+Message-ID: <20250724192619.217203-6-skulkarni@mvista.com>
+
+From: Jiri Pirko <jiri@mellanox.com>
+
+[ Upstream commit 075c8aa79d541ea08c67a2e6d955f6457e98c21c ]
+
+Add test for matchall classifier with mirred egress mirror action.
+
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: ca22da2fbd69 ("act_mirred: use the backlog for nested calls to mirred ingress")
+Signed-off-by: Shubham Kulkarni <skulkarni@mvista.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/forwarding/tc_actions.sh |   26 +++++++++++++------
+ 1 file changed, 18 insertions(+), 8 deletions(-)
+
+--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
++++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
+@@ -2,7 +2,8 @@
+ # SPDX-License-Identifier: GPL-2.0
+ 
+ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+-      mirred_egress_mirror_test gact_trap_test"
++      mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
++      gact_trap_test"
+ NUM_NETIFS=4
+ source tc_common.sh
+ source lib.sh
+@@ -50,6 +51,9 @@ switch_destroy()
+ mirred_egress_test()
+ {
+       local action=$1
++      local protocol=$2
++      local classifier=$3
++      local classifier_args=$4
+ 
+       RET=0
+ 
+@@ -62,9 +66,9 @@ mirred_egress_test()
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_fail $? "Matched without redirect rule inserted"
+ 
+-      tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+-              $tcflags dst_ip 192.0.2.2 action mirred egress $action \
+-              dev $swp2
++      tc filter add dev $swp1 ingress protocol $protocol pref 1 handle 101 \
++              $classifier $tcflags $classifier_args \
++              action mirred egress $action dev $swp2
+ 
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t ip -q
+@@ -72,10 +76,11 @@ mirred_egress_test()
+       tc_check_packets "dev $h2 ingress" 101 1
+       check_err $? "Did not match incoming $action packet"
+ 
+-      tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
++      tc filter del dev $swp1 ingress protocol $protocol pref 1 handle 101 \
++              $classifier
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ 
+-      log_test "mirred egress $action ($tcflags)"
++      log_test "mirred egress $classifier $action ($tcflags)"
+ }
+ 
+ gact_drop_and_ok_test()
+@@ -187,12 +192,17 @@ cleanup()
+ 
+ mirred_egress_redirect_test()
+ {
+-      mirred_egress_test "redirect"
++      mirred_egress_test "redirect" "ip" "flower" "dst_ip 192.0.2.2"
+ }
+ 
+ mirred_egress_mirror_test()
+ {
+-      mirred_egress_test "mirror"
++      mirred_egress_test "mirror" "ip" "flower" "dst_ip 192.0.2.2"
++}
++
++matchall_mirred_egress_mirror_test()
++{
++      mirred_egress_test "mirror" "all" "matchall" ""
+ }
+ 
+ trap cleanup EXIT
diff --git a/queue-5.4/series b/queue-5.4/series

index 06d47a6cc9e30a8b7b5f6e7c538a24577239cc8f..d7177f564be7bc5a2d6fb0147ef3a48d43a2fbda 100644 (file)
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -373,3 +373,18 @@ media-venus-protect-against-spurious-interrupts-during-probe.patch
  f2fs-fix-to-avoid-out-of-boundary-access-in-dnode-page.patch
  media-venus-hfi-explicitly-release-irq-during-teardown.patch
  btrfs-populate-otime-when-logging-an-inode-item.patch
+sch_drr-make-drr_qlen_notify-idempotent.patch
+sch_hfsc-make-hfsc_qlen_notify-idempotent.patch
+sch_qfq-make-qfq_qlen_notify-idempotent.patch
+codel-remove-sch-q.qlen-check-before-qdisc_tree_reduce_backlog.patch
+mm-drop-the-assumption-that-vm_shared-always-implies-writable.patch
+mm-update-memfd-seal-write-check-to-include-f_seal_write.patch
+mm-perform-the-mapping_map_writable-check-after-call_mmap.patch
+net-sched-extract-common-action-counters-update-code-into-function.patch
+net-sched-extract-bstats-update-code-into-function.patch
+net-sched-extract-qstats-update-code-into-functions.patch
+net-sched-don-t-expose-action-qstats-to-skb_tc_reinsert.patch
+selftests-forwarding-tc_actions.sh-add-matchall-mirror-test.patch
+net-sched-act_mirred-refactor-the-handle-of-xmit.patch
+net-sched-act_mirred-better-wording-on-protection-against-excessive-stack-growth.patch
+act_mirred-use-the-backlog-for-nested-calls-to-mirred-ingress.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 24 Aug 2025 08:53:40 +0000 (10:53 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 24 Aug 2025 08:53:40 +0000 (10:53 +0200)
queue-5.4/act_mirred-use-the-backlog-for-nested-calls-to-mirred-ingress.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/codel-remove-sch-q.qlen-check-before-qdisc_tree_reduce_backlog.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/mm-drop-the-assumption-that-vm_shared-always-implies-writable.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/mm-perform-the-mapping_map_writable-check-after-call_mmap.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/mm-update-memfd-seal-write-check-to-include-f_seal_write.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/net-sched-act_mirred-better-wording-on-protection-against-excessive-stack-growth.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/net-sched-act_mirred-refactor-the-handle-of-xmit.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/net-sched-don-t-expose-action-qstats-to-skb_tc_reinsert.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/net-sched-extract-bstats-update-code-into-function.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/net-sched-extract-common-action-counters-update-code-into-function.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/net-sched-extract-qstats-update-code-into-functions.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/sch_drr-make-drr_qlen_notify-idempotent.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/sch_hfsc-make-hfsc_qlen_notify-idempotent.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/sch_qfq-make-qfq_qlen_notify-idempotent.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/selftests-forwarding-tc_actions.sh-add-matchall-mirror-test.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/series		patch \| blob \| blame \| history