Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
- include/uapi/linux/bpf.h | 2 ++
- net/core/filter.c | 5 +++--
- tools/include/uapi/linux/bpf.h | 2 ++
+ include/uapi/linux/bpf.h | 2 ++
+ net/core/filter.c | 5 +++--
+ tools/include/uapi/linux/bpf.h | 2 ++
3 files changed, 7 insertions(+), 2 deletions(-)
-diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
-index 0bdeeabbc5a8..2ac62d5ed466 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1695,6 +1695,7 @@ union bpf_attr {
};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
-diff --git a/net/core/filter.c b/net/core/filter.c
-index 65b7fb9c3d29..169d9ba4e7a0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
-@@ -1951,10 +1951,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
+@@ -1951,10 +1951,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
bool do_mforce = flags & BPF_F_MARK_ENFORCE;
return -EINVAL;
if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
-@@ -1970,7 +1971,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
+@@ -1970,7 +1971,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s
if (unlikely(from != 0))
return -EINVAL;
break;
case 2:
inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
-diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
-index 54b8c899d21c..fe70f9ce8b00 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1695,6 +1695,7 @@ union bpf_attr {
};
/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
---
-2.43.0
-
--- /dev/null
+From 76486b104168ae59703190566e372badf433314b Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Sat, 5 Oct 2024 00:15:56 +0200
+Subject: ext4: avoid remount errors with 'abort' mount option
+
+From: Jan Kara <jack@suse.cz>
+
+commit 76486b104168ae59703190566e372badf433314b upstream.
+
+When we remount filesystem with 'abort' mount option while changing
+other mount options as well (as is LTP test doing), we can return error
+from the system call after commit d3476f3dad4a ("ext4: don't set
+SB_RDONLY after filesystem errors") because the application of mount
+option changes detects shutdown filesystem and refuses to do anything.
+The behavior of application of other mount options in presence of
+'abort' mount option is currently rather arbitrary as some mount option
+changes are handled before 'abort' and some after it.
+
+Move aborting of the filesystem to the end of remount handling so all
+requested changes are properly applied before the filesystem is shutdown
+to have a reasonably consistent behavior.
+
+Fixes: d3476f3dad4a ("ext4: don't set SB_RDONLY after filesystem errors")
+Reported-by: Jan Stancek <jstancek@redhat.com>
+Link: https://lore.kernel.org/all/Zvp6L+oFnfASaoHl@t14s
+Signed-off-by: Jan Kara <jack@suse.cz>
+Tested-by: Jan Stancek <jstancek@redhat.com>
+Link: https://patch.msgid.link/20241004221556.19222-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/super.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -5849,9 +5849,6 @@ static int ext4_remount(struct super_blo
+ goto restore_opts;
+ }
+
+- if (test_opt2(sb, ABORT))
+- ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
+-
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
+
+@@ -6027,6 +6024,14 @@ static int ext4_remount(struct super_blo
+ */
+ *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags);
+
++ /*
++ * Handle aborting the filesystem as the last thing during remount to
++ * avoid obscure errors during remount when some option changes fail to
++ * apply due to shutdown filesystem.
++ */
++ if (test_opt2(sb, ABORT))
++ ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
++
+ ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.",
+ orig_data, ext4_quota_mode(sb));
+ kfree(orig_data);
--- /dev/null
+From 22b8d707b07e6e06f50fe1d9ca8756e1f894eb0d Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Fri, 16 Jun 2023 18:50:50 +0200
+Subject: ext4: make 'abort' mount option handling standard
+
+From: Jan Kara <jack@suse.cz>
+
+commit 22b8d707b07e6e06f50fe1d9ca8756e1f894eb0d upstream.
+
+'abort' mount option is the only mount option that has special handling
+and sets a bit in sbi->s_mount_flags. There is no strong reason for
+that so just simplify the code and make 'abort' set a bit in
+sbi->s_mount_opt2 as any other mount option. This simplifies the code
+and will allow us to drop EXT4_MF_FS_ABORTED completely in the following
+patch.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230616165109.21695-4-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 76486b104168 ("ext4: avoid remount errors with 'abort' mount option")
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/ext4.h | 1 +
+ fs/ext4/super.c | 6 ++----
+ 2 files changed, 3 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1255,6 +1255,7 @@ struct ext4_inode_info {
+ #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group
+ * scanning in mballoc
+ */
++#define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */
+
+ #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
+ ~EXT4_MOUNT_##opt
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2023,6 +2023,7 @@ static const struct mount_opts {
+ MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
+ {Opt_fc_debug_max_replay, 0, MOPT_GTE0},
+ #endif
++ {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
+ {Opt_err, 0, 0}
+ };
+
+@@ -2143,9 +2144,6 @@ static int handle_mount_opt(struct super
+ case Opt_removed:
+ ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
+ return 1;
+- case Opt_abort:
+- ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
+- return 1;
+ case Opt_i_version:
+ sb->s_flags |= SB_I_VERSION;
+ return 1;
+@@ -5851,7 +5849,7 @@ static int ext4_remount(struct super_blo
+ goto restore_opts;
+ }
+
+- if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
++ if (test_opt2(sb, ABORT))
+ ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
+
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
--- /dev/null
+From be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 Mon Sep 17 00:00:00 2001
+From: Gavin Guo <gavinguo@igalia.com>
+Date: Mon, 21 Apr 2025 19:35:36 +0800
+Subject: mm/huge_memory: fix dereferencing invalid pmd migration entry
+
+From: Gavin Guo <gavinguo@igalia.com>
+
+commit be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 upstream.
+
+When migrating a THP, concurrent access to the PMD migration entry during
+a deferred split scan can lead to an invalid address access, as
+illustrated below. To prevent this invalid access, it is necessary to
+check the PMD migration entry and return early. In this context, there is
+no need to use pmd_to_swp_entry and pfn_swap_entry_to_page to verify the
+equality of the target folio. Since the PMD migration entry is locked, it
+cannot be served as the target.
+
+Mailing list discussion and explanation from Hugh Dickins: "An anon_vma
+lookup points to a location which may contain the folio of interest, but
+might instead contain another folio: and weeding out those other folios is
+precisely what the "folio != pmd_folio((*pmd)" check (and the "risk of
+replacing the wrong folio" comment a few lines above it) is for."
+
+BUG: unable to handle page fault for address: ffffea60001db008
+CPU: 0 UID: 0 PID: 2199114 Comm: tee Not tainted 6.14.0+ #4 NONE
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
+RIP: 0010:split_huge_pmd_locked+0x3b5/0x2b60
+Call Trace:
+<TASK>
+try_to_migrate_one+0x28c/0x3730
+rmap_walk_anon+0x4f6/0x770
+unmap_folio+0x196/0x1f0
+split_huge_page_to_list_to_order+0x9f6/0x1560
+deferred_split_scan+0xac5/0x12a0
+shrinker_debugfs_scan_write+0x376/0x470
+full_proxy_write+0x15c/0x220
+vfs_write+0x2fc/0xcb0
+ksys_write+0x146/0x250
+do_syscall_64+0x6a/0x120
+entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+The bug is found by syzkaller on an internal kernel, then confirmed on
+upstream.
+
+Link: https://lkml.kernel.org/r/20250421113536.3682201-1-gavinguo@igalia.com
+Link: https://lore.kernel.org/all/20250414072737.1698513-1-gavinguo@igalia.com/
+Link: https://lore.kernel.org/all/20250418085802.2973519-1-gavinguo@igalia.com/
+Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
+Signed-off-by: Gavin Guo <gavinguo@igalia.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Zi Yan <ziy@nvidia.com>
+Reviewed-by: Gavin Shan <gshan@redhat.com>
+Cc: Florent Revest <revest@google.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[gavin: backport the migration checking logic to __split_huge_pmd]
+Signed-off-by: Gavin Guo <gavinguo@igalia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2161,7 +2161,7 @@ void __split_huge_pmd(struct vm_area_str
+ VM_BUG_ON(freeze && !page);
+ if (page) {
+ VM_WARN_ON_ONCE(!PageLocked(page));
+- if (page != pmd_page(*pmd))
++ if (is_pmd_migration_entry(*pmd) || page != pmd_page(*pmd))
+ goto out;
+ }
+
--- /dev/null
+From 6043b794c7668c19dabc4a93c75b924a19474d59 Mon Sep 17 00:00:00 2001
+From: Paul Chaignon <paul.chaignon@gmail.com>
+Date: Thu, 29 May 2025 12:28:05 +0200
+Subject: net: Fix checksum update for ILA adj-transport
+
+From: Paul Chaignon <paul.chaignon@gmail.com>
+
+commit 6043b794c7668c19dabc4a93c75b924a19474d59 upstream.
+
+During ILA address translations, the L4 checksums can be handled in
+different ways. One of them, adj-transport, consists in parsing the
+transport layer and updating any found checksum. This logic relies on
+inet_proto_csum_replace_by_diff and produces an incorrect skb->csum when
+in state CHECKSUM_COMPLETE.
+
+This bug can be reproduced with a simple ILA to SIR mapping, assuming
+packets are received with CHECKSUM_COMPLETE:
+
+ $ ip a show dev eth0
+ 14: eth0@if15: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+ link/ether 62:ae:35:9e:0f:8d brd ff:ff:ff:ff:ff:ff link-netnsid 0
+ inet6 3333:0:0:1::c078/64 scope global
+ valid_lft forever preferred_lft forever
+ inet6 fd00:10:244:1::c078/128 scope global nodad
+ valid_lft forever preferred_lft forever
+ inet6 fe80::60ae:35ff:fe9e:f8d/64 scope link proto kernel_ll
+ valid_lft forever preferred_lft forever
+ $ ip ila add loc_match fd00:10:244:1 loc 3333:0:0:1 \
+ csum-mode adj-transport ident-type luid dev eth0
+
+Then I hit [fd00:10:244:1::c078]:8000 with a server listening only on
+[3333:0:0:1::c078]:8000. With the bug, the SYN packet is dropped with
+SKB_DROP_REASON_TCP_CSUM after inet_proto_csum_replace_by_diff changed
+skb->csum. The translation and drop are visible on pwru [1] traces:
+
+ IFACE TUPLE FUNC
+ eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ipv6_rcv
+ eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ip6_rcv_core
+ eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) nf_hook_slow
+ eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) inet_proto_csum_replace_by_diff
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_early_demux
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_route_input
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input_finish
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_protocol_deliver_rcu
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) raw6_local_deliver
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ipv6_raw_deliver
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_rcv
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) __skb_checksum_complete
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skb_reason(SKB_DROP_REASON_TCP_CSUM)
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_head_state
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_data
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_free_head
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skbmem
+
+This is happening because inet_proto_csum_replace_by_diff is updating
+skb->csum when it shouldn't. The L4 checksum is updated such that it
+"cancels" the IPv6 address change in terms of checksum computation, so
+the impact on skb->csum is null.
+
+Note this would be different for an IPv4 packet since three fields
+would be updated: the IPv4 address, the IP checksum, and the L4
+checksum. Two would cancel each other and skb->csum would still need
+to be updated to take the L4 checksum change into account.
+
+This patch fixes it by passing an ipv6 flag to
+inet_proto_csum_replace_by_diff, to skip the skb->csum update if we're
+in the IPv6 case. Note the behavior of the only other user of
+inet_proto_csum_replace_by_diff, the BPF subsystem, is left as is in
+this patch and fixed in the subsequent patch.
+
+With the fix, using the reproduction from above, I can confirm
+skb->csum is not touched by inet_proto_csum_replace_by_diff and the TCP
+SYN proceeds to the application after the ILA translation.
+
+Link: https://github.com/cilium/pwru [1]
+Fixes: 65d7ab8de582 ("net: Identifier Locator Addressing module")
+Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://patch.msgid.link/b5539869e3550d46068504feb02d37653d939c0b.1748509484.git.paul.chaignon@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ Fixed conflict due to unrelated change in inet_proto_csum_replace_by_diff. ]
+Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/checksum.h | 2 +-
+ net/core/filter.c | 2 +-
+ net/core/utils.c | 4 ++--
+ net/ipv6/ila/ila_common.c | 6 +++---
+ 4 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/include/net/checksum.h
++++ b/include/net/checksum.h
+@@ -154,7 +154,7 @@ void inet_proto_csum_replace16(__sum16 *
+ const __be32 *from, const __be32 *to,
+ bool pseudohdr);
+ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+- __wsum diff, bool pseudohdr);
++ __wsum diff, bool pseudohdr, bool ipv6);
+
+ static __always_inline
+ void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -1970,7 +1970,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s
+ if (unlikely(from != 0))
+ return -EINVAL;
+
+- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
++ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false);
+ break;
+ case 2:
+ inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
+--- a/net/core/utils.c
++++ b/net/core/utils.c
+@@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *
+ EXPORT_SYMBOL(inet_proto_csum_replace16);
+
+ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+- __wsum diff, bool pseudohdr)
++ __wsum diff, bool pseudohdr, bool ipv6)
+ {
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
++ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6)
+ skb->csum = ~csum_add(diff, ~skb->csum);
+ } else if (pseudohdr) {
+ *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
+--- a/net/ipv6/ila/ila_common.c
++++ b/net/ipv6/ila/ila_common.c
+@@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(st
+
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&th->check, skb,
+- diff, true);
++ diff, true, true);
+ }
+ break;
+ case NEXTHDR_UDP:
+@@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(st
+ if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&uh->check, skb,
+- diff, true);
++ diff, true, true);
+ if (!uh->check)
+ uh->check = CSUM_MANGLED_0;
+ }
+@@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(st
+
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
+- diff, true);
++ diff, true, true);
+ }
+ break;
+ }
--- /dev/null
+From stable+bounces-155162-greg=kroah.com@vger.kernel.org Fri Jun 20 17:50:54 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:17 +0000
+Subject: net_sched: sch_sfq: annotate data-races around q->perturb_period
+To: stable@vger.kernel.org
+Cc: Eric Dumazet <edumazet@google.com>, Simon Horman <horms@kernel.org>, Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20250620154623.331294-1-edumazet@google.com>
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit a17ef9e6c2c1cf0fc6cd6ca6a9ce525c67d1da7f upstream.
+
+sfq_perturbation() reads q->perturb_period locklessly.
+Add annotations to fix potential issues.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20240430180015.3111398-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -611,6 +611,7 @@ static void sfq_perturbation(struct time
+ struct Qdisc *sch = q->sch;
+ spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ siphash_key_t nkey;
++ int period;
+
+ get_random_bytes(&nkey, sizeof(nkey));
+ spin_lock(root_lock);
+@@ -619,8 +620,12 @@ static void sfq_perturbation(struct time
+ sfq_rehash(sch);
+ spin_unlock(root_lock);
+
+- if (q->perturb_period)
+- mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
++ /* q->perturb_period can change under us from
++ * sfq_change() and sfq_destroy().
++ */
++ period = READ_ONCE(q->perturb_period);
++ if (period)
++ mod_timer(&q->perturb_timer, jiffies + period);
+ }
+
+ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
+@@ -662,7 +667,7 @@ static int sfq_change(struct Qdisc *sch,
+ q->quantum = ctl->quantum;
+ q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+ }
+- q->perturb_period = ctl->perturb_period * HZ;
++ WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
+ if (ctl->flows)
+ q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+ if (ctl->divisor) {
+@@ -724,7 +729,7 @@ static void sfq_destroy(struct Qdisc *sc
+ struct sfq_sched_data *q = qdisc_priv(sch);
+
+ tcf_block_put(q->block);
+- q->perturb_period = 0;
++ WRITE_ONCE(q->perturb_period, 0);
+ del_timer_sync(&q->perturb_timer);
+ sfq_free(q->ht);
+ sfq_free(q->slots);
--- /dev/null
+From stable+bounces-155164-greg=kroah.com@vger.kernel.org Fri Jun 20 17:50:57 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:19 +0000
+Subject: net_sched: sch_sfq: don't allow 1 packet limit
+To: stable@vger.kernel.org
+Cc: Octavian Purdila <tavip@google.com>, syzbot <syzkaller@googlegroups.com>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20250620154623.331294-3-edumazet@google.com>
+
+From: Octavian Purdila <tavip@google.com>
+
+commit 10685681bafce6febb39770f3387621bf5d67d0b upstream.
+
+The current implementation does not work correctly with a limit of
+1. iproute2 actually checks for this and this patch adds the check in
+kernel as well.
+
+This fixes the following syzkaller reported crash:
+
+UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:210:6
+index 65535 is out of range for type 'struct sfq_head[128]'
+CPU: 0 PID: 2569 Comm: syz-executor101 Not tainted 5.10.0-smp-DEV #1
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024
+Call Trace:
+ __dump_stack lib/dump_stack.c:79 [inline]
+ dump_stack+0x125/0x19f lib/dump_stack.c:120
+ ubsan_epilogue lib/ubsan.c:148 [inline]
+ __ubsan_handle_out_of_bounds+0xed/0x120 lib/ubsan.c:347
+ sfq_link net/sched/sch_sfq.c:210 [inline]
+ sfq_dec+0x528/0x600 net/sched/sch_sfq.c:238
+ sfq_dequeue+0x39b/0x9d0 net/sched/sch_sfq.c:500
+ sfq_reset+0x13/0x50 net/sched/sch_sfq.c:525
+ qdisc_reset+0xfe/0x510 net/sched/sch_generic.c:1026
+ tbf_reset+0x3d/0x100 net/sched/sch_tbf.c:319
+ qdisc_reset+0xfe/0x510 net/sched/sch_generic.c:1026
+ dev_reset_queue+0x8c/0x140 net/sched/sch_generic.c:1296
+ netdev_for_each_tx_queue include/linux/netdevice.h:2350 [inline]
+ dev_deactivate_many+0x6dc/0xc20 net/sched/sch_generic.c:1362
+ __dev_close_many+0x214/0x350 net/core/dev.c:1468
+ dev_close_many+0x207/0x510 net/core/dev.c:1506
+ unregister_netdevice_many+0x40f/0x16b0 net/core/dev.c:10738
+ unregister_netdevice_queue+0x2be/0x310 net/core/dev.c:10695
+ unregister_netdevice include/linux/netdevice.h:2893 [inline]
+ __tun_detach+0x6b6/0x1600 drivers/net/tun.c:689
+ tun_detach drivers/net/tun.c:705 [inline]
+ tun_chr_close+0x104/0x1b0 drivers/net/tun.c:3640
+ __fput+0x203/0x840 fs/file_table.c:280
+ task_work_run+0x129/0x1b0 kernel/task_work.c:185
+ exit_task_work include/linux/task_work.h:33 [inline]
+ do_exit+0x5ce/0x2200 kernel/exit.c:931
+ do_group_exit+0x144/0x310 kernel/exit.c:1046
+ __do_sys_exit_group kernel/exit.c:1057 [inline]
+ __se_sys_exit_group kernel/exit.c:1055 [inline]
+ __x64_sys_exit_group+0x3b/0x40 kernel/exit.c:1055
+ do_syscall_64+0x6c/0xd0
+ entry_SYSCALL_64_after_hwframe+0x61/0xcb
+RIP: 0033:0x7fe5e7b52479
+Code: Unable to access opcode bytes at RIP 0x7fe5e7b5244f.
+RSP: 002b:00007ffd3c800398 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fe5e7b52479
+RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000
+RBP: 00007fe5e7bcd2d0 R08: ffffffffffffffb8 R09: 0000000000000014
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe5e7bcd2d0
+R13: 0000000000000000 R14: 00007fe5e7bcdd20 R15: 00007fe5e7b24270
+
+The crash can be also be reproduced with the following (with a tc
+recompiled to allow for sfq limits of 1):
+
+tc qdisc add dev dummy0 handle 1: root tbf rate 1Kbit burst 100b lat 1s
+../iproute2-6.9.0/tc/tc qdisc add dev dummy0 handle 2: parent 1:10 sfq limit 1
+ifconfig dummy0 up
+ping -I dummy0 -f -c2 -W0.1 8.8.8.8
+sleep 1
+
+Scenario that triggers the crash:
+
+* the first packet is sent and queued in TBF and SFQ; qdisc qlen is 1
+
+* TBF dequeues: it peeks from SFQ which moves the packet to the
+ gso_skb list and keeps qdisc qlen set to 1. TBF is out of tokens so
+ it schedules itself for later.
+
+* the second packet is sent and TBF tries to queues it to SFQ. qdisc
+ qlen is now 2 and because the SFQ limit is 1 the packet is dropped
+ by SFQ. At this point qlen is 1, and all of the SFQ slots are empty,
+ however q->tail is not NULL.
+
+At this point, assuming no more packets are queued, when sch_dequeue
+runs again it will decrement the qlen for the current empty slot
+causing an underflow and the subsequent out of bounds access.
+
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Octavian Purdila <tavip@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241204030520.2084663-2-tavip@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -652,6 +652,10 @@ static int sfq_change(struct Qdisc *sch,
+ if (!p)
+ return -ENOMEM;
+ }
++ if (ctl->limit == 1) {
++ NL_SET_ERR_MSG_MOD(extack, "invalid limit");
++ return -EINVAL;
++ }
+ sch_tree_lock(sch);
+ if (ctl->quantum)
+ q->quantum = ctl->quantum;
--- /dev/null
+From stable+bounces-155163-greg=kroah.com@vger.kernel.org Fri Jun 20 17:50:01 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:18 +0000
+Subject: net_sched: sch_sfq: handle bigger packets
+To: stable@vger.kernel.org
+Cc: "Eric Dumazet" <edumazet@google.com>, "Toke Høiland-Jørgensen" <toke@redhat.com>, "Jakub Kicinski" <kuba@kernel.org>
+Message-ID: <20250620154623.331294-2-edumazet@google.com>
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit e4650d7ae4252f67e997a632adfae0dd74d3a99a upstream.
+
+SFQ has an assumption on dealing with packets smaller than 64KB.
+
+Even before BIG TCP, TCA_STAB can provide arbitrary big values
+in qdisc_pkt_len(skb)
+
+It is time to switch (struct sfq_slot)->allot to a 32bit field.
+
+sizeof(struct sfq_slot) is now 64 bytes, giving better cache locality.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://patch.msgid.link/20241008111603.653140-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c | 39 +++++++++++++--------------------------
+ 1 file changed, 13 insertions(+), 26 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -77,12 +77,6 @@
+ #define SFQ_EMPTY_SLOT 0xffff
+ #define SFQ_DEFAULT_HASH_DIVISOR 1024
+
+-/* We use 16 bits to store allot, and want to handle packets up to 64K
+- * Scale allot by 8 (1<<3) so that no overflow occurs.
+- */
+-#define SFQ_ALLOT_SHIFT 3
+-#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
+-
+ /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
+ typedef u16 sfq_index;
+
+@@ -104,7 +98,7 @@ struct sfq_slot {
+ sfq_index next; /* next slot in sfq RR chain */
+ struct sfq_head dep; /* anchor in dep[] chains */
+ unsigned short hash; /* hash value (index in ht[]) */
+- short allot; /* credit for this slot */
++ int allot; /* credit for this slot */
+
+ unsigned int backlog;
+ struct red_vars vars;
+@@ -120,7 +114,6 @@ struct sfq_sched_data {
+ siphash_key_t perturbation;
+ u8 cur_depth; /* depth of longest slot */
+ u8 flags;
+- unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
+ struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
+ sfq_index *ht; /* Hash table ('divisor' slots) */
+@@ -459,7 +452,7 @@ enqueue:
+ */
+ q->tail = slot;
+ /* We could use a bigger initial quantum for new flows */
+- slot->allot = q->scaled_quantum;
++ slot->allot = q->quantum;
+ }
+ if (++sch->q.qlen <= q->limit)
+ return NET_XMIT_SUCCESS;
+@@ -496,7 +489,7 @@ next_slot:
+ slot = &q->slots[a];
+ if (slot->allot <= 0) {
+ q->tail = slot;
+- slot->allot += q->scaled_quantum;
++ slot->allot += q->quantum;
+ goto next_slot;
+ }
+ skb = slot_dequeue_head(slot);
+@@ -515,7 +508,7 @@ next_slot:
+ }
+ q->tail->next = next_a;
+ } else {
+- slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
++ slot->allot -= qdisc_pkt_len(skb);
+ }
+ return skb;
+ }
+@@ -598,7 +591,7 @@ drop:
+ q->tail->next = x;
+ }
+ q->tail = slot;
+- slot->allot = q->scaled_quantum;
++ slot->allot = q->quantum;
+ }
+ }
+ sch->q.qlen -= dropped;
+@@ -628,7 +621,8 @@ static void sfq_perturbation(struct time
+ mod_timer(&q->perturb_timer, jiffies + period);
+ }
+
+-static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
++static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
++ struct netlink_ext_ack *extack)
+ {
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ struct tc_sfq_qopt *ctl = nla_data(opt);
+@@ -646,14 +640,10 @@ static int sfq_change(struct Qdisc *sch,
+ (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
+ return -EINVAL;
+
+- /* slot->allot is a short, make sure quantum is not too big. */
+- if (ctl->quantum) {
+- unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
+-
+- if (scaled <= 0 || scaled > SHRT_MAX)
+- return -EINVAL;
++ if ((int)ctl->quantum < 0) {
++ NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
++ return -EINVAL;
+ }
+-
+ if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
+ ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
+ return -EINVAL;
+@@ -663,10 +653,8 @@ static int sfq_change(struct Qdisc *sch,
+ return -ENOMEM;
+ }
+ sch_tree_lock(sch);
+- if (ctl->quantum) {
++ if (ctl->quantum)
+ q->quantum = ctl->quantum;
+- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+- }
+ WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
+ if (ctl->flows)
+ q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+@@ -762,12 +750,11 @@ static int sfq_init(struct Qdisc *sch, s
+ q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
+ q->maxflows = SFQ_DEFAULT_FLOWS;
+ q->quantum = psched_mtu(qdisc_dev(sch));
+- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+ q->perturb_period = 0;
+ get_random_bytes(&q->perturbation, sizeof(q->perturbation));
+
+ if (opt) {
+- int err = sfq_change(sch, opt);
++ int err = sfq_change(sch, opt, extack);
+ if (err)
+ return err;
+ }
+@@ -878,7 +865,7 @@ static int sfq_dump_class_stats(struct Q
+ if (idx != SFQ_EMPTY_SLOT) {
+ const struct sfq_slot *slot = &q->slots[idx];
+
+- xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
++ xstats.allot = slot->allot;
+ qs.qlen = slot->qlen;
+ qs.backlog = slot->backlog;
+ }
--- /dev/null
+From stable+bounces-155166-greg=kroah.com@vger.kernel.org Fri Jun 20 17:53:20 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:21 +0000
+Subject: net_sched: sch_sfq: move the limit validation
+To: stable@vger.kernel.org
+Cc: Octavian Purdila <tavip@google.com>, syzbot <syzkaller@googlegroups.com>, Cong Wang <xiyou.wangcong@gmail.com>, "David S. Miller" <davem@davemloft.net>
+Message-ID: <20250620154623.331294-5-edumazet@google.com>
+
+From: Octavian Purdila <tavip@google.com>
+
+commit b3bf8f63e6179076b57c9de660c9f80b5abefe70 upstream.
+
+It is not sufficient to directly validate the limit on the data that
+the user passes as it can be updated based on how the other parameters
+are changed.
+
+Move the check at the end of the configuration update process to also
+catch scenarios where the limit is indirectly updated, for example
+with the following configurations:
+
+tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 depth 1
+tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 divisor 1
+
+This fixes the following syzkaller reported crash:
+
+------------[ cut here ]------------
+UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:203:6
+index 65535 is out of range for type 'struct sfq_head[128]'
+CPU: 1 UID: 0 PID: 3037 Comm: syz.2.16 Not tainted 6.14.0-rc2-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:94 [inline]
+ dump_stack_lvl+0x201/0x300 lib/dump_stack.c:120
+ ubsan_epilogue lib/ubsan.c:231 [inline]
+ __ubsan_handle_out_of_bounds+0xf5/0x120 lib/ubsan.c:429
+ sfq_link net/sched/sch_sfq.c:203 [inline]
+ sfq_dec+0x53c/0x610 net/sched/sch_sfq.c:231
+ sfq_dequeue+0x34e/0x8c0 net/sched/sch_sfq.c:493
+ sfq_reset+0x17/0x60 net/sched/sch_sfq.c:518
+ qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035
+ tbf_reset+0x41/0x110 net/sched/sch_tbf.c:339
+ qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035
+ dev_reset_queue+0x100/0x1b0 net/sched/sch_generic.c:1311
+ netdev_for_each_tx_queue include/linux/netdevice.h:2590 [inline]
+ dev_deactivate_many+0x7e5/0xe70 net/sched/sch_generic.c:1375
+
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Fixes: 10685681bafc ("net_sched: sch_sfq: don't allow 1 packet limit")
+Signed-off-by: Octavian Purdila <tavip@google.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -661,10 +661,6 @@ static int sfq_change(struct Qdisc *sch,
+ if (!p)
+ return -ENOMEM;
+ }
+- if (ctl->limit == 1) {
+- NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+- return -EINVAL;
+- }
+
+ sch_tree_lock(sch);
+
+@@ -705,6 +701,12 @@ static int sfq_change(struct Qdisc *sch,
+ limit = min_t(u32, ctl->limit, maxdepth * maxflows);
+ maxflows = min_t(u32, maxflows, limit);
+ }
++ if (limit == 1) {
++ sch_tree_unlock(sch);
++ kfree(p);
++ NL_SET_ERR_MSG_MOD(extack, "invalid limit");
++ return -EINVAL;
++ }
+
+ /* commit configuration */
+ q->limit = limit;
--- /dev/null
+From stable+bounces-155168-greg=kroah.com@vger.kernel.org Fri Jun 20 17:53:24 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:23 +0000
+Subject: net_sched: sch_sfq: reject invalid perturb period
+To: stable@vger.kernel.org
+Cc: Eric Dumazet <edumazet@google.com>, Gerrard Tai <gerrard.tai@starlabs.sg>, Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20250620154623.331294-7-edumazet@google.com>
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 7ca52541c05c832d32b112274f81a985101f9ba8 upstream.
+
+Gerrard Tai reported that SFQ perturb_period has no range check yet,
+and this can be used to trigger a race condition fixed in a separate patch.
+
+We want to make sure ctl->perturb_period * HZ will not overflow
+and is positive.
+
+Tested:
+
+tc qd add dev lo root sfq perturb -10 # negative value : error
+Error: sch_sfq: invalid perturb period.
+
+tc qd add dev lo root sfq perturb 1000000000 # too big : error
+Error: sch_sfq: invalid perturb period.
+
+tc qd add dev lo root sfq perturb 2000000 # acceptable value
+tc -s -d qd sh dev lo
+qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec
+ Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -653,6 +653,14 @@ static int sfq_change(struct Qdisc *sch,
+ NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
+ return -EINVAL;
+ }
++
++ if (ctl->perturb_period < 0 ||
++ ctl->perturb_period > INT_MAX / HZ) {
++ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period");
++ return -EINVAL;
++ }
++ perturb_period = ctl->perturb_period * HZ;
++
+ if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
+ ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
+ return -EINVAL;
+@@ -669,14 +677,12 @@ static int sfq_change(struct Qdisc *sch,
+ headdrop = q->headdrop;
+ maxdepth = q->maxdepth;
+ maxflows = q->maxflows;
+- perturb_period = q->perturb_period;
+ quantum = q->quantum;
+ flags = q->flags;
+
+ /* update and validate configuration */
+ if (ctl->quantum)
+ quantum = ctl->quantum;
+- perturb_period = ctl->perturb_period * HZ;
+ if (ctl->flows)
+ maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+ if (ctl->divisor) {
--- /dev/null
+From stable+bounces-155165-greg=kroah.com@vger.kernel.org Fri Jun 20 17:53:18 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:20 +0000
+Subject: net_sched: sch_sfq: use a temporary work area for validating configuration
+To: stable@vger.kernel.org
+Cc: Octavian Purdila <tavip@google.com>, Cong Wang <xiyou.wangcong@gmail.com>, "David S. Miller" <davem@davemloft.net>
+Message-ID: <20250620154623.331294-4-edumazet@google.com>
+
+From: Octavian Purdila <tavip@google.com>
+
+commit 8c0cea59d40cf6dd13c2950437631dd614fbade6 upstream.
+
+Many configuration parameters have influence on others (e.g. divisor
+-> flows -> limit, depth -> limit) and so it is difficult to correctly
+do all of the validation before applying the configuration. And if a
+validation error is detected late it is difficult to roll back a
+partially applied configuration.
+
+To avoid these issues use a temporary work area to update and validate
+the configuration and only then apply the configuration to the
+internal state.
+
+Signed-off-by: Octavian Purdila <tavip@google.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c | 56 ++++++++++++++++++++++++++++++++++++++++------------
+ 1 file changed, 44 insertions(+), 12 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -631,6 +631,15 @@ static int sfq_change(struct Qdisc *sch,
+ struct red_parms *p = NULL;
+ struct sk_buff *to_free = NULL;
+ struct sk_buff *tail = NULL;
++ unsigned int maxflows;
++ unsigned int quantum;
++ unsigned int divisor;
++ int perturb_period;
++ u8 headdrop;
++ u8 maxdepth;
++ int limit;
++ u8 flags;
++
+
+ if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
+ return -EINVAL;
+@@ -656,36 +665,59 @@ static int sfq_change(struct Qdisc *sch,
+ NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+ return -EINVAL;
+ }
++
+ sch_tree_lock(sch);
++
++ limit = q->limit;
++ divisor = q->divisor;
++ headdrop = q->headdrop;
++ maxdepth = q->maxdepth;
++ maxflows = q->maxflows;
++ perturb_period = q->perturb_period;
++ quantum = q->quantum;
++ flags = q->flags;
++
++ /* update and validate configuration */
+ if (ctl->quantum)
+- q->quantum = ctl->quantum;
+- WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
++ quantum = ctl->quantum;
++ perturb_period = ctl->perturb_period * HZ;
+ if (ctl->flows)
+- q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
++ maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+ if (ctl->divisor) {
+- q->divisor = ctl->divisor;
+- q->maxflows = min_t(u32, q->maxflows, q->divisor);
++ divisor = ctl->divisor;
++ maxflows = min_t(u32, maxflows, divisor);
+ }
+ if (ctl_v1) {
+ if (ctl_v1->depth)
+- q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
++ maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+ if (p) {
+- swap(q->red_parms, p);
+- red_set_parms(q->red_parms,
++ red_set_parms(p,
+ ctl_v1->qth_min, ctl_v1->qth_max,
+ ctl_v1->Wlog,
+ ctl_v1->Plog, ctl_v1->Scell_log,
+ NULL,
+ ctl_v1->max_P);
+ }
+- q->flags = ctl_v1->flags;
+- q->headdrop = ctl_v1->headdrop;
++ flags = ctl_v1->flags;
++ headdrop = ctl_v1->headdrop;
+ }
+ if (ctl->limit) {
+- q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
+- q->maxflows = min_t(u32, q->maxflows, q->limit);
++ limit = min_t(u32, ctl->limit, maxdepth * maxflows);
++ maxflows = min_t(u32, maxflows, limit);
+ }
+
++ /* commit configuration */
++ q->limit = limit;
++ q->divisor = divisor;
++ q->headdrop = headdrop;
++ q->maxdepth = maxdepth;
++ q->maxflows = maxflows;
++ WRITE_ONCE(q->perturb_period, perturb_period);
++ q->quantum = quantum;
++ q->flags = flags;
++ if (p)
++ swap(q->red_parms, p);
++
+ qlen = sch->q.qlen;
+ while (sch->q.qlen > q->limit) {
+ dropped += sfq_drop(sch, &to_free);
arm64-bpf-add-bhb-mitigation-to-the-epilogue-for-cbpf-programs.patch
arm64-bpf-only-mitigate-cbpf-programs-loaded-by-unprivileged-users.patch
arm64-proton-pack-add-new-cpus-k-values-for-branch-mitigation.patch
+net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch
+net_sched-sch_sfq-handle-bigger-packets.patch
+net_sched-sch_sfq-don-t-allow-1-packet-limit.patch
+net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch
+net_sched-sch_sfq-move-the-limit-validation.patch
+net_sched-sch_sfq-reject-invalid-perturb-period.patch
+mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch
+ext4-make-abort-mount-option-handling-standard.patch
+ext4-avoid-remount-errors-with-abort-mount-option.patch
+net-fix-checksum-update-for-ila-adj-transport.patch
bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch