From 0d9333e5e873cac4c6e579303cac2d06be307963 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 24 May 2025 06:22:31 -0400 Subject: [PATCH] Fixes for 5.4 Signed-off-by: Sasha Levin --- ...-fix-forwarding-of-fragmented-packet.patch | 95 +++++++++++++++++++ ...use-parsed-internal-phy-address-inst.patch | 48 ++++++++++ ...n-accounting-bug-when-using-peek-in-.patch | 62 ++++++++++++ queue-5.4/series | 4 + .../xfrm-sanitize-marks-before-insert.patch | 71 ++++++++++++++ 5 files changed, 280 insertions(+) create mode 100644 queue-5.4/bridge-netfilter-fix-forwarding-of-fragmented-packet.patch create mode 100644 queue-5.4/net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch create mode 100644 queue-5.4/sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch create mode 100644 queue-5.4/xfrm-sanitize-marks-before-insert.patch diff --git a/queue-5.4/bridge-netfilter-fix-forwarding-of-fragmented-packet.patch b/queue-5.4/bridge-netfilter-fix-forwarding-of-fragmented-packet.patch new file mode 100644 index 0000000000..97e262271b --- /dev/null +++ b/queue-5.4/bridge-netfilter-fix-forwarding-of-fragmented-packet.patch @@ -0,0 +1,95 @@ +From 227e252aa090cd38a899951347b7266bdde80d41 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 May 2025 11:48:48 +0300 +Subject: bridge: netfilter: Fix forwarding of fragmented packets + +From: Ido Schimmel + +[ Upstream commit 91b6dbced0ef1d680afdd69b14fc83d50ebafaf3 ] + +When netfilter defrag hooks are loaded (due to the presence of conntrack +rules, for example), fragmented packets entering the bridge will be +defragged by the bridge's pre-routing hook (br_nf_pre_routing() -> +ipv4_conntrack_defrag()). + +Later on, in the bridge's post-routing hook, the defragged packet will +be fragmented again. If the size of the largest fragment is larger than +what the kernel has determined as the destination MTU (using +ip_skb_dst_mtu()), the defragged packet will be dropped. + +Before commit ac6627a28dbf ("net: ipv4: Consolidate ipv4_mtu and +ip_dst_mtu_maybe_forward"), ip_skb_dst_mtu() would return dst_mtu() as +the destination MTU. Assuming the dst entry attached to the packet is +the bridge's fake rtable one, this would simply be the bridge's MTU (see +fake_mtu()). + +However, after above mentioned commit, ip_skb_dst_mtu() ends up +returning the route's MTU stored in the dst entry's metrics. Ideally, in +case the dst entry is the bridge's fake rtable one, this should be the +bridge's MTU as the bridge takes care of updating this metric when its +MTU changes (see br_change_mtu()). + +Unfortunately, the last operation is a no-op given the metrics attached +to the fake rtable entry are marked as read-only. Therefore, +ip_skb_dst_mtu() ends up returning 1500 (the initial MTU value) and +defragged packets are dropped during fragmentation when dealing with +large fragments and high MTU (e.g., 9k). + +Fix by moving the fake rtable entry's metrics to be per-bridge (in a +similar fashion to the fake rtable entry itself) and marking them as +writable, thereby allowing MTU changes to be reflected. + +Fixes: 62fa8a846d7d ("net: Implement read-only protection and COW'ing of metrics.") +Fixes: 33eb9873a283 ("bridge: initialize fake_rtable metrics") +Reported-by: Venkat Venkatsubra +Closes: https://lore.kernel.org/netdev/PH0PR10MB4504888284FF4CBA648197D0ACB82@PH0PR10MB4504.namprd10.prod.outlook.com/ +Tested-by: Venkat Venkatsubra +Signed-off-by: Ido Schimmel +Acked-by: Nikolay Aleksandrov +Link: https://patch.msgid.link/20250515084848.727706-1-idosch@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/bridge/br_nf_core.c | 7 ++----- + net/bridge/br_private.h | 1 + + 2 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c +index 8c69f0c95a8ed..b8c8deb87407d 100644 +--- a/net/bridge/br_nf_core.c ++++ b/net/bridge/br_nf_core.c +@@ -65,17 +65,14 @@ static struct dst_ops fake_dst_ops = { + * ipt_REJECT needs it. Future netfilter modules might + * require us to fill additional fields. + */ +-static const u32 br_dst_default_metrics[RTAX_MAX] = { +- [RTAX_MTU - 1] = 1500, +-}; +- + void br_netfilter_rtable_init(struct net_bridge *br) + { + struct rtable *rt = &br->fake_rtable; + + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.dev = br->dev; +- dst_init_metrics(&rt->dst, br_dst_default_metrics, true); ++ dst_init_metrics(&rt->dst, br->metrics, false); ++ dst_metric_set(&rt->dst, RTAX_MTU, br->dev->mtu); + rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; + rt->dst.ops = &fake_dst_ops; + } +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h +index 5ba4620727a7e..eb0b1b513feb3 100644 +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -346,6 +346,7 @@ struct net_bridge { + struct rtable fake_rtable; + struct rt6_info fake_rt6_info; + }; ++ u32 metrics[RTAX_MAX]; + #endif + u16 group_fwd_mask; + u16 group_fwd_mask_required; +-- +2.39.5 + diff --git a/queue-5.4/net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch b/queue-5.4/net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch new file mode 100644 index 0000000000..1aef97d797 --- /dev/null +++ b/queue-5.4/net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch @@ -0,0 +1,48 @@ +From 1ee9c81c239400b0b176642e7c3274270afb6b4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 May 2025 18:49:36 +0200 +Subject: net: dwmac-sun8i: Use parsed internal PHY address instead of 1 + +From: Paul Kocialkowski + +[ Upstream commit 47653e4243f2b0a26372e481ca098936b51ec3a8 ] + +While the MDIO address of the internal PHY on Allwinner sun8i chips is +generally 1, of_mdio_parse_addr is used to cleanly parse the address +from the device-tree instead of hardcoding it. + +A commit reworking the code ditched the parsed value and hardcoded the +value 1 instead, which didn't really break anything but is more fragile +and not future-proof. + +Restore the initial behavior using the parsed address returned from the +helper. + +Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") +Signed-off-by: Paul Kocialkowski +Reviewed-by: Andrew Lunn +Acked-by: Corentin LABBE +Tested-by: Corentin LABBE +Link: https://patch.msgid.link/20250519164936.4172658-1-paulk@sys-base.io +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +index 497ce6e6b16ff..99387e39c04ea 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +@@ -919,7 +919,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) + /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY + * address. No need to mask it again. + */ +- reg |= 1 << H3_EPHY_ADDR_SHIFT; ++ reg |= ret << H3_EPHY_ADDR_SHIFT; + } else { + /* For SoCs without internal PHY the PHY selection bit should be + * set to 0 (external PHY). +-- +2.39.5 + diff --git a/queue-5.4/sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch b/queue-5.4/sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch new file mode 100644 index 0000000000..92ef55a2b2 --- /dev/null +++ b/queue-5.4/sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch @@ -0,0 +1,62 @@ +From 25c2748e8f048bd8f96313badb2d2c9b412d21c0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 18 May 2025 15:20:37 -0700 +Subject: sch_hfsc: Fix qlen accounting bug when using peek in hfsc_enqueue() + +From: Cong Wang + +[ Upstream commit 3f981138109f63232a5fb7165938d4c945cc1b9d ] + +When enqueuing the first packet to an HFSC class, hfsc_enqueue() calls the +child qdisc's peek() operation before incrementing sch->q.qlen and +sch->qstats.backlog. If the child qdisc uses qdisc_peek_dequeued(), this may +trigger an immediate dequeue and potential packet drop. In such cases, +qdisc_tree_reduce_backlog() is called, but the HFSC qdisc's qlen and backlog +have not yet been updated, leading to inconsistent queue accounting. This +can leave an empty HFSC class in the active list, causing further +consequences like use-after-free. + +This patch fixes the bug by moving the increment of sch->q.qlen and +sch->qstats.backlog before the call to the child qdisc's peek() operation. +This ensures that queue length and backlog are always accurate when packet +drops or dequeues are triggered during the peek. + +Fixes: 12d0ad3be9c3 ("net/sched/sch_hfsc.c: handle corner cases where head may change invalidating calculated deadline") +Reported-by: Mingi Cho +Signed-off-by: Cong Wang +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250518222038.58538-2-xiyou.wangcong@gmail.com +Reviewed-by: Jamal Hadi Salim +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/sched/sch_hfsc.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c +index 5d73d02b8dce7..66c33d7e243d5 100644 +--- a/net/sched/sch_hfsc.c ++++ b/net/sched/sch_hfsc.c +@@ -1573,6 +1573,9 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) + return err; + } + ++ sch->qstats.backlog += len; ++ sch->q.qlen++; ++ + if (first && !cl->cl_nactive) { + if (cl->cl_flags & HFSC_RSC) + init_ed(cl, len); +@@ -1588,9 +1591,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) + + } + +- sch->qstats.backlog += len; +- sch->q.qlen++; +- + return NET_XMIT_SUCCESS; + } + +-- +2.39.5 + diff --git a/queue-5.4/series b/queue-5.4/series index b0909da1ca..a369ea0e07 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -168,3 +168,7 @@ nvmet-tcp-don-t-restore-null-sk_state_change.patch btrfs-correct-the-order-of-prelim_ref-arguments-in-b.patch xenbus-allow-pvh-dom0-a-non-local-xenstore.patch __legitimize_mnt-check-for-mnt_sync_umount-should-be.patch +xfrm-sanitize-marks-before-insert.patch +bridge-netfilter-fix-forwarding-of-fragmented-packet.patch +net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch +sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch diff --git a/queue-5.4/xfrm-sanitize-marks-before-insert.patch b/queue-5.4/xfrm-sanitize-marks-before-insert.patch new file mode 100644 index 0000000000..a27003ea9b --- /dev/null +++ b/queue-5.4/xfrm-sanitize-marks-before-insert.patch @@ -0,0 +1,71 @@ +From e28f4003eb773f847db10186cd7b44de3b802235 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 May 2025 13:31:58 +0200 +Subject: xfrm: Sanitize marks before insert + +From: Paul Chaignon + +[ Upstream commit 0b91fda3a1f044141e1e615456ff62508c32b202 ] + +Prior to this patch, the mark is sanitized (applying the state's mask to +the state's value) only on inserts when checking if a conflicting XFRM +state or policy exists. + +We discovered in Cilium that this same sanitization does not occur +in the hot-path __xfrm_state_lookup. In the hot-path, the sk_buff's mark +is simply compared to the state's value: + + if ((mark & x->mark.m) != x->mark.v) + continue; + +Therefore, users can define unsanitized marks (ex. 0xf42/0xf00) which will +never match any packet. + +This commit updates __xfrm_state_insert and xfrm_policy_insert to store +the sanitized marks, thus removing this footgun. + +This has the side effect of changing the ip output, as the +returned mark will have the mask applied to it when printed. + +Fixes: 3d6acfa7641f ("xfrm: SA lookups with mark") +Signed-off-by: Paul Chaignon +Signed-off-by: Louis DeLosSantos +Co-developed-by: Louis DeLosSantos +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + net/xfrm/xfrm_policy.c | 3 +++ + net/xfrm/xfrm_state.c | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index bffac2f4b581d..78f69ee65d0ea 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -1571,6 +1571,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) + struct xfrm_policy *delpol; + struct hlist_head *chain; + ++ /* Sanitize mark before store */ ++ policy->mark.v &= policy->mark.m; ++ + spin_lock_bh(&net->xfrm.xfrm_policy_lock); + chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); + if (chain) +diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c +index e8be18bff0960..7380aa3a5f0fe 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -1244,6 +1244,9 @@ static void __xfrm_state_insert(struct xfrm_state *x) + + list_add(&x->km.all, &net->xfrm.state_all); + ++ /* Sanitize mark before store */ ++ x->mark.v &= x->mark.m; ++ + h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); +-- +2.39.5 + -- 2.47.3