From: Sasha Levin Date: Fri, 25 Jul 2025 15:06:03 +0000 (-0400) Subject: Fixes for 6.15 X-Git-Tag: v6.6.101~47 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f17bac3e2ce7a127e4b21e68ea52a61082e08ffc;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.15 Signed-off-by: Sasha Levin --- diff --git a/queue-6.15/alsa-hda-realtek-fix-mute-led-mask-on-hp-omen-16-lap.patch b/queue-6.15/alsa-hda-realtek-fix-mute-led-mask-on-hp-omen-16-lap.patch new file mode 100644 index 0000000000..d39b01f9d3 --- /dev/null +++ b/queue-6.15/alsa-hda-realtek-fix-mute-led-mask-on-hp-omen-16-lap.patch @@ -0,0 +1,44 @@ +From 0b81f1eb080b5790f2d893138a1e37e80fb399ae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Jul 2025 22:52:24 +0530 +Subject: ALSA: hda/realtek: Fix mute LED mask on HP OMEN 16 laptop + +From: SHARAN KUMAR M + +[ Upstream commit 931837cd924048ab785eedb4cee5b276c90a2924 ] + +This patch fixes my previous commit, in which I enabled the mute +LED. It corrects the coefficient mask value introduced +in commit e5182305a519, which was intended to enable the mute LED +functionality. During testing, multiple values were evaluated, and +an incorrect value was mistakenly included in the final commit. +This update fixes that error by applying the correct mask value for +proper mute LED behavior. 
+ +Tested on 6.15.5-arch1-1 + +Fixes: e5182305a519 ("ALSA: hda/realtek: Enable Mute LED on HP OMEN 16 Laptop xd000xx") +Signed-off-by: SHARAN KUMAR M +Link: https://patch.msgid.link/20250722172224.15359-1-sharweshraajan@gmail.com +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 5a6d0424bfedc..347096dc354ec 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -4753,7 +4753,7 @@ static void alc245_fixup_hp_mute_led_v1_coefbit(struct hda_codec *codec, + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->mute_led_polarity = 0; + spec->mute_led_coef.idx = 0x0b; +- spec->mute_led_coef.mask = 1 << 3; ++ spec->mute_led_coef.mask = 3 << 2; + spec->mute_led_coef.on = 1 << 3; + spec->mute_led_coef.off = 0; + snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set); +-- +2.39.5 + diff --git a/queue-6.15/asoc-mediatek-mt8365-dai-i2s-pass-correct-size-to-mt.patch b/queue-6.15/asoc-mediatek-mt8365-dai-i2s-pass-correct-size-to-mt.patch new file mode 100644 index 0000000000..6a0e1a002c --- /dev/null +++ b/queue-6.15/asoc-mediatek-mt8365-dai-i2s-pass-correct-size-to-mt.patch @@ -0,0 +1,71 @@ +From c376ce349c071b42be41c37d7256536f26d2f280 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Jul 2025 09:18:06 +0800 +Subject: ASoC: mediatek: mt8365-dai-i2s: pass correct size to + mt8365_dai_set_priv + +From: Guoqing Jiang + +[ Upstream commit 6bea85979d05470e6416a2bb504a9bcd9178304c ] + +Given mt8365_dai_set_priv allocate priv_size space to copy priv_data which +means we should pass mt8365_i2s_priv[i] or "struct mtk_afe_i2s_priv" +instead of afe_priv which has the size of "struct mt8365_afe_private". + +Otherwise the KASAN complains about. + +[ 59.389765] BUG: KASAN: global-out-of-bounds in mt8365_dai_set_priv+0xc8/0x168 [snd_soc_mt8365_pcm] +... 
+[ 59.394789] Call trace: +[ 59.395167] dump_backtrace+0xa0/0x128 +[ 59.395733] show_stack+0x20/0x38 +[ 59.396238] dump_stack_lvl+0xe8/0x148 +[ 59.396806] print_report+0x37c/0x5e0 +[ 59.397358] kasan_report+0xac/0xf8 +[ 59.397885] kasan_check_range+0xe8/0x190 +[ 59.398485] asan_memcpy+0x3c/0x98 +[ 59.399022] mt8365_dai_set_priv+0xc8/0x168 [snd_soc_mt8365_pcm] +[ 59.399928] mt8365_dai_i2s_register+0x1e8/0x2b0 [snd_soc_mt8365_pcm] +[ 59.400893] mt8365_afe_pcm_dev_probe+0x4d0/0xdf0 [snd_soc_mt8365_pcm] +[ 59.401873] platform_probe+0xcc/0x228 +[ 59.402442] really_probe+0x340/0x9e8 +[ 59.402992] driver_probe_device+0x16c/0x3f8 +[ 59.403638] driver_probe_device+0x64/0x1d8 +[ 59.404256] driver_attach+0x1dc/0x4c8 +[ 59.404840] bus_for_each_dev+0x100/0x190 +[ 59.405442] driver_attach+0x44/0x68 +[ 59.405980] bus_add_driver+0x23c/0x500 +[ 59.406550] driver_register+0xf8/0x3d0 +[ 59.407122] platform_driver_register+0x68/0x98 +[ 59.407810] mt8365_afe_pcm_driver_init+0x2c/0xff8 [snd_soc_mt8365_pcm] + +Fixes: 402bbb13a195 ("ASoC: mediatek: mt8365: Add I2S DAI support") +Signed-off-by: Guoqing Jiang +Reviewed-by: AngeloGioacchino Del Regno +Link: https://patch.msgid.link/20250710011806.134507-1-guoqing.jiang@canonical.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/mediatek/mt8365/mt8365-dai-i2s.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c b/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c +index cae51756cead8..cb9beb172ed59 100644 +--- a/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c ++++ b/sound/soc/mediatek/mt8365/mt8365-dai-i2s.c +@@ -812,11 +812,10 @@ static const struct snd_soc_dapm_route mtk_dai_i2s_routes[] = { + static int mt8365_dai_i2s_set_priv(struct mtk_base_afe *afe) + { + int i, ret; +- struct mt8365_afe_private *afe_priv = afe->platform_priv; + + for (i = 0; i < DAI_I2S_NUM; i++) { + ret = mt8365_dai_set_priv(afe, mt8365_i2s_priv[i].id, +- sizeof(*afe_priv), ++ 
sizeof(mt8365_i2s_priv[i]), + &mt8365_i2s_priv[i]); + if (ret) + return ret; +-- +2.39.5 + diff --git a/queue-6.15/can-netlink-can_changelink-fix-null-pointer-deref-of.patch b/queue-6.15/can-netlink-can_changelink-fix-null-pointer-deref-of.patch new file mode 100644 index 0000000000..71705a3060 --- /dev/null +++ b/queue-6.15/can-netlink-can_changelink-fix-null-pointer-deref-of.patch @@ -0,0 +1,119 @@ +From 6f789e359a29b1ae95c32d9b1aa9fa696b14cec0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Jul 2025 22:35:46 +0200 +Subject: can: netlink: can_changelink(): fix NULL pointer deref of struct + can_priv::do_set_mode + +From: Marc Kleine-Budde + +[ Upstream commit c1f3f9797c1f44a762e6f5f72520b2e520537b52 ] + +Andrei Lalaev reported a NULL pointer deref when a CAN device is +restarted from Bus Off and the driver does not implement the struct +can_priv::do_set_mode callback. + +There are 2 code paths that call struct can_priv::do_set_mode: +- directly by a manual restart from the user space, via + can_changelink() +- delayed automatic restart after bus off (deactivated by default) + +To prevent the NULL pointer dereference, refuse a manual restart or +configuring the automatic restart delay in can_changelink() and report +the error via extack to user space. + +As an additional safety measure let can_restart() return an error if +can_priv::do_set_mode is not set instead of dereferencing it +unchecked. 
+ +Reported-by: Andrei Lalaev +Closes: https://lore.kernel.org/all/20250714175520.307467-1-andrey.lalaev@gmail.com +Fixes: 39549eef3587 ("can: CAN Network device driver and Netlink interface") +Link: https://patch.msgid.link/20250718-fix-nullptr-deref-do_set_mode-v1-1-0b520097bb96@pengutronix.de +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Sasha Levin +--- + drivers/net/can/dev/dev.c | 12 +++++++++--- + drivers/net/can/dev/netlink.c | 12 ++++++++++++ + 2 files changed, 21 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c +index 5ec3170b896a4..3fa805ac2c65b 100644 +--- a/drivers/net/can/dev/dev.c ++++ b/drivers/net/can/dev/dev.c +@@ -145,13 +145,16 @@ void can_change_state(struct net_device *dev, struct can_frame *cf, + EXPORT_SYMBOL_GPL(can_change_state); + + /* CAN device restart for bus-off recovery */ +-static void can_restart(struct net_device *dev) ++static int can_restart(struct net_device *dev) + { + struct can_priv *priv = netdev_priv(dev); + struct sk_buff *skb; + struct can_frame *cf; + int err; + ++ if (!priv->do_set_mode) ++ return -EOPNOTSUPP; ++ + if (netif_carrier_ok(dev)) + netdev_err(dev, "Attempt to restart for bus-off recovery, but carrier is OK?\n"); + +@@ -173,10 +176,14 @@ static void can_restart(struct net_device *dev) + if (err) { + netdev_err(dev, "Restart failed, error %pe\n", ERR_PTR(err)); + netif_carrier_off(dev); ++ ++ return err; + } else { + netdev_dbg(dev, "Restarted\n"); + priv->can_stats.restarts++; + } ++ ++ return 0; + } + + static void can_restart_work(struct work_struct *work) +@@ -201,9 +208,8 @@ int can_restart_now(struct net_device *dev) + return -EBUSY; + + cancel_delayed_work_sync(&priv->restart_work); +- can_restart(dev); + +- return 0; ++ return can_restart(dev); + } + + /* CAN bus-off +diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c +index f1db9b7ffd4d0..d5aa8da87961e 100644 +--- a/drivers/net/can/dev/netlink.c ++++ 
b/drivers/net/can/dev/netlink.c +@@ -285,6 +285,12 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], + } + + if (data[IFLA_CAN_RESTART_MS]) { ++ if (!priv->do_set_mode) { ++ NL_SET_ERR_MSG(extack, ++ "Device doesn't support restart from Bus Off"); ++ return -EOPNOTSUPP; ++ } ++ + /* Do not allow changing restart delay while running */ + if (dev->flags & IFF_UP) + return -EBUSY; +@@ -292,6 +298,12 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], + } + + if (data[IFLA_CAN_RESTART]) { ++ if (!priv->do_set_mode) { ++ NL_SET_ERR_MSG(extack, ++ "Device doesn't support restart from Bus Off"); ++ return -EOPNOTSUPP; ++ } ++ + /* Do not allow a restart while not running */ + if (!(dev->flags & IFF_UP)) + return -EINVAL; +-- +2.39.5 + diff --git a/queue-6.15/drm-bridge-ti-sn65dsi86-remove-extra-semicolon-in-ti.patch b/queue-6.15/drm-bridge-ti-sn65dsi86-remove-extra-semicolon-in-ti.patch new file mode 100644 index 0000000000..cc6403eb89 --- /dev/null +++ b/queue-6.15/drm-bridge-ti-sn65dsi86-remove-extra-semicolon-in-ti.patch @@ -0,0 +1,40 @@ +From 88e78286996db4a5c9a5285bef9d0ea4639a87b7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Jul 2025 13:06:32 -0700 +Subject: drm/bridge: ti-sn65dsi86: Remove extra semicolon in + ti_sn_bridge_probe() + +From: Douglas Anderson + +[ Upstream commit 15a7ca747d9538c2ad8b0c81dd4c1261e0736c82 ] + +As reported by the kernel test robot, a recent patch introduced an +unnecessary semicolon. Remove it. 
+ +Fixes: 55e8ff842051 ("drm/bridge: ti-sn65dsi86: Add HPD for DisplayPort connector type") +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202506301704.0SBj6ply-lkp@intel.com/ +Reviewed-by: Devarsh Thakkar +Signed-off-by: Douglas Anderson +Link: https://lore.kernel.org/r/20250714130631.1.I1cfae3222e344a3b3c770d079ee6b6f7f3b5d636@changeid +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/bridge/ti-sn65dsi86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c +index 4ea13e5a3a54a..48766b6abd29a 100644 +--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c ++++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c +@@ -1351,7 +1351,7 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, + HPD_DISABLE, 0); + mutex_unlock(&pdata->comms_mutex); +- }; ++ } + + drm_bridge_add(&pdata->bridge); + +-- +2.39.5 + diff --git a/queue-6.15/i40e-report-vf-tx_dropped-with-tx_errors-instead-of-.patch b/queue-6.15/i40e-report-vf-tx_dropped-with-tx_errors-instead-of-.patch new file mode 100644 index 0000000000..579ac54c94 --- /dev/null +++ b/queue-6.15/i40e-report-vf-tx_dropped-with-tx_errors-instead-of-.patch @@ -0,0 +1,108 @@ +From d4c0c9c81da80b4119860d24b85020defac8e4b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Jun 2025 15:52:40 -0400 +Subject: i40e: report VF tx_dropped with tx_errors instead of tx_discards + +From: Dennis Chen + +[ Upstream commit 50b2af451597ca6eefe9d4543f8bbf8de8aa00e7 ] + +Currently the tx_dropped field in VF stats is not updated correctly +when reading stats from the PF. This is because it reads from +i40e_eth_stats.tx_discards which seems to be unused for per VSI stats, +as it is not updated by i40e_update_eth_stats() and the corresponding +register, GLV_TDPC, is not implemented[1]. 
+ +Use i40e_eth_stats.tx_errors instead, which is actually updated by +i40e_update_eth_stats() by reading from GLV_TEPC. + +To test, create a VF and try to send bad packets through it: + +$ echo 1 > /sys/class/net/enp2s0f0/device/sriov_numvfs +$ cat test.py +from scapy.all import * + +vlan_pkt = Ether(dst="ff:ff:ff:ff:ff:ff") / Dot1Q(vlan=999) / IP(dst="192.168.0.1") / ICMP() +ttl_pkt = IP(dst="8.8.8.8", ttl=0) / ICMP() + +print("Send packet with bad VLAN tag") +sendp(vlan_pkt, iface="enp2s0f0v0") +print("Send packet with TTL=0") +sendp(ttl_pkt, iface="enp2s0f0v0") +$ ip -s link show dev enp2s0f0 +16: enp2s0f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 + link/ether 3c:ec:ef:b7:e0:ac brd ff:ff:ff:ff:ff:ff + RX: bytes packets errors dropped missed mcast + 0 0 0 0 0 0 + TX: bytes packets errors dropped carrier collsns + 0 0 0 0 0 0 + vf 0 link/ether e2:c6:fd:c1:1e:92 brd ff:ff:ff:ff:ff:ff, spoof checking on, link-state auto, trust off + RX: bytes packets mcast bcast dropped + 0 0 0 0 0 + TX: bytes packets dropped + 0 0 0 +$ python test.py +Send packet with bad VLAN tag +. +Sent 1 packets. +Send packet with TTL=0 +. +Sent 1 packets. +$ ip -s link show dev enp2s0f0 +16: enp2s0f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 + link/ether 3c:ec:ef:b7:e0:ac brd ff:ff:ff:ff:ff:ff + RX: bytes packets errors dropped missed mcast + 0 0 0 0 0 0 + TX: bytes packets errors dropped carrier collsns + 0 0 0 0 0 0 + vf 0 link/ether e2:c6:fd:c1:1e:92 brd ff:ff:ff:ff:ff:ff, spoof checking on, link-state auto, trust off + RX: bytes packets mcast bcast dropped + 0 0 0 0 0 + TX: bytes packets dropped + 0 0 0 + +A packet with non-existent VLAN tag and a packet with TTL = 0 are sent, +but tx_dropped is not incremented. 
+ +After patch: + +$ ip -s link show dev enp2s0f0 +19: enp2s0f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 + link/ether 3c:ec:ef:b7:e0:ac brd ff:ff:ff:ff:ff:ff + RX: bytes packets errors dropped missed mcast + 0 0 0 0 0 0 + TX: bytes packets errors dropped carrier collsns + 0 0 0 0 0 0 + vf 0 link/ether 4a:b7:3d:37:f7:56 brd ff:ff:ff:ff:ff:ff, spoof checking on, link-state auto, trust off + RX: bytes packets mcast bcast dropped + 0 0 0 0 0 + TX: bytes packets dropped + 0 0 2 + +Fixes: dc645daef9af5bcbd9c ("i40e: implement VF stats NDO") +Signed-off-by: Dennis Chen +Link: https://www.intel.com/content/www/us/en/content-details/596333/intel-ethernet-controller-x710-tm4-at2-carlsville-datasheet.html +Reviewed-by: Simon Horman +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +index 88e6bef69342c..2dbe38eb94941 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +@@ -5006,7 +5006,7 @@ int i40e_get_vf_stats(struct net_device *netdev, int vf_id, + vf_stats->broadcast = stats->rx_broadcast; + vf_stats->multicast = stats->rx_multicast; + vf_stats->rx_dropped = stats->rx_discards + stats->rx_discards_other; +- vf_stats->tx_dropped = stats->tx_discards; ++ vf_stats->tx_dropped = stats->tx_errors; + + return 0; + } +-- +2.39.5 + diff --git a/queue-6.15/i40e-when-removing-vf-mac-filters-only-check-pf-set-.patch b/queue-6.15/i40e-when-removing-vf-mac-filters-only-check-pf-set-.patch new file mode 100644 index 0000000000..22908411a3 --- /dev/null +++ b/queue-6.15/i40e-when-removing-vf-mac-filters-only-check-pf-set-.patch @@ -0,0 +1,59 @@ +From 12f60b62bea460ea19f9607867084d240c1c4928 Mon Sep 17 00:00:00 2001 +From: Sasha 
Levin +Date: Wed, 25 Jun 2025 09:29:18 +1000 +Subject: i40e: When removing VF MAC filters, only check PF-set MAC + +From: Jamie Bainbridge + +[ Upstream commit 5a0df02999dbe838c3feed54b1d59e9445f68b89 ] + +When the PF is processing an Admin Queue message to delete a VF's MACs +from the MAC filter, we currently check if the PF set the MAC and if +the VF is trusted. + +This results in undesirable behaviour, where if a trusted VF with a +PF-set MAC sets itself down (which sends an AQ message to delete the +VF's MAC filters) then the VF MAC is erased from the interface. + +This results in the VF losing its PF-set MAC which should not happen. + +There is no need to check for trust at all, because an untrusted VF +cannot change its own MAC. The only check needed is whether the PF set +the MAC. If the PF set the MAC, then don't erase the MAC on link-down. + +Resolve this by changing the deletion check only for PF-set MAC. + +(the out-of-tree driver has also intentionally removed the check for VF +trust here with OOT driver version 2.26.8, this changes the Linux kernel +driver behaviour and comment to match the OOT driver behaviour) + +Fixes: ea2a1cfc3b201 ("i40e: Fix VF MAC filter removal") +Signed-off-by: Jamie Bainbridge +Reviewed-by: Simon Horman +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +index 2dbe38eb94941..7ccfc1191ae56 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +@@ -3137,10 +3137,10 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) + const u8 *addr = al->list[i].addr; + + /* Allow to delete VF primary MAC only if it was not set +- * administratively by PF or if VF is trusted. 
++ * administratively by PF. + */ + if (ether_addr_equal(addr, vf->default_lan_addr.addr)) { +- if (i40e_can_vf_change_mac(vf)) ++ if (!vf->pf_set_mac) + was_unimac_deleted = true; + else + continue; +-- +2.39.5 + diff --git a/queue-6.15/net-appletalk-fix-use-after-free-in-aarp-proxy-probe.patch b/queue-6.15/net-appletalk-fix-use-after-free-in-aarp-proxy-probe.patch new file mode 100644 index 0000000000..0e036fc2de --- /dev/null +++ b/queue-6.15/net-appletalk-fix-use-after-free-in-aarp-proxy-probe.patch @@ -0,0 +1,185 @@ +From 3dea38f5e6666123f456e37676bf036503994df6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Jul 2025 01:28:43 +0000 +Subject: net: appletalk: Fix use-after-free in AARP proxy probe +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Kito Xu (veritas501) + +[ Upstream commit 6c4a92d07b0850342d3becf2e608f805e972467c ] + +The AARP proxy‐probe routine (aarp_proxy_probe_network) sends a probe, +releases the aarp_lock, sleeps, then re-acquires the lock. During that +window an expire timer thread (__aarp_expire_timer) can remove and +kfree() the same entry, leading to a use-after-free. + +race condition: + + cpu 0 | cpu 1 + atalk_sendmsg() | atif_proxy_probe_device() + aarp_send_ddp() | aarp_proxy_probe_network() + mod_timer() | lock(aarp_lock) // LOCK!! + timeout around 200ms | alloc(aarp_entry) + and then call | proxies[hash] = aarp_entry + aarp_expire_timeout() | aarp_send_probe() + | unlock(aarp_lock) // UNLOCK!! + lock(aarp_lock) // LOCK!! | msleep(100); + __aarp_expire_timer(&proxies[ct]) | + free(aarp_entry) | + unlock(aarp_lock) // UNLOCK!! | + | lock(aarp_lock) // LOCK!! + | UAF aarp_entry !! 
+ +================================================================== +BUG: KASAN: slab-use-after-free in aarp_proxy_probe_network+0x560/0x630 net/appletalk/aarp.c:493 +Read of size 4 at addr ffff8880123aa360 by task repro/13278 + +CPU: 3 UID: 0 PID: 13278 Comm: repro Not tainted 6.15.2 #3 PREEMPT(full) +Call Trace: + + __dump_stack lib/dump_stack.c:94 [inline] + dump_stack_lvl+0x116/0x1b0 lib/dump_stack.c:120 + print_address_description mm/kasan/report.c:408 [inline] + print_report+0xc1/0x630 mm/kasan/report.c:521 + kasan_report+0xca/0x100 mm/kasan/report.c:634 + aarp_proxy_probe_network+0x560/0x630 net/appletalk/aarp.c:493 + atif_proxy_probe_device net/appletalk/ddp.c:332 [inline] + atif_ioctl+0xb58/0x16c0 net/appletalk/ddp.c:857 + atalk_ioctl+0x198/0x2f0 net/appletalk/ddp.c:1818 + sock_do_ioctl+0xdc/0x260 net/socket.c:1190 + sock_ioctl+0x239/0x6a0 net/socket.c:1311 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:906 [inline] + __se_sys_ioctl fs/ioctl.c:892 [inline] + __x64_sys_ioctl+0x194/0x200 fs/ioctl.c:892 + do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] + do_syscall_64+0xcb/0x250 arch/x86/entry/syscall_64.c:94 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + + +Allocated: + aarp_alloc net/appletalk/aarp.c:382 [inline] + aarp_proxy_probe_network+0xd8/0x630 net/appletalk/aarp.c:468 + atif_proxy_probe_device net/appletalk/ddp.c:332 [inline] + atif_ioctl+0xb58/0x16c0 net/appletalk/ddp.c:857 + atalk_ioctl+0x198/0x2f0 net/appletalk/ddp.c:1818 + +Freed: + kfree+0x148/0x4d0 mm/slub.c:4841 + __aarp_expire net/appletalk/aarp.c:90 [inline] + __aarp_expire_timer net/appletalk/aarp.c:261 [inline] + aarp_expire_timeout+0x480/0x6e0 net/appletalk/aarp.c:317 + +The buggy address belongs to the object at ffff8880123aa300 + which belongs to the cache kmalloc-192 of size 192 +The buggy address is located 96 bytes inside of + freed 192-byte region [ffff8880123aa300, ffff8880123aa3c0) + +Memory state around the buggy address: + ffff8880123aa200: 00 00 00 00 00 
00 00 00 00 00 00 00 00 00 00 00 + ffff8880123aa280: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc +>ffff8880123aa300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff8880123aa380: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc + ffff8880123aa400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +================================================================== + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Kito Xu (veritas501) +Link: https://patch.msgid.link/20250717012843.880423-1-hxzene@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/appletalk/aarp.c | 24 +++++++++++++++++++++--- + 1 file changed, 21 insertions(+), 3 deletions(-) + +diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c +index 9c787e2e4b173..4744e3fd45447 100644 +--- a/net/appletalk/aarp.c ++++ b/net/appletalk/aarp.c +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + + int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME; + int sysctl_aarp_tick_time = AARP_TICK_TIME; +@@ -44,6 +45,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME; + /* Lists of aarp entries */ + /** + * struct aarp_entry - AARP entry ++ * @refcnt: Reference count + * @last_sent: Last time we xmitted the aarp request + * @packet_queue: Queue of frames wait for resolution + * @status: Used for proxy AARP +@@ -55,6 +57,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME; + * @next: Next entry in chain + */ + struct aarp_entry { ++ refcount_t refcnt; + /* These first two are only used for unresolved entries */ + unsigned long last_sent; + struct sk_buff_head packet_queue; +@@ -79,6 +82,17 @@ static DEFINE_RWLOCK(aarp_lock); + /* Used to walk the list and purge/kick entries. 
*/ + static struct timer_list aarp_timer; + ++static inline void aarp_entry_get(struct aarp_entry *a) ++{ ++ refcount_inc(&a->refcnt); ++} ++ ++static inline void aarp_entry_put(struct aarp_entry *a) ++{ ++ if (refcount_dec_and_test(&a->refcnt)) ++ kfree(a); ++} ++ + /* + * Delete an aarp queue + * +@@ -87,7 +101,7 @@ static struct timer_list aarp_timer; + static void __aarp_expire(struct aarp_entry *a) + { + skb_queue_purge(&a->packet_queue); +- kfree(a); ++ aarp_entry_put(a); + } + + /* +@@ -380,9 +394,11 @@ static void aarp_purge(void) + static struct aarp_entry *aarp_alloc(void) + { + struct aarp_entry *a = kmalloc(sizeof(*a), GFP_ATOMIC); ++ if (!a) ++ return NULL; + +- if (a) +- skb_queue_head_init(&a->packet_queue); ++ refcount_set(&a->refcnt, 1); ++ skb_queue_head_init(&a->packet_queue); + return a; + } + +@@ -477,6 +493,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa) + entry->dev = atif->dev; + + write_lock_bh(&aarp_lock); ++ aarp_entry_get(entry); + + hash = sa->s_node % (AARP_HASH_SIZE - 1); + entry->next = proxies[hash]; +@@ -502,6 +519,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa) + retval = 1; + } + ++ aarp_entry_put(entry); + write_unlock_bh(&aarp_lock); + out: + return retval; +-- +2.39.5 + diff --git a/queue-6.15/net-hns3-default-enable-tx-bounce-buffer-when-smmu-e.patch b/queue-6.15/net-hns3-default-enable-tx-bounce-buffer-when-smmu-e.patch new file mode 100644 index 0000000000..c923e5540b --- /dev/null +++ b/queue-6.15/net-hns3-default-enable-tx-bounce-buffer-when-smmu-e.patch @@ -0,0 +1,125 @@ +From 5f88f003387323fdd16522c718ecdb62b4a3d5f6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Jul 2025 20:54:23 +0800 +Subject: net: hns3: default enable tx bounce buffer when smmu enabled + +From: Jijie Shao + +[ Upstream commit 49ade8630f36e9dca2395592cfb0b7deeb07e746 ] + +The SMMU engine on HIP09 chip has a hardware issue. 
+The SMMU pagetable prefetch feature may prefetch and use an invalid PTE +even if the PTE is valid at that time. This will cause the device to trigger +fake page faults. The solution is to avoid prefetching by adding a +SYNC command when the SMMU maps an IOVA, but then the NIC performance drops +sharply. So we use this workaround instead: always enable the tx bounce buffer +to avoid mapping/unmapping on the TX path. + +This issue only affects HNS3, so we always enable the +tx bounce buffer when the SMMU is enabled to improve performance. + +Fixes: 295ba232a8c3 ("net: hns3: add device version to replace pci revision") +Signed-off-by: Jian Shen +Signed-off-by: Jijie Shao +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250722125423.1270673-5-shaojijie@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../net/ethernet/hisilicon/hns3/hns3_enet.c | 31 +++++++++++++++++++ + .../net/ethernet/hisilicon/hns3/hns3_enet.h | 2 ++ + 2 files changed, 33 insertions(+) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +index b03b8758c7774..aaa803563bd2e 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1039,6 +1040,8 @@ static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring, + static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) + { + u32 alloc_size = ring->tqp->handle->kinfo.tx_spare_buf_size; ++ struct net_device *netdev = ring_to_netdev(ring); ++ struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hns3_tx_spare *tx_spare; + struct page *page; + dma_addr_t dma; +@@ -1080,6 +1083,7 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) + tx_spare->buf = page_address(page); + tx_spare->len = PAGE_SIZE << order; + ring->tx_spare = tx_spare; ++ ring->tx_copybreak = priv->tx_copybreak; + return; + + dma_mapping_error: 
+@@ -4874,6 +4878,30 @@ static void hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) + devm_kfree(&pdev->dev, priv->tqp_vector); + } + ++static void hns3_update_tx_spare_buf_config(struct hns3_nic_priv *priv) ++{ ++#define HNS3_MIN_SPARE_BUF_SIZE (2 * 1024 * 1024) ++#define HNS3_MAX_PACKET_SIZE (64 * 1024) ++ ++ struct iommu_domain *domain = iommu_get_domain_for_dev(priv->dev); ++ struct hnae3_ae_dev *ae_dev = hns3_get_ae_dev(priv->ae_handle); ++ struct hnae3_handle *handle = priv->ae_handle; ++ ++ if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) ++ return; ++ ++ if (!(domain && iommu_is_dma_domain(domain))) ++ return; ++ ++ priv->min_tx_copybreak = HNS3_MAX_PACKET_SIZE; ++ priv->min_tx_spare_buf_size = HNS3_MIN_SPARE_BUF_SIZE; ++ ++ if (priv->tx_copybreak < priv->min_tx_copybreak) ++ priv->tx_copybreak = priv->min_tx_copybreak; ++ if (handle->kinfo.tx_spare_buf_size < priv->min_tx_spare_buf_size) ++ handle->kinfo.tx_spare_buf_size = priv->min_tx_spare_buf_size; ++} ++ + static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, + unsigned int ring_type) + { +@@ -5107,6 +5135,7 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv) + int i, j; + int ret; + ++ hns3_update_tx_spare_buf_config(priv); + for (i = 0; i < ring_num; i++) { + ret = hns3_alloc_ring_memory(&priv->ring[i]); + if (ret) { +@@ -5311,6 +5340,8 @@ static int hns3_client_init(struct hnae3_handle *handle) + priv->ae_handle = handle; + priv->tx_timeout_count = 0; + priv->max_non_tso_bd_num = ae_dev->dev_specs.max_non_tso_bd_num; ++ priv->min_tx_copybreak = 0; ++ priv->min_tx_spare_buf_size = 0; + set_bit(HNS3_NIC_STATE_DOWN, &priv->state); + + handle->msg_enable = netif_msg_init(debug, DEFAULT_MSG_LEVEL); +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +index d36c4ed16d8dd..caf7a4df85852 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +@@ 
-596,6 +596,8 @@ struct hns3_nic_priv { + struct hns3_enet_coalesce rx_coal; + u32 tx_copybreak; + u32 rx_copybreak; ++ u32 min_tx_copybreak; ++ u32 min_tx_spare_buf_size; + }; + + union l3_hdr_info { +-- +2.39.5 + diff --git a/queue-6.15/net-hns3-disable-interrupt-when-ptp-init-failed.patch b/queue-6.15/net-hns3-disable-interrupt-when-ptp-init-failed.patch new file mode 100644 index 0000000000..10cd2a88d9 --- /dev/null +++ b/queue-6.15/net-hns3-disable-interrupt-when-ptp-init-failed.patch @@ -0,0 +1,66 @@ +From 7abc0aa9aba19198012e927378f20f69bea2d2eb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Jul 2025 20:54:21 +0800 +Subject: net: hns3: disable interrupt when ptp init failed + +From: Yonglong Liu + +[ Upstream commit cde304655f25d94a996c45b0f9956e7dcc2bc4c0 ] + +When ptp init failed, we'd better disable the interrupt and clear the +flag, to avoid early report interrupt at next probe. + +Fixes: 0bf5eb788512 ("net: hns3: add support for PTP") +Signed-off-by: Yonglong Liu +Signed-off-by: Jijie Shao +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250722125423.1270673-3-shaojijie@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +index ec581d4b696f5..4bd52eab39145 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +@@ -497,14 +497,14 @@ int hclge_ptp_init(struct hclge_dev *hdev) + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to init freq, ret = %d\n", ret); +- goto out; ++ goto out_clear_int; + } + + ret = hclge_ptp_set_ts_mode(hdev, &hdev->ptp->ts_cfg); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to init ts mode, ret = %d\n", ret); +- goto out; ++ goto out_clear_int; + } + + 
ktime_get_real_ts64(&ts); +@@ -512,7 +512,7 @@ int hclge_ptp_init(struct hclge_dev *hdev) + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to init ts time, ret = %d\n", ret); +- goto out; ++ goto out_clear_int; + } + + set_bit(HCLGE_STATE_PTP_EN, &hdev->state); +@@ -520,6 +520,9 @@ int hclge_ptp_init(struct hclge_dev *hdev) + + return 0; + ++out_clear_int: ++ clear_bit(HCLGE_PTP_FLAG_EN, &hdev->ptp->flags); ++ hclge_ptp_int_en(hdev, false); + out: + hclge_ptp_destroy_clock(hdev); + +-- +2.39.5 + diff --git a/queue-6.15/net-hns3-fix-concurrent-setting-vlan-filter-issue.patch b/queue-6.15/net-hns3-fix-concurrent-setting-vlan-filter-issue.patch new file mode 100644 index 0000000000..5489fb7187 --- /dev/null +++ b/queue-6.15/net-hns3-fix-concurrent-setting-vlan-filter-issue.patch @@ -0,0 +1,105 @@ +From 944efc33d7dc6dc9fa69bae46dca3a19f1256aa5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Jul 2025 20:54:20 +0800 +Subject: net: hns3: fix concurrent setting vlan filter issue + +From: Jian Shen + +[ Upstream commit 4555f8f8b6aa46940f55feb6a07704c2935b6d6e ] + +The vport->req_vlan_fltr_en may be changed concurrently by function +hclge_sync_vlan_fltr_state() called in periodic work task and +function hclge_enable_vport_vlan_filter() called by user configuration. +It may cause the user configuration to be inoperative. Fix it by protecting +vport->req_vlan_fltr_en with vport_lock. 
+ +Fixes: 2ba306627f59 ("net: hns3: add support for modify VLAN filter state") +Signed-off-by: Jian Shen +Signed-off-by: Jijie Shao +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250722125423.1270673-2-shaojijie@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../hisilicon/hns3/hns3pf/hclge_main.c | 36 +++++++++++-------- + 1 file changed, 21 insertions(+), 15 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +index 3e28a08934abd..4ea19c089578e 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -9576,33 +9576,36 @@ static bool hclge_need_enable_vport_vlan_filter(struct hclge_vport *vport) + return false; + } + +-int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en) ++static int __hclge_enable_vport_vlan_filter(struct hclge_vport *vport, ++ bool request_en) + { +- struct hclge_dev *hdev = vport->back; + bool need_en; + int ret; + +- mutex_lock(&hdev->vport_lock); +- +- vport->req_vlan_fltr_en = request_en; +- + need_en = hclge_need_enable_vport_vlan_filter(vport); +- if (need_en == vport->cur_vlan_fltr_en) { +- mutex_unlock(&hdev->vport_lock); ++ if (need_en == vport->cur_vlan_fltr_en) + return 0; +- } + + ret = hclge_set_vport_vlan_filter(vport, need_en); +- if (ret) { +- mutex_unlock(&hdev->vport_lock); ++ if (ret) + return ret; +- } + + vport->cur_vlan_fltr_en = need_en; + ++ return 0; ++} ++ ++int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en) ++{ ++ struct hclge_dev *hdev = vport->back; ++ int ret; ++ ++ mutex_lock(&hdev->vport_lock); ++ vport->req_vlan_fltr_en = request_en; ++ ret = __hclge_enable_vport_vlan_filter(vport, request_en); + mutex_unlock(&hdev->vport_lock); + +- return 0; ++ return ret; + } + + static int hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable) +@@ 
-10623,16 +10626,19 @@ static void hclge_sync_vlan_fltr_state(struct hclge_dev *hdev) + &vport->state)) + continue; + +- ret = hclge_enable_vport_vlan_filter(vport, +- vport->req_vlan_fltr_en); ++ mutex_lock(&hdev->vport_lock); ++ ret = __hclge_enable_vport_vlan_filter(vport, ++ vport->req_vlan_fltr_en); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to sync vlan filter state for vport%u, ret = %d\n", + vport->vport_id, ret); + set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, + &vport->state); ++ mutex_unlock(&hdev->vport_lock); + return; + } ++ mutex_unlock(&hdev->vport_lock); + } + } + +-- +2.39.5 + diff --git a/queue-6.15/net-hns3-fixed-vf-get-max-channels-bug.patch b/queue-6.15/net-hns3-fixed-vf-get-max-channels-bug.patch new file mode 100644 index 0000000000..f8b60fb1e1 --- /dev/null +++ b/queue-6.15/net-hns3-fixed-vf-get-max-channels-bug.patch @@ -0,0 +1,45 @@ +From 38e38a8f2db17cda0b23dc978231fab24f3dc9e9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Jul 2025 20:54:22 +0800 +Subject: net: hns3: fixed vf get max channels bug + +From: Jian Shen + +[ Upstream commit b3e75c0bcc53f647311960bc1b0970b9b480ca5a ] + +Currently, the queried maximum of vf channels is the maximum of channels +supported by each TC. However, the actual maximum of channels is +the maximum of channels supported by the device. 
+ +Fixes: 849e46077689 ("net: hns3: add ethtool_ops.get_channels support for VF") +Signed-off-by: Jian Shen +Signed-off-by: Hao Lan +Signed-off-by: Jijie Shao +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250722125423.1270673-4-shaojijie@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +index dada42e7e0ec9..27d10aeafb2b1 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +@@ -3094,11 +3094,7 @@ static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) + + static u32 hclgevf_get_max_channels(struct hclgevf_dev *hdev) + { +- struct hnae3_handle *nic = &hdev->nic; +- struct hnae3_knic_private_info *kinfo = &nic->kinfo; +- +- return min_t(u32, hdev->rss_size_max, +- hdev->num_tqps / kinfo->tc_info.num_tc); ++ return min(hdev->rss_size_max, hdev->num_tqps); + } + + /** +-- +2.39.5 + diff --git a/queue-6.15/net-mlx5-e-switch-fix-peer-miss-rules-to-use-peer-es.patch b/queue-6.15/net-mlx5-e-switch-fix-peer-miss-rules-to-use-peer-es.patch new file mode 100644 index 0000000000..a1dacc2ca6 --- /dev/null +++ b/queue-6.15/net-mlx5-e-switch-fix-peer-miss-rules-to-use-peer-es.patch @@ -0,0 +1,245 @@ +From b1ead7ab5f1548920ecf89bc5494fd8e4ca8f380 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Jul 2025 15:06:10 +0300 +Subject: net/mlx5: E-Switch, Fix peer miss rules to use peer eswitch + +From: Shahar Shitrit + +[ Upstream commit 5b4c56ad4da0aa00b258ab50b1f5775b7d3108c7 ] + +In the original design, it is assumed local and peer eswitches have the +same number of vfs. However, in new firmware, local and peer eswitches +can have different number of vfs configured by mlxconfig. 
In such +configuration, it is incorrect to derive the number of vfs from the +local device's eswitch. + +Fix this by updating the peer miss rules add and delete functions to use +the peer device's eswitch and vf count instead of the local device's +information, ensuring correct behavior regardless of vf configuration +differences. + +Fixes: ac004b832128 ("net/mlx5e: E-Switch, Add peer miss rules") +Signed-off-by: Shahar Shitrit +Reviewed-by: Mark Bloch +Signed-off-by: Tariq Toukan +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/1752753970-261832-3-git-send-email-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/eswitch_offloads.c | 108 +++++++++--------- + 1 file changed, 54 insertions(+), 54 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +index 0e3a977d53329..bee906661282a 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +@@ -1182,19 +1182,19 @@ static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, + static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) + { ++ struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle **flows; +- /* total vports is the same for both e-switches */ +- int nvports = esw->total_vports; + struct mlx5_flow_handle *flow; ++ struct mlx5_vport *peer_vport; + struct mlx5_flow_spec *spec; +- struct mlx5_vport *vport; + int err, pfindex; + unsigned long i; + void *misc; + +- if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev)) ++ if (!MLX5_VPORT_MANAGER(peer_dev) && ++ !mlx5_core_is_ecpf_esw_manager(peer_dev)) + return 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); +@@ -1203,7 +1203,7 @@ 
static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + + peer_miss_rules_setup(esw, peer_dev, spec, &dest); + +- flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL); ++ flows = kvcalloc(peer_esw->total_vports, sizeof(*flows), GFP_KERNEL); + if (!flows) { + err = -ENOMEM; + goto alloc_flows_err; +@@ -1213,10 +1213,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + +- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { +- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); +- esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, +- spec, MLX5_VPORT_PF); ++ if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { ++ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); ++ esw_set_peer_miss_rule_source_port(esw, peer_esw, spec, ++ MLX5_VPORT_PF); + + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); +@@ -1224,11 +1224,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + err = PTR_ERR(flow); + goto add_pf_flow_err; + } +- flows[vport->index] = flow; ++ flows[peer_vport->index] = flow; + } + +- if (mlx5_ecpf_vport_exists(esw->dev)) { +- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); ++ if (mlx5_ecpf_vport_exists(peer_dev)) { ++ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); +@@ -1236,13 +1236,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + err = PTR_ERR(flow); + goto add_ecpf_flow_err; + } +- flows[vport->index] = flow; ++ flows[peer_vport->index] = flow; + } + +- mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { ++ mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, ++ mlx5_core_max_vfs(peer_dev)) { + esw_set_peer_miss_rule_source_port(esw, +- peer_dev->priv.eswitch, +- 
spec, vport->vport); ++ peer_esw, ++ spec, peer_vport->vport); + + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); +@@ -1250,22 +1251,22 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + err = PTR_ERR(flow); + goto add_vf_flow_err; + } +- flows[vport->index] = flow; ++ flows[peer_vport->index] = flow; + } + +- if (mlx5_core_ec_sriov_enabled(esw->dev)) { +- mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { +- if (i >= mlx5_core_max_ec_vfs(peer_dev)) +- break; +- esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, +- spec, vport->vport); ++ if (mlx5_core_ec_sriov_enabled(peer_dev)) { ++ mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, ++ mlx5_core_max_ec_vfs(peer_dev)) { ++ esw_set_peer_miss_rule_source_port(esw, peer_esw, ++ spec, ++ peer_vport->vport); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ec_vf_flow_err; + } +- flows[vport->index] = flow; ++ flows[peer_vport->index] = flow; + } + } + +@@ -1282,25 +1283,27 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + return 0; + + add_ec_vf_flow_err: +- mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { +- if (!flows[vport->index]) ++ mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, ++ mlx5_core_max_ec_vfs(peer_dev)) { ++ if (!flows[peer_vport->index]) + continue; +- mlx5_del_flow_rules(flows[vport->index]); ++ mlx5_del_flow_rules(flows[peer_vport->index]); + } + add_vf_flow_err: +- mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { +- if (!flows[vport->index]) ++ mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, ++ mlx5_core_max_vfs(peer_dev)) { ++ if (!flows[peer_vport->index]) + continue; +- mlx5_del_flow_rules(flows[vport->index]); ++ mlx5_del_flow_rules(flows[peer_vport->index]); + } +- if (mlx5_ecpf_vport_exists(esw->dev)) { +- 
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); +- mlx5_del_flow_rules(flows[vport->index]); ++ if (mlx5_ecpf_vport_exists(peer_dev)) { ++ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); ++ mlx5_del_flow_rules(flows[peer_vport->index]); + } + add_ecpf_flow_err: +- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { +- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); +- mlx5_del_flow_rules(flows[vport->index]); ++ if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { ++ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); ++ mlx5_del_flow_rules(flows[peer_vport->index]); + } + add_pf_flow_err: + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); +@@ -1313,37 +1316,34 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) + { ++ struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch; + u16 peer_index = mlx5_get_dev_index(peer_dev); + struct mlx5_flow_handle **flows; +- struct mlx5_vport *vport; ++ struct mlx5_vport *peer_vport; + unsigned long i; + + flows = esw->fdb_table.offloads.peer_miss_rules[peer_index]; + if (!flows) + return; + +- if (mlx5_core_ec_sriov_enabled(esw->dev)) { +- mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { +- /* The flow for a particular vport could be NULL if the other ECPF +- * has fewer or no VFs enabled +- */ +- if (!flows[vport->index]) +- continue; +- mlx5_del_flow_rules(flows[vport->index]); +- } ++ if (mlx5_core_ec_sriov_enabled(peer_dev)) { ++ mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, ++ mlx5_core_max_ec_vfs(peer_dev)) ++ mlx5_del_flow_rules(flows[peer_vport->index]); + } + +- mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) +- mlx5_del_flow_rules(flows[vport->index]); ++ mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, ++ mlx5_core_max_vfs(peer_dev)) ++ mlx5_del_flow_rules(flows[peer_vport->index]); + +- 
if (mlx5_ecpf_vport_exists(esw->dev)) { +- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); +- mlx5_del_flow_rules(flows[vport->index]); ++ if (mlx5_ecpf_vport_exists(peer_dev)) { ++ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); ++ mlx5_del_flow_rules(flows[peer_vport->index]); + } + +- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { +- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); +- mlx5_del_flow_rules(flows[vport->index]); ++ if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { ++ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); ++ mlx5_del_flow_rules(flows[peer_vport->index]); + } + + kvfree(flows); +-- +2.39.5 + diff --git a/queue-6.15/net-mlx5-fix-memory-leak-in-cmd_exec.patch b/queue-6.15/net-mlx5-fix-memory-leak-in-cmd_exec.patch new file mode 100644 index 0000000000..bd2a5c9323 --- /dev/null +++ b/queue-6.15/net-mlx5-fix-memory-leak-in-cmd_exec.patch @@ -0,0 +1,47 @@ +From 8da5ef8de8f29c1a8aeda100aabd14d5989f3a67 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Jul 2025 15:06:09 +0300 +Subject: net/mlx5: Fix memory leak in cmd_exec() + +From: Chiara Meiohas + +[ Upstream commit 3afa3ae3db52e3c216d77bd5907a5a86833806cc ] + +If cmd_exec() is called with callback and mlx5_cmd_invoke() returns an +error, resources allocated in cmd_exec() will not be freed. + +Fix the code to release the resources if mlx5_cmd_invoke() returns an +error. 
+ +Fixes: f086470122d5 ("net/mlx5: cmdif, Return value improvements") +Reported-by: Alex Tereshkin +Signed-off-by: Chiara Meiohas +Reviewed-by: Moshe Shemesh +Signed-off-by: Vlad Dumitrescu +Signed-off-by: Tariq Toukan +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/1752753970-261832-2-git-send-email-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +index e53dbdc0a7a17..34256ce5473ba 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -1948,8 +1948,8 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + + err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, + pages_queue, token, force_polling); +- if (callback) +- return err; ++ if (callback && !err) ++ return 0; + + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); +-- +2.39.5 + diff --git a/queue-6.15/net-sched-sch_qfq-avoid-triggering-might_sleep-in-at.patch b/queue-6.15/net-sched-sch_qfq-avoid-triggering-might_sleep-in-at.patch new file mode 100644 index 0000000000..46397c1522 --- /dev/null +++ b/queue-6.15/net-sched-sch_qfq-avoid-triggering-might_sleep-in-at.patch @@ -0,0 +1,74 @@ +From a79a017740f302f90b0187d0046afc4bf6991aff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Jul 2025 16:01:28 -0700 +Subject: net/sched: sch_qfq: Avoid triggering might_sleep in atomic context in + qfq_delete_class + +From: Xiang Mei + +[ Upstream commit cf074eca0065bc5142e6004ae236bb35a2687fdf ] + +might_sleep could be triggered in atomic context in qfq_delete_class. + +qfq_destroy_class was moved into atomic context locked +by sch_tree_lock to avoid a race condition bug on +qfq_aggregate.
However, might_sleep could be triggered by +qfq_destroy_class, which introduced sleeping in atomic context (path: +qfq_destroy_class->qdisc_put->__qdisc_destroy->lockdep_unregister_key +->might_sleep). + +Since the race is on the qfq_aggregate objects, keeping +qfq_rm_from_agg under the lock but moving the rest of the teardown out of +it solves this issue. + +Fixes: 5e28d5a3f774 ("net/sched: sch_qfq: Fix race condition on qfq_aggregate") +Reported-by: Dan Carpenter +Signed-off-by: Xiang Mei +Link: https://patch.msgid.link/4a04e0cc-a64b-44e7-9213-2880ed641d77@sabinyo.mountain +Reviewed-by: Cong Wang +Reviewed-by: Dan Carpenter +Link: https://patch.msgid.link/20250717230128.159766-1-xmei5@asu.edu +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/sched/sch_qfq.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c +index 2b1b025c31a33..51cc2cfb40936 100644 +--- a/net/sched/sch_qfq.c ++++ b/net/sched/sch_qfq.c +@@ -536,9 +536,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + + static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) + { +- struct qfq_sched *q = qdisc_priv(sch); +- +- qfq_rm_from_agg(q, cl); + gen_kill_estimator(&cl->rate_est); + qdisc_put(cl->qdisc); + kfree(cl); +@@ -559,10 +556,11 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg, + + qdisc_purge_queue(cl->qdisc); + qdisc_class_hash_remove(&q->clhash, &cl->common); +- qfq_destroy_class(sch, cl); ++ qfq_rm_from_agg(q, cl); + + sch_tree_unlock(sch); + ++ qfq_destroy_class(sch, cl); + return 0; + } + +@@ -1503,6 +1501,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], + common.hnode) { ++ qfq_rm_from_agg(q, cl); + qfq_destroy_class(sch, cl); + } + } +-- +2.39.5 + diff --git a/queue-6.15/net-ti-icssg-prueth-fix-buffer-allocation-for-icssg.patch
b/queue-6.15/net-ti-icssg-prueth-fix-buffer-allocation-for-icssg.patch new file mode 100644 index 0000000000..f0f7d35504 --- /dev/null +++ b/queue-6.15/net-ti-icssg-prueth-fix-buffer-allocation-for-icssg.patch @@ -0,0 +1,563 @@ +From c5872e260edc512f54bd94aee25ff148f7924022 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Jul 2025 15:12:20 +0530 +Subject: net: ti: icssg-prueth: Fix buffer allocation for ICSSG + +From: Himanshu Mittal + +[ Upstream commit 6e86fb73de0fe3ec5cdcd5873ad1d6005f295b64 ] + +Fix the overlapping buffer allocation for the ICSSG peripheral +used for storing packets to be received/transmitted. +There are 3 buffers: +1. Buffer for Locally Injected Packets +2. Buffer for Forwarding Packets +3. Buffer for Host Egress Packets + +In the existing allocation, buffers 2 and 3 overlap, causing +packet corruption. + +Packet corruption observations: +During TCP iperf testing, due to the overlapping buffers, the received ACK +packet overwrites the packet to be transmitted. So, we see packets on +the wire with the ACK packet content inside the content of the next TCP packet +from the sender device.
+ +Details for AM64x switch mode: +-> Allocation by existing driver: ++---------+-------------------------------------------------------------+ +| | SLICE 0 | SLICE 1 | +| +------+--------------+--------+------+--------------+--------+ +| | Slot | Base Address | Size | Slot | Base Address | Size | +|---------+------+--------------+--------+------+--------------+--------+ +| | 0 | 70000000 | 0x2000 | 0 | 70010000 | 0x2000 | +| | 1 | 70002000 | 0x2000 | 1 | 70012000 | 0x2000 | +| | 2 | 70004000 | 0x2000 | 2 | 70014000 | 0x2000 | +| FWD | 3 | 70006000 | 0x2000 | 3 | 70016000 | 0x2000 | +| Buffers | 4 | 70008000 | 0x2000 | 4 | 70018000 | 0x2000 | +| | 5 | 7000A000 | 0x2000 | 5 | 7001A000 | 0x2000 | +| | 6 | 7000C000 | 0x2000 | 6 | 7001C000 | 0x2000 | +| | 7 | 7000E000 | 0x2000 | 7 | 7001E000 | 0x2000 | ++---------+------+--------------+--------+------+--------------+--------+ +| | 8 | 70020000 | 0x1000 | 8 | 70028000 | 0x1000 | +| | 9 | 70021000 | 0x1000 | 9 | 70029000 | 0x1000 | +| | 10 | 70022000 | 0x1000 | 10 | 7002A000 | 0x1000 | +| Our | 11 | 70023000 | 0x1000 | 11 | 7002B000 | 0x1000 | +| LI | 12 | 00000000 | 0x0 | 12 | 00000000 | 0x0 | +| Buffers | 13 | 00000000 | 0x0 | 13 | 00000000 | 0x0 | +| | 14 | 00000000 | 0x0 | 14 | 00000000 | 0x0 | +| | 15 | 00000000 | 0x0 | 15 | 00000000 | 0x0 | ++---------+------+--------------+--------+------+--------------+--------+ +| | 16 | 70024000 | 0x1000 | 16 | 7002C000 | 0x1000 | +| | 17 | 70025000 | 0x1000 | 17 | 7002D000 | 0x1000 | +| | 18 | 70026000 | 0x1000 | 18 | 7002E000 | 0x1000 | +| Their | 19 | 70027000 | 0x1000 | 19 | 7002F000 | 0x1000 | +| LI | 20 | 00000000 | 0x0 | 20 | 00000000 | 0x0 | +| Buffers | 21 | 00000000 | 0x0 | 21 | 00000000 | 0x0 | +| | 22 | 00000000 | 0x0 | 22 | 00000000 | 0x0 | +| | 23 | 00000000 | 0x0 | 23 | 00000000 | 0x0 | ++---------+------+--------------+--------+------+--------------+--------+ +--> here 16, 17, 18, 19 overlapping with below express buffer + 
++-----+-----------------------------------------------+ +| | SLICE 0 | SLICE 1 | +| +------------+----------+------------+----------+ +| | Start addr | End addr | Start addr | End addr | ++-----+------------+----------+------------+----------+ +| EXP | 70024000 | 70028000 | 7002C000 | 70030000 | <-- Overlapping +| PRE | 70030000 | 70033800 | 70034000 | 70037800 | ++-----+------------+----------+------------+----------+ + ++---------------------+----------+----------+ +| | SLICE 0 | SLICE 1 | ++---------------------+----------+----------+ +| Default Drop Offset | 00000000 | 00000000 | <-- Field not configured ++---------------------+----------+----------+ + +-> Allocation this patch brings: ++---------+-------------------------------------------------------------+ +| | SLICE 0 | SLICE 1 | +| +------+--------------+--------+------+--------------+--------+ +| | Slot | Base Address | Size | Slot | Base Address | Size | +|---------+------+--------------+--------+------+--------------+--------+ +| | 0 | 70000000 | 0x2000 | 0 | 70040000 | 0x2000 | +| | 1 | 70002000 | 0x2000 | 1 | 70042000 | 0x2000 | +| | 2 | 70004000 | 0x2000 | 2 | 70044000 | 0x2000 | +| FWD | 3 | 70006000 | 0x2000 | 3 | 70046000 | 0x2000 | +| Buffers | 4 | 70008000 | 0x2000 | 4 | 70048000 | 0x2000 | +| | 5 | 7000A000 | 0x2000 | 5 | 7004A000 | 0x2000 | +| | 6 | 7000C000 | 0x2000 | 6 | 7004C000 | 0x2000 | +| | 7 | 7000E000 | 0x2000 | 7 | 7004E000 | 0x2000 | ++---------+------+--------------+--------+------+--------------+--------+ +| | 8 | 70010000 | 0x1000 | 8 | 70050000 | 0x1000 | +| | 9 | 70011000 | 0x1000 | 9 | 70051000 | 0x1000 | +| | 10 | 70012000 | 0x1000 | 10 | 70052000 | 0x1000 | +| Our | 11 | 70013000 | 0x1000 | 11 | 70053000 | 0x1000 | +| LI | 12 | 00000000 | 0x0 | 12 | 00000000 | 0x0 | +| Buffers | 13 | 00000000 | 0x0 | 13 | 00000000 | 0x0 | +| | 14 | 00000000 | 0x0 | 14 | 00000000 | 0x0 | +| | 15 | 00000000 | 0x0 | 15 | 00000000 | 0x0 | 
++---------+------+--------------+--------+------+--------------+--------+ +| | 16 | 70014000 | 0x1000 | 16 | 70054000 | 0x1000 | +| | 17 | 70015000 | 0x1000 | 17 | 70055000 | 0x1000 | +| | 18 | 70016000 | 0x1000 | 18 | 70056000 | 0x1000 | +| Their | 19 | 70017000 | 0x1000 | 19 | 70057000 | 0x1000 | +| LI | 20 | 00000000 | 0x0 | 20 | 00000000 | 0x0 | +| Buffers | 21 | 00000000 | 0x0 | 21 | 00000000 | 0x0 | +| | 22 | 00000000 | 0x0 | 22 | 00000000 | 0x0 | +| | 23 | 00000000 | 0x0 | 23 | 00000000 | 0x0 | ++---------+------+--------------+--------+------+--------------+--------+ + ++-----+-----------------------------------------------+ +| | SLICE 0 | SLICE 1 | +| +------------+----------+------------+----------+ +| | Start addr | End addr | Start addr | End addr | ++-----+------------+----------+------------+----------+ +| EXP | 70018000 | 7001C000 | 70058000 | 7005C000 | +| PRE | 7001C000 | 7001F800 | 7005C000 | 7005F800 | ++-----+------------+----------+------------+----------+ + ++---------------------+----------+----------+ +| | SLICE 0 | SLICE 1 | ++---------------------+----------+----------+ +| Default Drop Offset | 7001F800 | 7005F800 | ++---------------------+----------+----------+ + +Rootcause: missing buffer configuration for Express frames in +function: prueth_fw_offload_buffer_setup() + +Details: +Driver implements two distinct buffer configuration functions that are +invoked based on the driver state and ICSSG firmware:- +- prueth_fw_offload_buffer_setup() +- prueth_emac_buffer_setup() + +During initialization, driver creates standard network interfaces +(netdevs) and configures buffers via prueth_emac_buffer_setup(). +This function properly allocates and configures all required memory +regions including: +- LI buffers +- Express packet buffers +- Preemptible packet buffers + +However, when the driver transitions to an offload mode (switch/HSR/PRP), +buffer reconfiguration is handled by prueth_fw_offload_buffer_setup(). 
+This function does not reconfigure the buffer regions required for +Express packets, leading to incorrect buffer allocation. + +Fixes: abd5576b9c57 ("net: ti: icssg-prueth: Add support for ICSSG switch firmware") +Signed-off-by: Himanshu Mittal +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250717094220.546388-1-h-mittal1@ti.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/ti/icssg/icssg_config.c | 158 ++++++++++++------ + drivers/net/ethernet/ti/icssg/icssg_config.h | 80 +++++++-- + drivers/net/ethernet/ti/icssg/icssg_prueth.c | 20 ++- + drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 + + .../net/ethernet/ti/icssg/icssg_switch_map.h | 3 + + 5 files changed, 190 insertions(+), 73 deletions(-) + +diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c +index ddfd1c02a8854..da53eb04b0a43 100644 +--- a/drivers/net/ethernet/ti/icssg/icssg_config.c ++++ b/drivers/net/ethernet/ti/icssg/icssg_config.c +@@ -288,8 +288,12 @@ static int prueth_fw_offload_buffer_setup(struct prueth_emac *emac) + int i; + + addr = lower_32_bits(prueth->msmcram.pa); +- if (slice) +- addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE; ++ if (slice) { ++ if (prueth->pdata.banked_ms_ram) ++ addr += MSMC_RAM_BANK_SIZE; ++ else ++ addr += PRUETH_SW_TOTAL_BUF_SIZE_PER_SLICE; ++ } + + if (addr % SZ_64K) { + dev_warn(prueth->dev, "buffer pool needs to be 64KB aligned\n"); +@@ -297,43 +301,66 @@ static int prueth_fw_offload_buffer_setup(struct prueth_emac *emac) + } + + bpool_cfg = emac->dram.va + BUFFER_POOL_0_ADDR_OFFSET; +- /* workaround for f/w bug. 
bpool 0 needs to be initialized */ +- for (i = 0; i < PRUETH_NUM_BUF_POOLS; i++) { ++ ++ /* Configure buffer pools for forwarding buffers ++ * - used by firmware to store packets to be forwarded to other port ++ * - 8 total pools per slice ++ */ ++ for (i = 0; i < PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE; i++) { + writel(addr, &bpool_cfg[i].addr); +- writel(PRUETH_EMAC_BUF_POOL_SIZE, &bpool_cfg[i].len); +- addr += PRUETH_EMAC_BUF_POOL_SIZE; ++ writel(PRUETH_SW_FWD_BUF_POOL_SIZE, &bpool_cfg[i].len); ++ addr += PRUETH_SW_FWD_BUF_POOL_SIZE; + } + +- if (!slice) +- addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE; +- else +- addr += PRUETH_SW_NUM_BUF_POOLS_HOST * PRUETH_SW_BUF_POOL_SIZE_HOST; +- +- for (i = PRUETH_NUM_BUF_POOLS; +- i < 2 * PRUETH_SW_NUM_BUF_POOLS_HOST + PRUETH_NUM_BUF_POOLS; +- i++) { +- /* The driver only uses first 4 queues per PRU so only initialize them */ +- if (i % PRUETH_SW_NUM_BUF_POOLS_HOST < PRUETH_SW_NUM_BUF_POOLS_PER_PRU) { +- writel(addr, &bpool_cfg[i].addr); +- writel(PRUETH_SW_BUF_POOL_SIZE_HOST, &bpool_cfg[i].len); +- addr += PRUETH_SW_BUF_POOL_SIZE_HOST; ++ /* Configure buffer pools for Local Injection buffers ++ * - used by firmware to store packets received from host core ++ * - 16 total pools per slice ++ */ ++ for (i = 0; i < PRUETH_NUM_LI_BUF_POOLS_PER_SLICE; i++) { ++ int cfg_idx = i + PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE; ++ ++ /* The driver only uses first 4 queues per PRU, ++ * so only initialize buffer for them ++ */ ++ if ((i % PRUETH_NUM_LI_BUF_POOLS_PER_PORT_PER_SLICE) ++ < PRUETH_SW_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE) { ++ writel(addr, &bpool_cfg[cfg_idx].addr); ++ writel(PRUETH_SW_LI_BUF_POOL_SIZE, ++ &bpool_cfg[cfg_idx].len); ++ addr += PRUETH_SW_LI_BUF_POOL_SIZE; + } else { +- writel(0, &bpool_cfg[i].addr); +- writel(0, &bpool_cfg[i].len); ++ writel(0, &bpool_cfg[cfg_idx].addr); ++ writel(0, &bpool_cfg[cfg_idx].len); + } + } + +- if (!slice) +- addr += PRUETH_SW_NUM_BUF_POOLS_HOST * PRUETH_SW_BUF_POOL_SIZE_HOST; +- 
else +- addr += PRUETH_EMAC_RX_CTX_BUF_SIZE; ++ /* Express RX buffer queue ++ * - used by firmware to store express packets to be transmitted ++ * to the host core ++ */ ++ rxq_ctx = emac->dram.va + HOST_RX_Q_EXP_CONTEXT_OFFSET; ++ for (i = 0; i < 3; i++) ++ writel(addr, &rxq_ctx->start[i]); ++ ++ addr += PRUETH_SW_HOST_EXP_BUF_POOL_SIZE; ++ writel(addr, &rxq_ctx->end); + ++ /* Pre-emptible RX buffer queue ++ * - used by firmware to store preemptible packets to be transmitted ++ * to the host core ++ */ + rxq_ctx = emac->dram.va + HOST_RX_Q_PRE_CONTEXT_OFFSET; + for (i = 0; i < 3; i++) + writel(addr, &rxq_ctx->start[i]); + +- addr += PRUETH_EMAC_RX_CTX_BUF_SIZE; +- writel(addr - SZ_2K, &rxq_ctx->end); ++ addr += PRUETH_SW_HOST_PRE_BUF_POOL_SIZE; ++ writel(addr, &rxq_ctx->end); ++ ++ /* Set pointer for default dropped packet write ++ * - used by firmware to temporarily store packet to be dropped ++ */ ++ rxq_ctx = emac->dram.va + DEFAULT_MSMC_Q_OFFSET; ++ writel(addr, &rxq_ctx->start[0]); + + return 0; + } +@@ -347,13 +374,13 @@ static int prueth_emac_buffer_setup(struct prueth_emac *emac) + u32 addr; + int i; + +- /* Layout to have 64KB aligned buffer pool +- * |BPOOL0|BPOOL1|RX_CTX0|RX_CTX1| +- */ +- + addr = lower_32_bits(prueth->msmcram.pa); +- if (slice) +- addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE; ++ if (slice) { ++ if (prueth->pdata.banked_ms_ram) ++ addr += MSMC_RAM_BANK_SIZE; ++ else ++ addr += PRUETH_EMAC_TOTAL_BUF_SIZE_PER_SLICE; ++ } + + if (addr % SZ_64K) { + dev_warn(prueth->dev, "buffer pool needs to be 64KB aligned\n"); +@@ -361,39 +388,66 @@ static int prueth_emac_buffer_setup(struct prueth_emac *emac) + } + + bpool_cfg = emac->dram.va + BUFFER_POOL_0_ADDR_OFFSET; +- /* workaround for f/w bug. 
bpool 0 needs to be initilalized */ +- writel(addr, &bpool_cfg[0].addr); +- writel(0, &bpool_cfg[0].len); + +- for (i = PRUETH_EMAC_BUF_POOL_START; +- i < PRUETH_EMAC_BUF_POOL_START + PRUETH_NUM_BUF_POOLS; +- i++) { +- writel(addr, &bpool_cfg[i].addr); +- writel(PRUETH_EMAC_BUF_POOL_SIZE, &bpool_cfg[i].len); +- addr += PRUETH_EMAC_BUF_POOL_SIZE; ++ /* Configure buffer pools for forwarding buffers ++ * - in mac mode - no forwarding so initialize all pools to 0 ++ * - 8 total pools per slice ++ */ ++ for (i = 0; i < PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE; i++) { ++ writel(0, &bpool_cfg[i].addr); ++ writel(0, &bpool_cfg[i].len); + } + +- if (!slice) +- addr += PRUETH_NUM_BUF_POOLS * PRUETH_EMAC_BUF_POOL_SIZE; +- else +- addr += PRUETH_EMAC_RX_CTX_BUF_SIZE * 2; ++ /* Configure buffer pools for Local Injection buffers ++ * - used by firmware to store packets received from host core ++ * - 16 total pools per slice ++ */ ++ bpool_cfg = emac->dram.va + BUFFER_POOL_0_ADDR_OFFSET; ++ for (i = 0; i < PRUETH_NUM_LI_BUF_POOLS_PER_SLICE; i++) { ++ int cfg_idx = i + PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE; ++ ++ /* In EMAC mode, only first 4 buffers are used, ++ * as 1 slice needs to handle only 1 port ++ */ ++ if (i < PRUETH_EMAC_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE) { ++ writel(addr, &bpool_cfg[cfg_idx].addr); ++ writel(PRUETH_EMAC_LI_BUF_POOL_SIZE, ++ &bpool_cfg[cfg_idx].len); ++ addr += PRUETH_EMAC_LI_BUF_POOL_SIZE; ++ } else { ++ writel(0, &bpool_cfg[cfg_idx].addr); ++ writel(0, &bpool_cfg[cfg_idx].len); ++ } ++ } + +- /* Pre-emptible RX buffer queue */ +- rxq_ctx = emac->dram.va + HOST_RX_Q_PRE_CONTEXT_OFFSET; ++ /* Express RX buffer queue ++ * - used by firmware to store express packets to be transmitted ++ * to host core ++ */ ++ rxq_ctx = emac->dram.va + HOST_RX_Q_EXP_CONTEXT_OFFSET; + for (i = 0; i < 3; i++) + writel(addr, &rxq_ctx->start[i]); + +- addr += PRUETH_EMAC_RX_CTX_BUF_SIZE; ++ addr += PRUETH_EMAC_HOST_EXP_BUF_POOL_SIZE; + writel(addr, &rxq_ctx->end); + +- /* Express 
RX buffer queue */ +- rxq_ctx = emac->dram.va + HOST_RX_Q_EXP_CONTEXT_OFFSET; ++ /* Pre-emptible RX buffer queue ++ * - used by firmware to store preemptible packets to be transmitted ++ * to host core ++ */ ++ rxq_ctx = emac->dram.va + HOST_RX_Q_PRE_CONTEXT_OFFSET; + for (i = 0; i < 3; i++) + writel(addr, &rxq_ctx->start[i]); + +- addr += PRUETH_EMAC_RX_CTX_BUF_SIZE; ++ addr += PRUETH_EMAC_HOST_PRE_BUF_POOL_SIZE; + writel(addr, &rxq_ctx->end); + ++ /* Set pointer for default dropped packet write ++ * - used by firmware to temporarily store packet to be dropped ++ */ ++ rxq_ctx = emac->dram.va + DEFAULT_MSMC_Q_OFFSET; ++ writel(addr, &rxq_ctx->start[0]); ++ + return 0; + } + +diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.h b/drivers/net/ethernet/ti/icssg/icssg_config.h +index c884e9fa099e6..60d69744ffae2 100644 +--- a/drivers/net/ethernet/ti/icssg/icssg_config.h ++++ b/drivers/net/ethernet/ti/icssg/icssg_config.h +@@ -26,21 +26,71 @@ struct icssg_flow_cfg { + #define PRUETH_MAX_RX_FLOWS 1 /* excluding default flow */ + #define PRUETH_RX_FLOW_DATA 0 + +-#define PRUETH_EMAC_BUF_POOL_SIZE SZ_8K +-#define PRUETH_EMAC_POOLS_PER_SLICE 24 +-#define PRUETH_EMAC_BUF_POOL_START 8 +-#define PRUETH_NUM_BUF_POOLS 8 +-#define PRUETH_EMAC_RX_CTX_BUF_SIZE SZ_16K /* per slice */ +-#define MSMC_RAM_SIZE \ +- (2 * (PRUETH_EMAC_BUF_POOL_SIZE * PRUETH_NUM_BUF_POOLS + \ +- PRUETH_EMAC_RX_CTX_BUF_SIZE * 2)) +- +-#define PRUETH_SW_BUF_POOL_SIZE_HOST SZ_4K +-#define PRUETH_SW_NUM_BUF_POOLS_HOST 8 +-#define PRUETH_SW_NUM_BUF_POOLS_PER_PRU 4 +-#define MSMC_RAM_SIZE_SWITCH_MODE \ +- (MSMC_RAM_SIZE + \ +- (2 * PRUETH_SW_BUF_POOL_SIZE_HOST * PRUETH_SW_NUM_BUF_POOLS_HOST)) ++/* Defines for forwarding path buffer pools: ++ * - used by firmware to store packets to be forwarded to other port ++ * - 8 total pools per slice ++ * - only used in switch mode (as no forwarding in mac mode) ++ */ ++#define PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE 8 ++#define PRUETH_SW_FWD_BUF_POOL_SIZE (SZ_8K) ++ ++/* 
Defines for local injection path buffer pools: ++ * - used by firmware to store packets received from host core ++ * - 16 total pools per slice ++ * - 8 pools per port per slice and each slice handles both ports ++ * - only 4 out of 8 pools used per port (as only 4 real QoS levels in ICSSG) ++ * - switch mode: 8 total pools used ++ * - mac mode: 4 total pools used ++ */ ++#define PRUETH_NUM_LI_BUF_POOLS_PER_SLICE 16 ++#define PRUETH_NUM_LI_BUF_POOLS_PER_PORT_PER_SLICE 8 ++#define PRUETH_SW_LI_BUF_POOL_SIZE SZ_4K ++#define PRUETH_SW_USED_LI_BUF_POOLS_PER_SLICE 8 ++#define PRUETH_SW_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE 4 ++#define PRUETH_EMAC_LI_BUF_POOL_SIZE SZ_8K ++#define PRUETH_EMAC_USED_LI_BUF_POOLS_PER_SLICE 4 ++#define PRUETH_EMAC_USED_LI_BUF_POOLS_PER_PORT_PER_SLICE 4 ++ ++/* Defines for host egress path - express and preemptible buffers ++ * - used by firmware to store express and preemptible packets ++ * to be transmitted to host core ++ * - used by both mac/switch modes ++ */ ++#define PRUETH_SW_HOST_EXP_BUF_POOL_SIZE SZ_16K ++#define PRUETH_SW_HOST_PRE_BUF_POOL_SIZE (SZ_16K - SZ_2K) ++#define PRUETH_EMAC_HOST_EXP_BUF_POOL_SIZE PRUETH_SW_HOST_EXP_BUF_POOL_SIZE ++#define PRUETH_EMAC_HOST_PRE_BUF_POOL_SIZE PRUETH_SW_HOST_PRE_BUF_POOL_SIZE ++ ++/* Buffer used by firmware to temporarily store packet to be dropped */ ++#define PRUETH_SW_DROP_PKT_BUF_SIZE SZ_2K ++#define PRUETH_EMAC_DROP_PKT_BUF_SIZE PRUETH_SW_DROP_PKT_BUF_SIZE ++ ++/* Total switch mode memory usage for buffers per slice */ ++#define PRUETH_SW_TOTAL_BUF_SIZE_PER_SLICE \ ++ (PRUETH_SW_FWD_BUF_POOL_SIZE * PRUETH_NUM_FWD_BUF_POOLS_PER_SLICE + \ ++ PRUETH_SW_LI_BUF_POOL_SIZE * PRUETH_SW_USED_LI_BUF_POOLS_PER_SLICE + \ ++ PRUETH_SW_HOST_EXP_BUF_POOL_SIZE + \ ++ PRUETH_SW_HOST_PRE_BUF_POOL_SIZE + \ ++ PRUETH_SW_DROP_PKT_BUF_SIZE) ++ ++/* Total switch mode memory usage for all buffers */ ++#define PRUETH_SW_TOTAL_BUF_SIZE \ ++ (2 * PRUETH_SW_TOTAL_BUF_SIZE_PER_SLICE) ++ ++/* Total mac mode memory usage 
for buffers per slice */ ++#define PRUETH_EMAC_TOTAL_BUF_SIZE_PER_SLICE \ ++ (PRUETH_EMAC_LI_BUF_POOL_SIZE * \ ++ PRUETH_EMAC_USED_LI_BUF_POOLS_PER_SLICE + \ ++ PRUETH_EMAC_HOST_EXP_BUF_POOL_SIZE + \ ++ PRUETH_EMAC_HOST_PRE_BUF_POOL_SIZE + \ ++ PRUETH_EMAC_DROP_PKT_BUF_SIZE) ++ ++/* Total mac mode memory usage for all buffers */ ++#define PRUETH_EMAC_TOTAL_BUF_SIZE \ ++ (2 * PRUETH_EMAC_TOTAL_BUF_SIZE_PER_SLICE) ++ ++/* Size of 1 bank of MSMC/OC_SRAM memory */ ++#define MSMC_RAM_BANK_SIZE SZ_256K + + #define PRUETH_SWITCH_FDB_MASK ((SIZE_OF_FDB / NUMBER_OF_FDB_BUCKET_ENTRIES) - 1) + +diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c +index 86fc1278127c7..2f5c4335dec38 100644 +--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c ++++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c +@@ -1764,10 +1764,15 @@ static int prueth_probe(struct platform_device *pdev) + goto put_mem; + } + +- msmc_ram_size = MSMC_RAM_SIZE; + prueth->is_switchmode_supported = prueth->pdata.switch_mode; +- if (prueth->is_switchmode_supported) +- msmc_ram_size = MSMC_RAM_SIZE_SWITCH_MODE; ++ if (prueth->pdata.banked_ms_ram) { ++ /* Reserve 2 MSMC RAM banks for buffers to avoid arbitration */ ++ msmc_ram_size = (2 * MSMC_RAM_BANK_SIZE); ++ } else { ++ msmc_ram_size = PRUETH_EMAC_TOTAL_BUF_SIZE; ++ if (prueth->is_switchmode_supported) ++ msmc_ram_size = PRUETH_SW_TOTAL_BUF_SIZE; ++ } + + /* NOTE: FW bug needs buffer base to be 64KB aligned */ + prueth->msmcram.va = +@@ -1924,7 +1929,8 @@ static int prueth_probe(struct platform_device *pdev) + + free_pool: + gen_pool_free(prueth->sram_pool, +- (unsigned long)prueth->msmcram.va, msmc_ram_size); ++ (unsigned long)prueth->msmcram.va, ++ prueth->msmcram.size); + + put_mem: + pruss_release_mem_region(prueth->pruss, &prueth->shram); +@@ -1976,8 +1982,8 @@ static void prueth_remove(struct platform_device *pdev) + icss_iep_put(prueth->iep0); + + gen_pool_free(prueth->sram_pool, +- (unsigned 
long)prueth->msmcram.va, +- MSMC_RAM_SIZE); ++ (unsigned long)prueth->msmcram.va, ++ prueth->msmcram.size); + + pruss_release_mem_region(prueth->pruss, &prueth->shram); + +@@ -1994,12 +2000,14 @@ static const struct prueth_pdata am654_icssg_pdata = { + .fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE, + .quirk_10m_link_issue = 1, + .switch_mode = 1, ++ .banked_ms_ram = 0, + }; + + static const struct prueth_pdata am64x_icssg_pdata = { + .fdqring_mode = K3_RINGACC_RING_MODE_RING, + .quirk_10m_link_issue = 1, + .switch_mode = 1, ++ .banked_ms_ram = 1, + }; + + static const struct of_device_id prueth_dt_match[] = { +diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h +index b6be4aa57a615..0ca8ea0560e52 100644 +--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h ++++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h +@@ -251,11 +251,13 @@ struct prueth_emac { + * @fdqring_mode: Free desc queue mode + * @quirk_10m_link_issue: 10M link detect errata + * @switch_mode: switch firmware support ++ * @banked_ms_ram: banked memory support + */ + struct prueth_pdata { + enum k3_ring_mode fdqring_mode; + u32 quirk_10m_link_issue:1; + u32 switch_mode:1; ++ u32 banked_ms_ram:1; + }; + + struct icssg_firmwares { +diff --git a/drivers/net/ethernet/ti/icssg/icssg_switch_map.h b/drivers/net/ethernet/ti/icssg/icssg_switch_map.h +index 424a7e945ea84..12541a12ebd67 100644 +--- a/drivers/net/ethernet/ti/icssg/icssg_switch_map.h ++++ b/drivers/net/ethernet/ti/icssg/icssg_switch_map.h +@@ -180,6 +180,9 @@ + /* Used to notify the FW of the current link speed */ + #define PORT_LINK_SPEED_OFFSET 0x00A8 + ++/* 2k memory pointer reserved for default writes by PRU0*/ ++#define DEFAULT_MSMC_Q_OFFSET 0x00AC ++ + /* TAS gate mask for windows list0 */ + #define TAS_GATE_MASK_LIST0 0x0100 + +-- +2.39.5 + diff --git a/queue-6.15/s390-ism-fix-concurrency-management-in-ism_cmd.patch b/queue-6.15/s390-ism-fix-concurrency-management-in-ism_cmd.patch new file 
mode 100644 index 0000000000..c7b3d8f335 --- /dev/null +++ b/queue-6.15/s390-ism-fix-concurrency-management-in-ism_cmd.patch @@ -0,0 +1,100 @@ +From 65ae881081dddfbe0f396189f5ca43c1462f7afc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Jul 2025 18:18:17 +0200 +Subject: s390/ism: fix concurrency management in ism_cmd() + +From: Halil Pasic + +[ Upstream commit 897e8601b9cff1d054cdd53047f568b0e1995726 ] + +The s390x ISM device data sheet clearly states that only one +request-response sequence is allowable per ISM function at any point in +time. Unfortunately as of today the s390/ism driver in Linux does not +honor that requirement. This patch aims to rectify that. + +This problem was discovered based on Aliaksei's bug report which states +that for certain workloads the ISM functions end up entering error state +(with PEC 2 as seen from the logs) after a while and as a consequence +connections handled by the respective function break, and for future +connection requests the ISM device is not considered -- given it is in a +dysfunctional state. During further debugging PEC 3A was observed as +well. + +A kernel message like +[ 1211.244319] zpci: 061a:00:00.0: Event 0x2 reports an error for PCI function 0x61a +is a reliable indicator of the stated function entering error state +with PEC 2. Let me also point out that a kernel message like +[ 1211.244325] zpci: 061a:00:00.0: The ism driver bound to the device does not support error recovery +is a reliable indicator that the ISM function won't be auto-recovered +because the ISM driver currently lacks support for it. + +On a technical level, without this synchronization, commands (inputs to +the FW) may be partially or fully overwritten (corrupted) by another CPU +trying to issue commands on the same function. There is hard evidence that +this can lead to DMB token values being used as DMB IOVAs, leading to +PEC 2 PCI events indicating invalid DMA. But this is only one of the +failure modes imaginable. 
In theory even completely losing one command +and executing another one twice and then trying to interpret the outputs +as if the command we intended to execute was actually executed and not +the other one is also possible. Frankly, I don't feel confident about +providing an exhaustive list of possible consequences. + +Fixes: 684b89bc39ce ("s390/ism: add device driver for internal shared memory") +Reported-by: Aliaksei Makarau +Tested-by: Mahanta Jambigi +Tested-by: Aliaksei Makarau +Signed-off-by: Halil Pasic +Reviewed-by: Alexandra Winter +Signed-off-by: Alexandra Winter +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250722161817.1298473-1-wintera@linux.ibm.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/s390/net/ism_drv.c | 3 +++ + include/linux/ism.h | 1 + + 2 files changed, 4 insertions(+) + +diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c +index 60ed70a39d2cc..b8464d9433e96 100644 +--- a/drivers/s390/net/ism_drv.c ++++ b/drivers/s390/net/ism_drv.c +@@ -130,6 +130,7 @@ static int ism_cmd(struct ism_dev *ism, void *cmd) + struct ism_req_hdr *req = cmd; + struct ism_resp_hdr *resp = cmd; + ++ spin_lock(&ism->cmd_lock); + __ism_write_cmd(ism, req + 1, sizeof(*req), req->len - sizeof(*req)); + __ism_write_cmd(ism, req, 0, sizeof(*req)); + +@@ -143,6 +144,7 @@ static int ism_cmd(struct ism_dev *ism, void *cmd) + } + __ism_read_cmd(ism, resp + 1, sizeof(*resp), resp->len - sizeof(*resp)); + out: ++ spin_unlock(&ism->cmd_lock); + return resp->ret; + } + +@@ -606,6 +608,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) + return -ENOMEM; + + spin_lock_init(&ism->lock); ++ spin_lock_init(&ism->cmd_lock); + dev_set_drvdata(&pdev->dev, ism); + ism->pdev = pdev; + ism->dev.parent = &pdev->dev; +diff --git a/include/linux/ism.h b/include/linux/ism.h +index 5428edd909823..8358b4cd7ba6a 100644 +--- a/include/linux/ism.h ++++ b/include/linux/ism.h +@@ -28,6 +28,7 @@ struct ism_dmb { + + 
struct ism_dev { + spinlock_t lock; /* protects the ism device */ ++ spinlock_t cmd_lock; /* serializes cmds */ + struct list_head list; + struct pci_dev *pdev; + +-- +2.39.5 + diff --git a/queue-6.15/selftests-drv-net-wait-for-iperf-client-to-stop-send.patch b/queue-6.15/selftests-drv-net-wait-for-iperf-client-to-stop-send.patch new file mode 100644 index 0000000000..4d6fa1ca2a --- /dev/null +++ b/queue-6.15/selftests-drv-net-wait-for-iperf-client-to-stop-send.patch @@ -0,0 +1,87 @@ +From 6e312791fa41cc84e05001e7aa5019f234bfb827 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Jul 2025 15:26:55 +0300 +Subject: selftests: drv-net: wait for iperf client to stop sending + +From: Nimrod Oren + +[ Upstream commit 86941382508850d58c11bdafe0fec646dfd31b09 ] + +A few packets may still be sent out during the termination of iperf +processes. These late packets cause failures in rss_ctx.py when they +arrive on queues expected to be empty. + +Example failure observed: + + Check failed 2 != 0 traffic on inactive queues (context 1): + [0, 0, 1, 1, 386385, 397196, 0, 0, 0, 0, ...] + + Check failed 4 != 0 traffic on inactive queues (context 2): + [0, 0, 0, 0, 2, 2, 247152, 253013, 0, 0, ...] + + Check failed 2 != 0 traffic on inactive queues (context 3): + [0, 0, 0, 0, 0, 0, 1, 1, 282434, 283070, ...] + +To avoid such failures, wait until all client sockets for the requested +port are either closed or in the TIME_WAIT state. 
+ +Fixes: 847aa551fa78 ("selftests: drv-net: rss_ctx: factor out send traffic and check") +Signed-off-by: Nimrod Oren +Reviewed-by: Gal Pressman +Reviewed-by: Carolina Jubran +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250722122655.3194442-1-noren@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../selftests/drivers/net/lib/py/load.py | 23 +++++++++++++++---- + 1 file changed, 18 insertions(+), 5 deletions(-) + +diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py +index da5af2c680faa..1a9d57c3efa3c 100644 +--- a/tools/testing/selftests/drivers/net/lib/py/load.py ++++ b/tools/testing/selftests/drivers/net/lib/py/load.py +@@ -1,5 +1,6 @@ + # SPDX-License-Identifier: GPL-2.0 + ++import re + import time + + from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen, bkg +@@ -10,12 +11,11 @@ class GenerateTraffic: + + self.env = env + +- if port is None: +- port = rand_port() +- self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True) +- wait_port_listen(port) ++ self.port = rand_port() if port is None else port ++ self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True) ++ wait_port_listen(self.port) + time.sleep(0.1) +- self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", ++ self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400", + background=True, host=env.remote) + + # Wait for traffic to ramp up +@@ -74,3 +74,16 @@ class GenerateTraffic: + ksft_pr(">> Server:") + ksft_pr(self._iperf_server.stdout) + ksft_pr(self._iperf_server.stderr) ++ self._wait_client_stopped() ++ ++ def _wait_client_stopped(self, sleep=0.005, timeout=5): ++ end = time.monotonic() + timeout ++ ++ live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ") ++ ++ while time.monotonic() < end: ++ data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout ++ if not live_port_pattern.search(data): ++ return 
++ time.sleep(sleep) ++ raise Exception(f"Waiting for client to stop timed out after {timeout}s") +-- +2.39.5 + diff --git a/queue-6.15/series b/queue-6.15/series index 7afeff747d..a8cb7da39b 100644 --- a/queue-6.15/series +++ b/queue-6.15/series @@ -17,3 +17,27 @@ tools-hv-fcopy-fix-incorrect-file-path-conversion.patch x86-hyperv-fix-usage-of-cpu_online_mask-to-get-valid.patch platform-x86-fix-initialization-order-for-firmware_a.patch staging-vchiq_arm-make-vchiq_shutdown-never-fail.patch +xfrm-state-initialize-state_ptrs-earlier-in-xfrm_sta.patch +xfrm-state-use-a-consistent-pcpu_id-in-xfrm_state_fi.patch +xfrm-always-initialize-offload-path.patch +xfrm-set-transport-header-to-fix-udp-gro-handling.patch +xfrm-ipcomp-adjust-transport-header-after-decompress.patch +xfrm-interface-fix-use-after-free-after-changing-col.patch +xfrm-delete-x-tunnel-as-we-delete-x.patch +asoc-mediatek-mt8365-dai-i2s-pass-correct-size-to-mt.patch +net-ti-icssg-prueth-fix-buffer-allocation-for-icssg.patch +net-mlx5-fix-memory-leak-in-cmd_exec.patch +net-mlx5-e-switch-fix-peer-miss-rules-to-use-peer-es.patch +i40e-report-vf-tx_dropped-with-tx_errors-instead-of-.patch +i40e-when-removing-vf-mac-filters-only-check-pf-set-.patch +net-appletalk-fix-use-after-free-in-aarp-proxy-probe.patch +net-sched-sch_qfq-avoid-triggering-might_sleep-in-at.patch +can-netlink-can_changelink-fix-null-pointer-deref-of.patch +drm-bridge-ti-sn65dsi86-remove-extra-semicolon-in-ti.patch +alsa-hda-realtek-fix-mute-led-mask-on-hp-omen-16-lap.patch +selftests-drv-net-wait-for-iperf-client-to-stop-send.patch +s390-ism-fix-concurrency-management-in-ism_cmd.patch +net-hns3-fix-concurrent-setting-vlan-filter-issue.patch +net-hns3-disable-interrupt-when-ptp-init-failed.patch +net-hns3-fixed-vf-get-max-channels-bug.patch +net-hns3-default-enable-tx-bounce-buffer-when-smmu-e.patch diff --git a/queue-6.15/xfrm-always-initialize-offload-path.patch b/queue-6.15/xfrm-always-initialize-offload-path.patch new file mode 100644 
index 0000000000..6590323a38 --- /dev/null +++ b/queue-6.15/xfrm-always-initialize-offload-path.patch @@ -0,0 +1,99 @@ +From 493c773b84809695e79c3c54ae78331892927eac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 8 Jun 2025 10:42:53 +0300 +Subject: xfrm: always initialize offload path + +From: Leon Romanovsky + +[ Upstream commit c0f21029f123d1b15f8eddc8e3976bf0c8781c43 ] + +Offload path is used for GRO with SW IPsec, and not just for HW +offload. So initialize it anyway. + +Fixes: 585b64f5a620 ("xfrm: delay initialization of offload path till its actually requested") +Reported-by: Sabrina Dubroca +Closes: https://lore.kernel.org/all/aEGW_5HfPqU1rFjl@krikkit +Signed-off-by: Leon Romanovsky +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + include/net/xfrm.h | 2 +- + net/xfrm/xfrm_device.c | 1 - + net/xfrm/xfrm_state.c | 6 ++---- + net/xfrm/xfrm_user.c | 1 + + 4 files changed, 4 insertions(+), 6 deletions(-) + +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index 1f1861c57e2ad..29a0759d5582c 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -474,7 +474,7 @@ struct xfrm_type_offload { + + int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); + void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); +-void xfrm_set_type_offload(struct xfrm_state *x); ++void xfrm_set_type_offload(struct xfrm_state *x, bool try_load); + static inline void xfrm_unset_type_offload(struct xfrm_state *x) + { + if (!x->type_offload) +diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c +index f46a9e5764f01..a2d3a5f3b4852 100644 +--- a/net/xfrm/xfrm_device.c ++++ b/net/xfrm/xfrm_device.c +@@ -305,7 +305,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, + return -EINVAL; + } + +- xfrm_set_type_offload(x); + if (!x->type_offload) { + NL_SET_ERR_MSG(extack, "Type doesn't support offload"); + dev_put(dev); +diff --git a/net/xfrm/xfrm_state.c 
b/net/xfrm/xfrm_state.c +index cef8d3c20f652..0cf516b4e6d92 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -424,11 +424,10 @@ void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, + } + EXPORT_SYMBOL(xfrm_unregister_type_offload); + +-void xfrm_set_type_offload(struct xfrm_state *x) ++void xfrm_set_type_offload(struct xfrm_state *x, bool try_load) + { + const struct xfrm_type_offload *type = NULL; + struct xfrm_state_afinfo *afinfo; +- bool try_load = true; + + retry: + afinfo = xfrm_state_get_afinfo(x->props.family); +@@ -607,6 +606,7 @@ static void ___xfrm_state_destroy(struct xfrm_state *x) + kfree(x->coaddr); + kfree(x->replay_esn); + kfree(x->preplay_esn); ++ xfrm_unset_type_offload(x); + if (x->type) { + x->type->destructor(x); + xfrm_put_type(x->type); +@@ -780,8 +780,6 @@ void xfrm_dev_state_free(struct xfrm_state *x) + struct xfrm_dev_offload *xso = &x->xso; + struct net_device *dev = READ_ONCE(xso->dev); + +- xfrm_unset_type_offload(x); +- + if (dev && dev->xfrmdev_ops) { + spin_lock_bh(&xfrm_state_dev_gc_lock); + if (!hlist_unhashed(&x->dev_gclist)) +diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c +index 614b58cb26ab7..d17ea437a1587 100644 +--- a/net/xfrm/xfrm_user.c ++++ b/net/xfrm/xfrm_user.c +@@ -977,6 +977,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, + /* override default values from above */ + xfrm_update_ae_params(x, attrs, 0); + ++ xfrm_set_type_offload(x, attrs[XFRMA_OFFLOAD_DEV]); + /* configure the hardware if offload is requested */ + if (attrs[XFRMA_OFFLOAD_DEV]) { + err = xfrm_dev_state_add(net, x, +-- +2.39.5 + diff --git a/queue-6.15/xfrm-delete-x-tunnel-as-we-delete-x.patch b/queue-6.15/xfrm-delete-x-tunnel-as-we-delete-x.patch new file mode 100644 index 0000000000..cc80b81e44 --- /dev/null +++ b/queue-6.15/xfrm-delete-x-tunnel-as-we-delete-x.patch @@ -0,0 +1,196 @@ +From 1e768c16dfda39fd01eba9d4872738753b9e2829 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 
4 Jul 2025 16:54:33 +0200 +Subject: xfrm: delete x->tunnel as we delete x + +From: Sabrina Dubroca + +[ Upstream commit b441cf3f8c4b8576639d20c8eb4aa32917602ecd ] + +The ipcomp fallback tunnels currently get deleted (from the various +lists and hashtables) as the last user state that needed that fallback +is destroyed (not deleted). If a reference to that user state still +exists, the fallback state will remain on the hashtables/lists, +triggering the WARN in xfrm_state_fini. Because of those remaining +references, the fix in commit f75a2804da39 ("xfrm: destroy xfrm_state +synchronously on net exit path") is not complete. + +We recently fixed one such situation in TCP due to defered freeing of +skbs (commit 9b6412e6979f ("tcp: drop secpath at the same time as we +currently drop dst")). This can also happen due to IP reassembly: skbs +with a secpath remain on the reassembly queue until netns +destruction. If we can't guarantee that the queues are flushed by the +time xfrm_state_fini runs, there may still be references to a (user) +xfrm_state, preventing the timely deletion of the corresponding +fallback state. + +Instead of chasing each instance of skbs holding a secpath one by one, +this patch fixes the issue directly within xfrm, by deleting the +fallback state as soon as the last user state depending on it has been +deleted. Destruction will still happen when the final reference is +dropped. + +A separate lockdep class for the fallback state is required since +we're going to lock x->tunnel while x is locked. 
+ +Fixes: 9d4139c76905 ("netns xfrm: per-netns xfrm_state_all list") +Signed-off-by: Sabrina Dubroca +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + include/net/xfrm.h | 1 - + net/ipv4/ipcomp.c | 2 ++ + net/ipv6/ipcomp6.c | 2 ++ + net/ipv6/xfrm6_tunnel.c | 2 +- + net/xfrm/xfrm_ipcomp.c | 1 - + net/xfrm/xfrm_state.c | 19 ++++++++----------- + 6 files changed, 13 insertions(+), 14 deletions(-) + +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index 29a0759d5582c..754ba8df33cff 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -441,7 +441,6 @@ int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo); + int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo); + + void xfrm_flush_gc(void); +-void xfrm_state_delete_tunnel(struct xfrm_state *x); + + struct xfrm_type { + struct module *owner; +diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c +index 5a4fb2539b08b..9a45aed508d19 100644 +--- a/net/ipv4/ipcomp.c ++++ b/net/ipv4/ipcomp.c +@@ -54,6 +54,7 @@ static int ipcomp4_err(struct sk_buff *skb, u32 info) + } + + /* We always hold one tunnel user reference to indicate a tunnel */ ++static struct lock_class_key xfrm_state_lock_key; + static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) + { + struct net *net = xs_net(x); +@@ -62,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) + t = xfrm_state_alloc(net); + if (!t) + goto out; ++ lockdep_set_class(&t->lock, &xfrm_state_lock_key); + + t->id.proto = IPPROTO_IPIP; + t->id.spi = x->props.saddr.a4; +diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c +index 72d4858dec18a..8607569de34f3 100644 +--- a/net/ipv6/ipcomp6.c ++++ b/net/ipv6/ipcomp6.c +@@ -71,6 +71,7 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + return 0; + } + ++static struct lock_class_key xfrm_state_lock_key; + static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) + { + struct net *net = xs_net(x); +@@ 
-79,6 +80,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) + t = xfrm_state_alloc(net); + if (!t) + goto out; ++ lockdep_set_class(&t->lock, &xfrm_state_lock_key); + + t->id.proto = IPPROTO_IPV6; + t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr); +diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c +index bf140ef781c1f..7fd8bc08e6eb1 100644 +--- a/net/ipv6/xfrm6_tunnel.c ++++ b/net/ipv6/xfrm6_tunnel.c +@@ -334,8 +334,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); + unsigned int i; + +- xfrm_flush_gc(); + xfrm_state_flush(net, 0, false, true); ++ xfrm_flush_gc(); + + for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) + WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); +diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c +index a38545413b801..43fdc6ed8dd17 100644 +--- a/net/xfrm/xfrm_ipcomp.c ++++ b/net/xfrm/xfrm_ipcomp.c +@@ -313,7 +313,6 @@ void ipcomp_destroy(struct xfrm_state *x) + struct ipcomp_data *ipcd = x->data; + if (!ipcd) + return; +- xfrm_state_delete_tunnel(x); + ipcomp_free_data(ipcd); + kfree(ipcd); + } +diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c +index 0cf516b4e6d92..4afa2789bfa1e 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -811,6 +811,7 @@ void __xfrm_state_destroy(struct xfrm_state *x, bool sync) + } + EXPORT_SYMBOL(__xfrm_state_destroy); + ++static void xfrm_state_delete_tunnel(struct xfrm_state *x); + int __xfrm_state_delete(struct xfrm_state *x) + { + struct net *net = xs_net(x); +@@ -838,6 +839,8 @@ int __xfrm_state_delete(struct xfrm_state *x) + + xfrm_dev_state_delete(x); + ++ xfrm_state_delete_tunnel(x); ++ + /* All xfrm_state objects are created by xfrm_state_alloc. + * The xfrm_state_alloc call gives a reference, and that + * is what we are dropping here. 
+@@ -941,10 +944,7 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync) + err = xfrm_state_delete(x); + xfrm_audit_state_delete(x, err ? 0 : 1, + task_valid); +- if (sync) +- xfrm_state_put_sync(x); +- else +- xfrm_state_put(x); ++ xfrm_state_put(x); + if (!err) + cnt++; + +@@ -3060,20 +3060,17 @@ void xfrm_flush_gc(void) + } + EXPORT_SYMBOL(xfrm_flush_gc); + +-/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ +-void xfrm_state_delete_tunnel(struct xfrm_state *x) ++static void xfrm_state_delete_tunnel(struct xfrm_state *x) + { + if (x->tunnel) { + struct xfrm_state *t = x->tunnel; + +- if (atomic_read(&t->tunnel_users) == 2) ++ if (atomic_dec_return(&t->tunnel_users) == 1) + xfrm_state_delete(t); +- atomic_dec(&t->tunnel_users); +- xfrm_state_put_sync(t); ++ xfrm_state_put(t); + x->tunnel = NULL; + } + } +-EXPORT_SYMBOL(xfrm_state_delete_tunnel); + + u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) + { +@@ -3278,8 +3275,8 @@ void xfrm_state_fini(struct net *net) + unsigned int sz; + + flush_work(&net->xfrm.state_hash_work); +- flush_work(&xfrm_state_gc_work); + xfrm_state_flush(net, 0, false, true); ++ flush_work(&xfrm_state_gc_work); + + WARN_ON(!list_empty(&net->xfrm.state_all)); + +-- +2.39.5 + diff --git a/queue-6.15/xfrm-interface-fix-use-after-free-after-changing-col.patch b/queue-6.15/xfrm-interface-fix-use-after-free-after-changing-col.patch new file mode 100644 index 0000000000..276d6dbcd6 --- /dev/null +++ b/queue-6.15/xfrm-interface-fix-use-after-free-after-changing-col.patch @@ -0,0 +1,97 @@ +From c8312693b9f666e3f9d2a8a0579b0d7c9ca22e85 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 3 Jul 2025 10:02:58 -0700 +Subject: xfrm: interface: fix use-after-free after changing collect_md xfrm + interface + +From: Eyal Birger + +[ Upstream commit a90b2a1aaacbcf0f91d7e4868ad6c51c5dee814b ] + +collect_md property on xfrm interfaces can only be set on device creation, +thus xfrmi_changelink() should fail when 
called on such interfaces. + +The check to enforce this was done only in the case where the xi was +returned from xfrmi_locate() which doesn't look for the collect_md +interface, and thus the validation was never reached. + +Calling changelink would thus errornously place the special interface xi +in the xfrmi_net->xfrmi hash, but since it also exists in the +xfrmi_net->collect_md_xfrmi pointer it would lead to a double free when +the net namespace was taken down [1]. + +Change the check to use the xi from netdev_priv which is available earlier +in the function to prevent changes in xfrm collect_md interfaces. + +[1] resulting oops: +[ 8.516540] kernel BUG at net/core/dev.c:12029! +[ 8.516552] Oops: invalid opcode: 0000 [#1] SMP NOPTI +[ 8.516559] CPU: 0 UID: 0 PID: 12 Comm: kworker/u80:0 Not tainted 6.15.0-virtme #5 PREEMPT(voluntary) +[ 8.516565] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 +[ 8.516569] Workqueue: netns cleanup_net +[ 8.516579] RIP: 0010:unregister_netdevice_many_notify+0x101/0xab0 +[ 8.516590] Code: 90 0f 0b 90 48 8b b0 78 01 00 00 48 8b 90 80 01 00 00 48 89 56 08 48 89 32 4c 89 80 78 01 00 00 48 89 b8 80 01 00 00 eb ac 90 <0f> 0b 48 8b 45 00 4c 8d a0 88 fe ff ff 48 39 c5 74 5c 41 80 bc 24 +[ 8.516593] RSP: 0018:ffffa93b8006bd30 EFLAGS: 00010206 +[ 8.516598] RAX: ffff98fe4226e000 RBX: ffffa93b8006bd58 RCX: ffffa93b8006bc60 +[ 8.516601] RDX: 0000000000000004 RSI: 0000000000000000 RDI: dead000000000122 +[ 8.516603] RBP: ffffa93b8006bdd8 R08: dead000000000100 R09: ffff98fe4133c100 +[ 8.516605] R10: 0000000000000000 R11: 00000000000003d2 R12: ffffa93b8006be00 +[ 8.516608] R13: ffffffff96c1a510 R14: ffffffff96c1a510 R15: ffffa93b8006be00 +[ 8.516615] FS: 0000000000000000(0000) GS:ffff98fee73b7000(0000) knlGS:0000000000000000 +[ 8.516619] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 8.516622] CR2: 00007fcd2abd0700 CR3: 000000003aa40000 CR4: 0000000000752ef0 +[ 8.516625] PKRU: 55555554 +[ 
8.516627] Call Trace: +[ 8.516632] +[ 8.516635] ? rtnl_is_locked+0x15/0x20 +[ 8.516641] ? unregister_netdevice_queue+0x29/0xf0 +[ 8.516650] ops_undo_list+0x1f2/0x220 +[ 8.516659] cleanup_net+0x1ad/0x2e0 +[ 8.516664] process_one_work+0x160/0x380 +[ 8.516673] worker_thread+0x2aa/0x3c0 +[ 8.516679] ? __pfx_worker_thread+0x10/0x10 +[ 8.516686] kthread+0xfb/0x200 +[ 8.516690] ? __pfx_kthread+0x10/0x10 +[ 8.516693] ? __pfx_kthread+0x10/0x10 +[ 8.516697] ret_from_fork+0x82/0xf0 +[ 8.516705] ? __pfx_kthread+0x10/0x10 +[ 8.516709] ret_from_fork_asm+0x1a/0x30 +[ 8.516718] + +Fixes: abc340b38ba2 ("xfrm: interface: support collect metadata mode") +Reported-by: Lonial Con +Signed-off-by: Eyal Birger +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + net/xfrm/xfrm_interface_core.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c +index 622445f041d32..fed96bedd54e3 100644 +--- a/net/xfrm/xfrm_interface_core.c ++++ b/net/xfrm/xfrm_interface_core.c +@@ -875,7 +875,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], + return -EINVAL; + } + +- if (p.collect_md) { ++ if (p.collect_md || xi->p.collect_md) { + NL_SET_ERR_MSG(extack, "collect_md can't be changed"); + return -EINVAL; + } +@@ -886,11 +886,6 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], + } else { + if (xi->dev != dev) + return -EEXIST; +- if (xi->p.collect_md) { +- NL_SET_ERR_MSG(extack, +- "device can't be changed to collect_md"); +- return -EINVAL; +- } + } + + return xfrmi_update(xi, &p); +-- +2.39.5 + diff --git a/queue-6.15/xfrm-ipcomp-adjust-transport-header-after-decompress.patch b/queue-6.15/xfrm-ipcomp-adjust-transport-header-after-decompress.patch new file mode 100644 index 0000000000..ca8a3faac2 --- /dev/null +++ b/queue-6.15/xfrm-ipcomp-adjust-transport-header-after-decompress.patch @@ -0,0 +1,55 @@ +From f20ce8574ef9436167e99c2bd4a3fa19c89e74de Mon 
Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 24 Jun 2025 15:11:15 +0200 +Subject: xfrm: ipcomp: adjust transport header after decompressing + +From: Fernando Fernandez Mancera + +[ Upstream commit 2ca58d87ebae20906cf808ef813d747db0177a18 ] + +The skb transport header pointer needs to be set to the network header +pointer plus the size of the ipcomp header. + +This shows up when running traffic over ipcomp using transport mode. +After being reinjected, packets are dropped because the header isn't +adjusted properly and some checks can be triggered. E.g. the skb is +mistakenly considered an IP fragmented packet and is later dropped. + +kworker/30:1-mm 443 [030] 102.055250: skb:kfree_skb:skbaddr=0xffff8f104aa3ce00 rx_sk=( + ffffffff8419f1f4 sk_skb_reason_drop+0x94 ([kernel.kallsyms]) + ffffffff8419f1f4 sk_skb_reason_drop+0x94 ([kernel.kallsyms]) + ffffffff84281420 ip_defrag+0x4b0 ([kernel.kallsyms]) + ffffffff8428006e ip_local_deliver+0x4e ([kernel.kallsyms]) + ffffffff8432afb1 xfrm_trans_reinject+0xe1 ([kernel.kallsyms]) + ffffffff83758230 process_one_work+0x190 ([kernel.kallsyms]) + ffffffff83758f37 worker_thread+0x2d7 ([kernel.kallsyms]) + ffffffff83761cc9 kthread+0xf9 ([kernel.kallsyms]) + ffffffff836c3437 ret_from_fork+0x197 ([kernel.kallsyms]) + ffffffff836718da ret_from_fork_asm+0x1a ([kernel.kallsyms]) + +Fixes: eb2953d26971 ("xfrm: ipcomp: Use crypto_acomp interface") +Link: https://bugzilla.suse.com/1244532 +Signed-off-by: Fernando Fernandez Mancera +Acked-by: Herbert Xu +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + net/xfrm/xfrm_ipcomp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c +index 907c3ccb440da..a38545413b801 100644 +--- a/net/xfrm/xfrm_ipcomp.c ++++ b/net/xfrm/xfrm_ipcomp.c +@@ -97,7 +97,7 @@ static int ipcomp_input_done2(struct sk_buff *skb, int err) + struct ip_comp_hdr *ipch = ip_comp_hdr(skb); + const int plen = skb->len; + +- 
skb_reset_transport_header(skb); ++ skb->transport_header = skb->network_header + sizeof(*ipch); + + return ipcomp_post_acomp(skb, err, 0) ?: + skb->len < (plen + sizeof(ip_comp_hdr)) ? -EINVAL : +-- +2.39.5 + diff --git a/queue-6.15/xfrm-set-transport-header-to-fix-udp-gro-handling.patch b/queue-6.15/xfrm-set-transport-header-to-fix-udp-gro-handling.patch new file mode 100644 index 0000000000..49c73c575f --- /dev/null +++ b/queue-6.15/xfrm-set-transport-header-to-fix-udp-gro-handling.patch @@ -0,0 +1,56 @@ +From 87344881f9906e935671a489c15e11f0e210249a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 24 Jun 2025 14:47:20 +0200 +Subject: xfrm: Set transport header to fix UDP GRO handling + +From: Tobias Brunner + +[ Upstream commit 3ac9e29211fa2df5539ba0d742c8fe9fe95fdc79 ] + +The referenced commit replaced a call to __xfrm4|6_udp_encap_rcv() with +a custom check for non-ESP markers. But what the called function also +did was setting the transport header to the ESP header. The function +that follows, esp4|6_gro_receive(), relies on that being set when it calls +xfrm_parse_spi(). We have to set the full offset as the skb's head was +not moved yet so adding just the UDP header length won't work. 
+ +Fixes: e3fd05777685 ("xfrm: Fix UDP GRO handling for some corner cases") +Signed-off-by: Tobias Brunner +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + net/ipv4/xfrm4_input.c | 3 +++ + net/ipv6/xfrm6_input.c | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c +index 0d31a8c108d4f..f28cfd88eaf59 100644 +--- a/net/ipv4/xfrm4_input.c ++++ b/net/ipv4/xfrm4_input.c +@@ -202,6 +202,9 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) + goto out; + ++ /* set the transport header to ESP */ ++ skb_set_transport_header(skb, offset); ++ + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; + + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); +diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c +index 841c81abaaf4f..9005fc156a20e 100644 +--- a/net/ipv6/xfrm6_input.c ++++ b/net/ipv6/xfrm6_input.c +@@ -202,6 +202,9 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) + goto out; + ++ /* set the transport header to ESP */ ++ skb_set_transport_header(skb, offset); ++ + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; + + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); +-- +2.39.5 + diff --git a/queue-6.15/xfrm-state-initialize-state_ptrs-earlier-in-xfrm_sta.patch b/queue-6.15/xfrm-state-initialize-state_ptrs-earlier-in-xfrm_sta.patch new file mode 100644 index 0000000000..cfb7538b18 --- /dev/null +++ b/queue-6.15/xfrm-state-initialize-state_ptrs-earlier-in-xfrm_sta.patch @@ -0,0 +1,55 @@ +From a82d8365f22ae736ad19144300b72ca0fc7d16c3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 May 2025 17:11:17 +0200 +Subject: xfrm: state: initialize state_ptrs earlier in xfrm_state_find + +From: Sabrina Dubroca + +[ Upstream commit 94d077c331730510d5611b438640a292097341f0 ] + +In case of preemption, 
xfrm_state_look_at will find a different +pcpu_id and look up states for that other CPU. If we matched a state +for CPU2 in the state_cache while the lookup started on CPU1, we will +jump to "found", but the "best" state that we got will be ignored and +we will enter the "acquire" block. This block uses state_ptrs, which +isn't initialized at this point. + +Let's initialize state_ptrs just after taking rcu_read_lock. This will +also prevent a possible misuse in the future, if someone adjusts this +function. + +Reported-by: syzbot+7ed9d47e15e88581dc5b@syzkaller.appspotmail.com +Fixes: e952837f3ddb ("xfrm: state: fix out-of-bounds read during lookup") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Florian Westphal +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + net/xfrm/xfrm_state.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c +index 5ece039846e20..a5a3bf25fd1d4 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -1389,6 +1389,8 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, + sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); + + rcu_read_lock(); ++ xfrm_hash_ptrs_get(net, &state_ptrs); ++ + hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) { + if (x->props.family == encap_family && + x->props.reqid == tmpl->reqid && +@@ -1429,8 +1431,6 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, + else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */ + WARN_ON(1); + +- xfrm_hash_ptrs_get(net, &state_ptrs); +- + h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask); + hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) { + #ifdef CONFIG_XFRM_OFFLOAD +-- +2.39.5 + diff --git a/queue-6.15/xfrm-state-use-a-consistent-pcpu_id-in-xfrm_state_fi.patch b/queue-6.15/xfrm-state-use-a-consistent-pcpu_id-in-xfrm_state_fi.patch new 
file mode 100644 index 0000000000..eeb7c80b6f --- /dev/null +++ b/queue-6.15/xfrm-state-use-a-consistent-pcpu_id-in-xfrm_state_fi.patch @@ -0,0 +1,113 @@ +From c5b8ce62dd4dfb2c32331544009b84bf3d5afdb5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 May 2025 17:11:18 +0200 +Subject: xfrm: state: use a consistent pcpu_id in xfrm_state_find + +From: Sabrina Dubroca + +[ Upstream commit 7eb11c0ab70777b9e5145a5ba1c0a2312c3980b2 ] + +If we get preempted during xfrm_state_find, we could run +xfrm_state_look_at using a different pcpu_id than the one +xfrm_state_find saw. This could lead to ignoring states that should +have matched, and triggering acquires on a CPU that already has a pcpu +state. + + xfrm_state_find starts on CPU1 + pcpu_id = 1 + lookup starts + + xfrm_state_look_at pcpu_id = 2 + finds a state +found: + best->pcpu_num != pcpu_id (2 != 1) + if (!x && !error && !acquire_in_progress) { + ... + xfrm_state_alloc + xfrm_init_tempstate + ... + +This can be avoided by passing the original pcpu_id down to all +xfrm_state_look_at() calls. + +Also switch to raw_smp_processor_id(), since disabling preemption just to +re-enable it immediately doesn't really make sense. 
+ +Fixes: 1ddf9916ac09 ("xfrm: Add support for per cpu xfrm state handling.") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Florian Westphal +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + net/xfrm/xfrm_state.c | 19 ++++++------------- + 1 file changed, 6 insertions(+), 13 deletions(-) + +diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c +index a5a3bf25fd1d4..cef8d3c20f652 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -1307,14 +1307,8 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) + static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, + const struct flowi *fl, unsigned short family, + struct xfrm_state **best, int *acq_in_progress, +- int *error) ++ int *error, unsigned int pcpu_id) + { +- /* We need the cpu id just as a lookup key, +- * we don't require it to be stable. +- */ +- unsigned int pcpu_id = get_cpu(); +- put_cpu(); +- + /* Resolution logic: + * 1. There is a valid state with matching selector. Done. + * 2. Valid state with inappropriate selector. Skip. +@@ -1381,8 +1375,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, + /* We need the cpu id just as a lookup key, + * we don't require it to be stable. 
+ */ +- pcpu_id = get_cpu(); +- put_cpu(); ++ pcpu_id = raw_smp_processor_id(); + + to_put = NULL; + +@@ -1402,7 +1395,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, + tmpl->id.proto == x->id.proto && + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) + xfrm_state_look_at(pol, x, fl, encap_family, +- &best, &acquire_in_progress, &error); ++ &best, &acquire_in_progress, &error, pcpu_id); + } + + if (best) +@@ -1419,7 +1412,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, + tmpl->id.proto == x->id.proto && + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) + xfrm_state_look_at(pol, x, fl, family, +- &best, &acquire_in_progress, &error); ++ &best, &acquire_in_progress, &error, pcpu_id); + } + + cached: +@@ -1460,7 +1453,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, + tmpl->id.proto == x->id.proto && + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) + xfrm_state_look_at(pol, x, fl, family, +- &best, &acquire_in_progress, &error); ++ &best, &acquire_in_progress, &error, pcpu_id); + } + if (best || acquire_in_progress) + goto found; +@@ -1495,7 +1488,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, + tmpl->id.proto == x->id.proto && + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) + xfrm_state_look_at(pol, x, fl, family, +- &best, &acquire_in_progress, &error); ++ &best, &acquire_in_progress, &error, pcpu_id); + } + + found: +-- +2.39.5 +