From: Sasha Levin Date: Sat, 15 Mar 2025 01:12:03 +0000 (-0400) Subject: Fixes for 6.13 X-Git-Tag: v6.6.84~55 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=029bb04144d446c8d96211a0d639f1d2ffdb6b65;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.13 Signed-off-by: Sasha Levin --- diff --git a/queue-6.13/bluetooth-hci_event-fix-enabling-passive-scanning.patch b/queue-6.13/bluetooth-hci_event-fix-enabling-passive-scanning.patch new file mode 100644 index 0000000000..7e13547486 --- /dev/null +++ b/queue-6.13/bluetooth-hci_event-fix-enabling-passive-scanning.patch @@ -0,0 +1,102 @@ +From e6a4dc8d2f51be5b0423a8830ccd7eeea176d405 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Feb 2025 13:12:54 -0500 +Subject: Bluetooth: hci_event: Fix enabling passive scanning + +From: Luiz Augusto von Dentz + +[ Upstream commit 0bdd88971519cfa8a76d1a4dde182e74cfbd5d5c ] + +Passive scanning shall only be enabled when disconnecting LE links, +otherwise it may result in triggering scanning when e.g. an ISO +link disconnects: + +> HCI Event: LE Meta Event (0x3e) plen 29 + LE Connected Isochronous Stream Established (0x19) + Status: Success (0x00) + Connection Handle: 257 + CIG Synchronization Delay: 0 us (0x000000) + CIS Synchronization Delay: 0 us (0x000000) + Central to Peripheral Latency: 10000 us (0x002710) + Peripheral to Central Latency: 10000 us (0x002710) + Central to Peripheral PHY: LE 2M (0x02) + Peripheral to Central PHY: LE 2M (0x02) + Number of Subevents: 1 + Central to Peripheral Burst Number: 1 + Peripheral to Central Burst Number: 1 + Central to Peripheral Flush Timeout: 2 + Peripheral to Central Flush Timeout: 2 + Central to Peripheral MTU: 320 + Peripheral to Central MTU: 160 + ISO Interval: 10.00 msec (0x0008) +... 
+> HCI Event: Disconnect Complete (0x05) plen 4 + Status: Success (0x00) + Handle: 257 + Reason: Remote User Terminated Connection (0x13) +< HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6 + Extended scan: Enabled (0x01) + Filter duplicates: Enabled (0x01) + Duration: 0 msec (0x0000) + Period: 0.00 sec (0x0000) + +Fixes: 9fcb18ef3acb ("Bluetooth: Introduce LE auto connect options") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_event.c | 37 ++++++++++++++++++++++--------------- + 1 file changed, 22 insertions(+), 15 deletions(-) + +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index 2cc7a93063501..903b0b52692aa 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -3391,23 +3391,30 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, + hci_update_scan(hdev); + } + +- params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); +- if (params) { +- switch (params->auto_connect) { +- case HCI_AUTO_CONN_LINK_LOSS: +- if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) ++ /* Re-enable passive scanning if disconnected device is marked ++ * as auto-connectable. 
++ */ ++ if (conn->type == LE_LINK) { ++ params = hci_conn_params_lookup(hdev, &conn->dst, ++ conn->dst_type); ++ if (params) { ++ switch (params->auto_connect) { ++ case HCI_AUTO_CONN_LINK_LOSS: ++ if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) ++ break; ++ fallthrough; ++ ++ case HCI_AUTO_CONN_DIRECT: ++ case HCI_AUTO_CONN_ALWAYS: ++ hci_pend_le_list_del_init(params); ++ hci_pend_le_list_add(params, ++ &hdev->pend_le_conns); ++ hci_update_passive_scan(hdev); + break; +- fallthrough; + +- case HCI_AUTO_CONN_DIRECT: +- case HCI_AUTO_CONN_ALWAYS: +- hci_pend_le_list_del_init(params); +- hci_pend_le_list_add(params, &hdev->pend_le_conns); +- hci_update_passive_scan(hdev); +- break; +- +- default: +- break; ++ default: ++ break; ++ } + } + } + +-- +2.39.5 + diff --git a/queue-6.13/bluetooth-sco-fix-sco_conn-refcounting-on-sco_conn_r.patch b/queue-6.13/bluetooth-sco-fix-sco_conn-refcounting-on-sco_conn_r.patch new file mode 100644 index 0000000000..091c453c42 --- /dev/null +++ b/queue-6.13/bluetooth-sco-fix-sco_conn-refcounting-on-sco_conn_r.patch @@ -0,0 +1,67 @@ +From 799ab16e9723fdd868bcd428bdc82626439a1921 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Feb 2025 23:28:15 +0200 +Subject: Bluetooth: SCO: fix sco_conn refcounting on sco_conn_ready + +From: Pauli Virtanen + +[ Upstream commit 8d74c9106be8da051b22f0cd81e665f17d51ba5d ] + +sco_conn refcount shall not be incremented a second time if the sk +already owns the refcount, so hold only when adding new chan. + +Add sco_conn_hold() for clarity, as refcnt is never zero here due to the +sco_conn_add(). + +Fixes SCO socket shutdown not actually closing the SCO connection. 
+ +Fixes: ed9588554943 ("Bluetooth: SCO: remove the redundant sco_conn_put") +Signed-off-by: Pauli Virtanen +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/sco.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c +index aa7bfe26cb40f..ed6846864ea93 100644 +--- a/net/bluetooth/sco.c ++++ b/net/bluetooth/sco.c +@@ -107,6 +107,14 @@ static void sco_conn_put(struct sco_conn *conn) + kref_put(&conn->ref, sco_conn_free); + } + ++static struct sco_conn *sco_conn_hold(struct sco_conn *conn) ++{ ++ BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref)); ++ ++ kref_get(&conn->ref); ++ return conn; ++} ++ + static struct sco_conn *sco_conn_hold_unless_zero(struct sco_conn *conn) + { + if (!conn) +@@ -1353,6 +1361,7 @@ static void sco_conn_ready(struct sco_conn *conn) + bacpy(&sco_pi(sk)->src, &conn->hcon->src); + bacpy(&sco_pi(sk)->dst, &conn->hcon->dst); + ++ sco_conn_hold(conn); + hci_conn_hold(conn->hcon); + __sco_chan_add(conn, sk, parent); + +@@ -1411,8 +1420,10 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) + struct sco_conn *conn; + + conn = sco_conn_add(hcon); +- if (conn) ++ if (conn) { + sco_conn_ready(conn); ++ sco_conn_put(conn); ++ } + } else + sco_conn_del(hcon, bt_to_errno(status)); + } +-- +2.39.5 + diff --git a/queue-6.13/bonding-fix-incorrect-mac-address-setting-to-receive.patch b/queue-6.13/bonding-fix-incorrect-mac-address-setting-to-receive.patch new file mode 100644 index 0000000000..61103b88f0 --- /dev/null +++ b/queue-6.13/bonding-fix-incorrect-mac-address-setting-to-receive.patch @@ -0,0 +1,131 @@ +From ae74a706d3d31742102b345c7a7f1e2e18d4de08 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 02:39:22 +0000 +Subject: bonding: fix incorrect MAC address setting to receive NS messages + +From: Hangbin Liu + +[ Upstream commit 0c5e145a350de3b38cd5ae77a401b12c46fb7c1d ] + +When validation on the backup 
slave is enabled, we need to validate the +Neighbor Solicitation (NS) messages received on the backup slave. To +receive these messages, the correct destination MAC address must be added +to the slave. However, the target in bonding is a unicast address, which +we cannot use directly. Instead, we should first convert it to a +Solicited-Node Multicast Address and then derive the corresponding MAC +address. + +Fix the incorrect MAC address setting on both slave_set_ns_maddr() and +slave_set_ns_maddrs(). Since the two function names are similar. Add +some description for the functions. Also only use one mac_addr variable +in slave_set_ns_maddr() to save some code and logic. + +Fixes: 8eb36164d1a6 ("bonding: add ns target multicast address to slave device") +Acked-by: Jay Vosburgh +Reviewed-by: Nikolay Aleksandrov +Signed-off-by: Hangbin Liu +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250306023923.38777-2-liuhangbin@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_options.c | 55 +++++++++++++++++++++++++----- + 1 file changed, 47 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c +index 327b6ecdc77e0..d1b095af253bd 100644 +--- a/drivers/net/bonding/bond_options.c ++++ b/drivers/net/bonding/bond_options.c +@@ -1242,10 +1242,28 @@ static bool slave_can_set_ns_maddr(const struct bonding *bond, struct slave *sla + slave->dev->flags & IFF_MULTICAST; + } + ++/** ++ * slave_set_ns_maddrs - add/del all NS mac addresses for slave ++ * @bond: bond device ++ * @slave: slave device ++ * @add: add or remove all the NS mac addresses ++ * ++ * This function tries to add or delete all the NS mac addresses on the slave ++ * ++ * Note, the IPv6 NS target address is the unicast address in Neighbor ++ * Solicitation (NS) message. The dest address of NS message should be ++ * solicited-node multicast address of the target. 
The dest mac of NS message ++ * is converted from the solicited-node multicast address. ++ * ++ * This function is called when ++ * * arp_validate changes ++ * * enslaving, releasing new slaves ++ */ + static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add) + { + struct in6_addr *targets = bond->params.ns_targets; + char slot_maddr[MAX_ADDR_LEN]; ++ struct in6_addr mcaddr; + int i; + + if (!slave_can_set_ns_maddr(bond, slave)) +@@ -1255,7 +1273,8 @@ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool + if (ipv6_addr_any(&targets[i])) + break; + +- if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) { ++ addrconf_addr_solict_mult(&targets[i], &mcaddr); ++ if (!ndisc_mc_map(&mcaddr, slot_maddr, slave->dev, 0)) { + if (add) + dev_mc_add(slave->dev, slot_maddr); + else +@@ -1278,23 +1297,43 @@ void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave) + slave_set_ns_maddrs(bond, slave, false); + } + ++/** ++ * slave_set_ns_maddr - set new NS mac address for slave ++ * @bond: bond device ++ * @slave: slave device ++ * @target: the new IPv6 target ++ * @slot: the old IPv6 target in the slot ++ * ++ * This function tries to replace the old mac address to new one on the slave. ++ * ++ * Note, the target/slot IPv6 address is the unicast address in Neighbor ++ * Solicitation (NS) message. The dest address of NS message should be ++ * solicited-node multicast address of the target. The dest mac of NS message ++ * is converted from the solicited-node multicast address. ++ * ++ * This function is called when ++ * * An IPv6 NS target is added or removed. 
++ */ + static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave, + struct in6_addr *target, struct in6_addr *slot) + { +- char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN]; ++ char mac_addr[MAX_ADDR_LEN]; ++ struct in6_addr mcast_addr; + + if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave)) + return; + +- /* remove the previous maddr from slave */ ++ /* remove the previous mac addr from slave */ ++ addrconf_addr_solict_mult(slot, &mcast_addr); + if (!ipv6_addr_any(slot) && +- !ndisc_mc_map(slot, slot_maddr, slave->dev, 0)) +- dev_mc_del(slave->dev, slot_maddr); ++ !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0)) ++ dev_mc_del(slave->dev, mac_addr); + +- /* add new maddr on slave if target is set */ ++ /* add new mac addr on slave if target is set */ ++ addrconf_addr_solict_mult(target, &mcast_addr); + if (!ipv6_addr_any(target) && +- !ndisc_mc_map(target, target_maddr, slave->dev, 0)) +- dev_mc_add(slave->dev, target_maddr); ++ !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0)) ++ dev_mc_add(slave->dev, mac_addr); + } + + static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot, +-- +2.39.5 + diff --git a/queue-6.13/drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch b/queue-6.13/drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch new file mode 100644 index 0000000000..a0b2099659 --- /dev/null +++ b/queue-6.13/drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch @@ -0,0 +1,85 @@ +From ae3d7fb00edba0b31e5c40c5c79d80e72f38528b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 20:52:08 -0700 +Subject: Drivers: hv: vmbus: Don't release fb_mmio resource in + vmbus_free_mmio() + +From: Michael Kelley + +[ Upstream commit 73fe9073c0cc28056cb9de0c8a516dac070f1d1f ] + +The VMBus driver manages the MMIO space it owns via the hyperv_mmio +resource tree. 
Because the synthetic video framebuffer portion of the +MMIO space is initially setup by the Hyper-V host for each guest, the +VMBus driver does an early reserve of that portion of MMIO space in the +hyperv_mmio resource tree. It saves a pointer to that resource in +fb_mmio. When a VMBus driver requests MMIO space and passes "true" +for the "fb_overlap_ok" argument, the reserved framebuffer space is +used if possible. In that case it's not necessary to do another request +against the "shadow" hyperv_mmio resource tree because that resource +was already requested in the early reserve steps. + +However, the vmbus_free_mmio() function currently does no special +handling for the fb_mmio resource. When a framebuffer device is +removed, or the driver is unbound, the current code for +vmbus_free_mmio() releases the reserved resource, leaving fb_mmio +pointing to memory that has been freed. If the same or another +driver is subsequently bound to the device, vmbus_allocate_mmio() +checks against fb_mmio, and potentially gets garbage. Furthermore +a second unbind operation produces this "nonexistent resource" error +because of the unbalanced behavior between vmbus_allocate_mmio() and +vmbus_free_mmio(): + +[ 55.499643] resource: Trying to free nonexistent + resource <0x00000000f0000000-0x00000000f07fffff> + +Fix this by adding logic to vmbus_free_mmio() to recognize when +MMIO space in the fb_mmio reserved area would be released, and don't +release it. This filtering ensures the fb_mmio resource always exists, +and makes vmbus_free_mmio() more parallel with vmbus_allocate_mmio(). 
+ +Fixes: be000f93e5d7 ("drivers:hv: Track allocations of children of hv_vmbus in private resource tree") +Signed-off-by: Michael Kelley +Tested-by: Saurabh Sengar +Reviewed-by: Saurabh Sengar +Link: https://lore.kernel.org/r/20250310035208.275764-1-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20250310035208.275764-1-mhklinux@outlook.com> +Signed-off-by: Sasha Levin +--- + drivers/hv/vmbus_drv.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 0f6cd44fff292..6e55a1a2613d3 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -2262,12 +2262,25 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size) + struct resource *iter; + + mutex_lock(&hyperv_mmio_lock); ++ ++ /* ++ * If all bytes of the MMIO range to be released are within the ++ * special case fb_mmio shadow region, skip releasing the shadow ++ * region since no corresponding __request_region() was done ++ * in vmbus_allocate_mmio(). 
+ */ + + if (fb_mmio && start >= fb_mmio->start && + (start + size - 1 <= fb_mmio->end)) + goto skip_shadow_release; + + for (iter = hyperv_mmio; iter; iter = iter->sibling) { + if ((iter->start >= start + size) || (iter->end <= start)) + continue; + + __release_region(iter, start, size); + } + +skip_shadow_release: + release_mem_region(start, size); + mutex_unlock(&hyperv_mmio_lock); + +-- +2.39.5 + diff --git a/queue-6.13/drm-hyperv-fix-address-space-leak-when-hyper-v-drm-d.patch b/queue-6.13/drm-hyperv-fix-address-space-leak-when-hyper-v-drm-d.patch new file mode 100644 index 0000000000..0e20e0a114 --- /dev/null +++ b/queue-6.13/drm-hyperv-fix-address-space-leak-when-hyper-v-drm-d.patch @@ -0,0 +1,53 @@ +From 5d04ddf47272fd4dd6d21a9a51be5f38adc6a1c3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Feb 2025 11:34:41 -0800 +Subject: drm/hyperv: Fix address space leak when Hyper-V DRM device is removed + +From: Michael Kelley + +[ Upstream commit aed709355fd05ef747e1af24a1d5d78cd7feb81e ] + +When a Hyper-V DRM device is probed, the driver allocates MMIO space for +the vram, and maps it cacheable. If the device is removed, or in the error +path for device probing, the MMIO space is released but no unmap is done. +Consequently the kernel address space for the mapping is leaked. + +Fix this by adding iounmap() calls in the device removal path, and in the +error path during device probing. 
+ +Fixes: f1f63cbb705d ("drm/hyperv: Fix an error handling path in hyperv_vmbus_probe()") +Fixes: a0ab5abced55 ("drm/hyperv : Removing the restruction of VRAM allocation with PCI bar size") +Signed-off-by: Michael Kelley +Reviewed-by: Saurabh Sengar +Tested-by: Saurabh Sengar +Link: https://lore.kernel.org/r/20250210193441.2414-1-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20250210193441.2414-1-mhklinux@outlook.com> +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +index e0953777a2066..b491827941f19 100644 +--- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c ++++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +@@ -156,6 +156,7 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, + return 0; + + err_free_mmio: ++ iounmap(hv->vram); + vmbus_free_mmio(hv->mem->start, hv->fb_size); + err_vmbus_close: + vmbus_close(hdev->channel); +@@ -174,6 +175,7 @@ static void hyperv_vmbus_remove(struct hv_device *hdev) + vmbus_close(hdev->channel); + hv_set_drvdata(hdev, NULL); + ++ iounmap(hv->vram); + vmbus_free_mmio(hv->mem->start, hv->fb_size); + } + +-- +2.39.5 + diff --git a/queue-6.13/eth-bnxt-do-not-update-checksum-in-bnxt_xdp_build_sk.patch b/queue-6.13/eth-bnxt-do-not-update-checksum-in-bnxt_xdp_build_sk.patch new file mode 100644 index 0000000000..595e32c544 --- /dev/null +++ b/queue-6.13/eth-bnxt-do-not-update-checksum-in-bnxt_xdp_build_sk.patch @@ -0,0 +1,137 @@ +From 9e40e7571da7260774ab7bd48066debedc889ab1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 13:42:15 +0000 +Subject: eth: bnxt: do not update checksum in bnxt_xdp_build_skb() + +From: Taehee Yoo + +[ Upstream commit c03e7d05aa0e2f7e9a9ce5ad8a12471a53f941dc ] + +The bnxt_rx_pkt() updates ip_summed value at the end if checksum offload +is enabled. 
+When the XDP-MB program is attached and it returns XDP_PASS, the +bnxt_xdp_build_skb() is called to update skb_shared_info. +The main purpose of bnxt_xdp_build_skb() is to update skb_shared_info, +but it updates ip_summed value too if checksum offload is enabled. +This is actually duplicate work. + +When the bnxt_rx_pkt() updates ip_summed value, it checks if ip_summed +is CHECKSUM_NONE or not. +It means that ip_summed should be CHECKSUM_NONE at this moment. +But ip_summed may already be updated to CHECKSUM_UNNECESSARY in the +XDP-MB-PASS path. +So skb_checksum_none_assert() WARNs about it. + +This is duplicate work and updating ip_summed in the +bnxt_xdp_build_skb() is not needed. + +Splat looks like: +WARNING: CPU: 3 PID: 5782 at ./include/linux/skbuff.h:5155 bnxt_rx_pkt+0x479b/0x7610 [bnxt_en] +Modules linked in: bnxt_re bnxt_en rdma_ucm rdma_cm iw_cm ib_cm ib_uverbs veth xt_nat xt_tcpudp xt_conntrack nft_chain_nat xt_MASQUERADE nf_] +CPU: 3 UID: 0 PID: 5782 Comm: socat Tainted: G W 6.14.0-rc4+ #27 +Tainted: [W]=WARN +Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 +RIP: 0010:bnxt_rx_pkt+0x479b/0x7610 [bnxt_en] +Code: 54 24 0c 4c 89 f1 4c 89 ff c1 ea 1f ff d3 0f 1f 00 49 89 c6 48 85 c0 0f 84 4c e5 ff ff 48 89 c7 e8 ca 3d a0 c8 e9 8f f4 ff ff <0f> 0b f +RSP: 0018:ffff88881ba09928 EFLAGS: 00010202 +RAX: 0000000000000000 RBX: 00000000c7590303 RCX: 0000000000000000 +RDX: 1ffff1104e7d1610 RSI: 0000000000000001 RDI: ffff8881c91300b8 +RBP: ffff88881ba09b28 R08: ffff888273e8b0d0 R09: ffff888273e8b070 +R10: ffff888273e8b010 R11: ffff888278b0f000 R12: ffff888273e8b080 +R13: ffff8881c9130e00 R14: ffff8881505d3800 R15: ffff888273e8b000 +FS: 00007f5a2e7be080(0000) GS:ffff88881ba00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007fff2e708ff8 CR3: 000000013e3b0000 CR4: 00000000007506f0 +PKRU: 55555554 +Call Trace: + + ? __warn+0xcd/0x2f0 + ? bnxt_rx_pkt+0x479b/0x7610 + ? 
report_bug+0x326/0x3c0 + ? handle_bug+0x53/0xa0 + ? exc_invalid_op+0x14/0x50 + ? asm_exc_invalid_op+0x16/0x20 + ? bnxt_rx_pkt+0x479b/0x7610 + ? bnxt_rx_pkt+0x3e41/0x7610 + ? __pfx_bnxt_rx_pkt+0x10/0x10 + ? napi_complete_done+0x2cf/0x7d0 + __bnxt_poll_work+0x4e8/0x1220 + ? __pfx___bnxt_poll_work+0x10/0x10 + ? __pfx_mark_lock.part.0+0x10/0x10 + bnxt_poll_p5+0x36a/0xfa0 + ? __pfx_bnxt_poll_p5+0x10/0x10 + __napi_poll.constprop.0+0xa0/0x440 + net_rx_action+0x899/0xd00 +... + +Following ping.py patch adds xdp-mb-pass case. so ping.py is going +to be able to reproduce this issue. + +Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") +Signed-off-by: Taehee Yoo +Reviewed-by: Somnath Kotur +Link: https://patch.msgid.link/20250309134219.91670-5-ap420073@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 11 ++--------- + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 3 +-- + 3 files changed, 5 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 1b8ed81ef497e..96f8201a41532 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -2219,7 +2219,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + if (!skb) + goto oom_next_rx; + } else { +- skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1); ++ skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, ++ rxr->page_pool, &xdp); + if (!skb) { + /* we should be able to free the old skb here */ + bnxt_xdp_buff_frags_free(rxr, &xdp); +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +index f9e7e71b89485..8726657f5cb9e 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +@@ -456,20 
+456,13 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) + + struct sk_buff * + bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, +- struct page_pool *pool, struct xdp_buff *xdp, +- struct rx_cmp_ext *rxcmp1) ++ struct page_pool *pool, struct xdp_buff *xdp) + { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + + if (!skb) + return NULL; +- skb_checksum_none_assert(skb); +- if (RX_CMP_L4_CS_OK(rxcmp1)) { +- if (bp->dev->features & NETIF_F_RXCSUM) { +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- skb->csum_level = RX_CMP_ENCAP(rxcmp1); +- } +- } ++ + xdp_update_skb_shared_info(skb, num_frags, + sinfo->xdp_frags_size, + BNXT_RX_PAGE_SIZE * num_frags, +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +index 0122782400b8a..220285e190fcd 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +@@ -33,6 +33,5 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, + struct xdp_buff *xdp); + struct sk_buff *bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, + u8 num_frags, struct page_pool *pool, +- struct xdp_buff *xdp, +- struct rx_cmp_ext *rxcmp1); ++ struct xdp_buff *xdp); + #endif +-- +2.39.5 + diff --git a/queue-6.13/eth-bnxt-do-not-use-bnxt_vnic_ntuple-unconditionally.patch b/queue-6.13/eth-bnxt-do-not-use-bnxt_vnic_ntuple-unconditionally.patch new file mode 100644 index 0000000000..d01ac240f8 --- /dev/null +++ b/queue-6.13/eth-bnxt-do-not-use-bnxt_vnic_ntuple-unconditionally.patch @@ -0,0 +1,54 @@ +From 6dbc12fb326365c2035a48eb747ac056b8a14e7c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 13:42:14 +0000 +Subject: eth: bnxt: do not use BNXT_VNIC_NTUPLE unconditionally in queue + restart logic + +From: Taehee Yoo + +[ Upstream commit 661958552eda5bf64bfafb4821cbdded935f1f68 ] + +When a queue is restarted, it sets MRU to 0 for stopping packet flow. 
+MRU variable is a member of vnic_info[], the first vnic_info is default +and the second is ntuple. +Only when ntuple is enabled(ethtool -K eth0 ntuple on), vnic_info for +ntuple is allocated in init logic. +The bp->nr_vnics indicates how many vnic_info are allocated. +However bnxt_queue_{start | stop}() accesses vnic_info[BNXT_VNIC_NTUPLE] +regardless of ntuple state. + +Reviewed-by: Somnath Kotur +Fixes: b9d2956e869c ("bnxt_en: stop packet flow during bnxt_queue_stop/start") +Signed-off-by: Taehee Yoo +Link: https://patch.msgid.link/20250309134219.91670-4-ap420073@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index d5d91bbc67924..1b8ed81ef497e 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -15590,7 +15590,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) + cpr = &rxr->bnapi->cp_ring; + cpr->sw_stats->rx.rx_resets++; + +- for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { ++ for (i = 0; i <= bp->nr_vnics; i++) { + vnic = &bp->vnic_info[i]; + + rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); +@@ -15618,7 +15618,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) + struct bnxt_vnic_info *vnic; + int i; + +- for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { ++ for (i = 0; i <= bp->nr_vnics; i++) { + vnic = &bp->vnic_info[i]; + vnic->mru = 0; + bnxt_hwrm_vnic_update(bp, vnic, +-- +2.39.5 + diff --git a/queue-6.13/eth-bnxt-fix-kernel-panic-in-the-bnxt_get_queue_stat.patch b/queue-6.13/eth-bnxt-fix-kernel-panic-in-the-bnxt_get_queue_stat.patch new file mode 100644 index 0000000000..6ceabc12c6 --- /dev/null +++ b/queue-6.13/eth-bnxt-fix-kernel-panic-in-the-bnxt_get_queue_stat.patch @@ -0,0 +1,100 @@ +From 96b5481414e8ec4273444ca714c12e5c378633f0 
Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 13:42:16 +0000 +Subject: eth: bnxt: fix kernel panic in the bnxt_get_queue_stats{rx | tx} + +From: Taehee Yoo + +[ Upstream commit f09af5fdfbd9b0fcee73aab1116904c53b199e97 ] + +When qstats-get operation is executed, callbacks of netdev_stats_ops +are called. The bnxt_get_queue_stats{rx | tx} collect per-queue stats +from sw_stats in the rings. +But {rx | tx | cp}_ring are allocated when the interface is up. +So, these rings are not allocated when the interface is down. + +The qstats-get is allowed even if the interface is down. However, +the bnxt_get_queue_stats{rx | tx}() accesses cp_ring and tx_ring +without null check. +So, it needs to avoid accessing rings if the interface is down. + +Reproducer: + ip link set $interface down + ./cli.py --spec netdev.yaml --dump qstats-get +OR + ip link set $interface down + python ./stats.py + +Splat looks like: + BUG: kernel NULL pointer dereference, address: 0000000000000000 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 1680fa067 P4D 1680fa067 PUD 16be3b067 PMD 0 + Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI + CPU: 0 UID: 0 PID: 1495 Comm: python3 Not tainted 6.14.0-rc4+ #32 5cd0f999d5a15c574ac72b3e4b907341 + Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 + RIP: 0010:bnxt_get_queue_stats_rx+0xf/0x70 [bnxt_en] + Code: c6 87 b5 18 00 00 02 eb a2 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 01 + RSP: 0018:ffffabef43cdb7e0 EFLAGS: 00010282 + RAX: 0000000000000000 RBX: ffffffffc04c8710 RCX: 0000000000000000 + RDX: ffffabef43cdb858 RSI: 0000000000000000 RDI: ffff8d504e850000 + RBP: ffff8d506c9f9c00 R08: 0000000000000004 R09: ffff8d506bcd901c + R10: 0000000000000015 R11: ffff8d506bcd9000 R12: 0000000000000000 + R13: ffffabef43cdb8c0 R14: ffff8d504e850000 R15: 0000000000000000 + FS: 00007f2c5462b080(0000) GS:ffff8d575f600000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 
ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000000 CR3: 0000000167fd0000 CR4: 00000000007506f0 + PKRU: 55555554 + Call Trace: + + ? __die+0x20/0x70 + ? page_fault_oops+0x15a/0x460 + ? sched_balance_find_src_group+0x58d/0xd10 + ? exc_page_fault+0x6e/0x180 + ? asm_exc_page_fault+0x22/0x30 + ? bnxt_get_queue_stats_rx+0xf/0x70 [bnxt_en cdd546fd48563c280cfd30e9647efa420db07bf1] + netdev_nl_stats_by_netdev+0x2b1/0x4e0 + ? xas_load+0x9/0xb0 + ? xas_find+0x183/0x1d0 + ? xa_find+0x8b/0xe0 + netdev_nl_qstats_get_dumpit+0xbf/0x1e0 + genl_dumpit+0x31/0x90 + netlink_dump+0x1a8/0x360 + +Fixes: af7b3b4adda5 ("eth: bnxt: support per-queue statistics") +Signed-off-by: Taehee Yoo +Reviewed-by: Somnath Kotur +Link: https://patch.msgid.link/20250309134219.91670-6-ap420073@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 96f8201a41532..a768b71054fcf 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -15331,6 +15331,9 @@ static void bnxt_get_queue_stats_rx(struct net_device *dev, int i, + struct bnxt_cp_ring_info *cpr; + u64 *sw; + ++ if (!bp->bnapi) ++ return; ++ + cpr = &bp->bnapi[i]->cp_ring; + sw = cpr->stats.sw_stats; + +@@ -15354,6 +15357,9 @@ static void bnxt_get_queue_stats_tx(struct net_device *dev, int i, + struct bnxt_napi *bnapi; + u64 *sw; + ++ if (!bp->tx_ring) ++ return; ++ + bnapi = bp->tx_ring[bp->tx_ring_map[i]].bnapi; + sw = bnapi->cp_ring.stats.sw_stats; + +-- +2.39.5 + diff --git a/queue-6.13/eth-bnxt-fix-memory-leak-in-queue-reset.patch b/queue-6.13/eth-bnxt-fix-memory-leak-in-queue-reset.patch new file mode 100644 index 0000000000..a3b30d2484 --- /dev/null +++ b/queue-6.13/eth-bnxt-fix-memory-leak-in-queue-reset.patch @@ -0,0 +1,63 @@ +From 4924eecbc294ebf97c4436fc97949a195f85e331 Mon 
Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 13:42:17 +0000 +Subject: eth: bnxt: fix memory leak in queue reset + +From: Taehee Yoo + +[ Upstream commit 87dd2850835dd7886726b428a8ef7d73a60520c7 ] + +When the queue is reset, the bnxt_alloc_one_tpa_info() is called to +allocate tpa_info for the new queue. +And then the old queue's tpa_info should be removed by the +bnxt_free_one_tpa_info(), but it is not called. +So memory leak occurs. +It adds the bnxt_free_one_tpa_info() in the bnxt_queue_mem_free(). + +unreferenced object 0xffff888293cc0000 (size 16384): + comm "ncdevmem", pid 2076, jiffies 4296604081 + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 40 75 78 93 82 88 ff ff ........@ux..... + 40 75 78 93 02 00 00 00 00 00 00 00 00 00 00 00 @ux............. + backtrace (crc 5d7d4798): + ___kmalloc_large_node+0x10d/0x1b0 + __kmalloc_large_node_noprof+0x17/0x60 + __kmalloc_noprof+0x3f6/0x520 + bnxt_alloc_one_tpa_info+0x5f/0x300 [bnxt_en] + bnxt_queue_mem_alloc+0x8e8/0x14f0 [bnxt_en] + netdev_rx_queue_restart+0x233/0x620 + net_devmem_bind_dmabuf_to_queue+0x2a3/0x600 + netdev_nl_bind_rx_doit+0xc00/0x10a0 + genl_family_rcv_msg_doit+0x1d4/0x2b0 + genl_rcv_msg+0x3fb/0x6c0 + netlink_rcv_skb+0x12c/0x360 + genl_rcv+0x24/0x40 + netlink_unicast+0x447/0x710 + netlink_sendmsg+0x712/0xbc0 + __sys_sendto+0x3fd/0x4d0 + __x64_sys_sendto+0xdc/0x1b0 + +Fixes: 2d694c27d32e ("bnxt_en: implement netdev_queue_mgmt_ops") +Signed-off-by: Taehee Yoo +Link: https://patch.msgid.link/20250309134219.91670-7-ap420073@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index a768b71054fcf..a79c78061d1d0 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -15486,6 +15486,7 @@ static void bnxt_queue_mem_free(struct 
net_device *dev, void *qmem) + struct bnxt_ring_struct *ring; + + bnxt_free_one_rx_ring_skbs(bp, rxr); ++ bnxt_free_one_tpa_info(bp, rxr); + + xdp_rxq_info_unreg(&rxr->xdp_rxq); + +-- +2.39.5 + diff --git a/queue-6.13/eth-bnxt-fix-truesize-for-mb-xdp-pass-case.patch b/queue-6.13/eth-bnxt-fix-truesize-for-mb-xdp-pass-case.patch new file mode 100644 index 0000000000..fd5ac67b05 --- /dev/null +++ b/queue-6.13/eth-bnxt-fix-truesize-for-mb-xdp-pass-case.patch @@ -0,0 +1,124 @@ +From 5b79278b43c7d81438beba11d35ee4cfc68856c2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 13:42:12 +0000 +Subject: eth: bnxt: fix truesize for mb-xdp-pass case + +From: Taehee Yoo + +[ Upstream commit 9f7b2aa5034e24d3c49db73d5f760c0435fe31c2 ] + +When mb-xdp is set and return is XDP_PASS, packet is converted from +xdp_buff to sk_buff with xdp_update_skb_shared_info() in +bnxt_xdp_build_skb(). +bnxt_xdp_build_skb() passes incorrect truesize argument to +xdp_update_skb_shared_info(). +The truesize is calculated as BNXT_RX_PAGE_SIZE * sinfo->nr_frags but +the skb_shared_info was wiped by napi_build_skb() before. +So it stores sinfo->nr_frags before bnxt_xdp_build_skb() and use it +instead of getting skb_shared_info from xdp_get_shared_info_from_buff(). 
+ +Splat looks like: + ------------[ cut here ]------------ + WARNING: CPU: 2 PID: 0 at net/core/skbuff.c:6072 skb_try_coalesce+0x504/0x590 + Modules linked in: xt_nat xt_tcpudp veth af_packet xt_conntrack nft_chain_nat xt_MASQUERADE nf_conntrack_netlink xfrm_user xt_addrtype nft_coms + CPU: 2 UID: 0 PID: 0 Comm: swapper/2 Not tainted 6.14.0-rc2+ #3 + RIP: 0010:skb_try_coalesce+0x504/0x590 + Code: 4b fd ff ff 49 8b 34 24 40 80 e6 40 0f 84 3d fd ff ff 49 8b 74 24 48 40 f6 c6 01 0f 84 2e fd ff ff 48 8d 4e ff e9 25 fd ff ff <0f> 0b e99 + RSP: 0018:ffffb62c4120caa8 EFLAGS: 00010287 + RAX: 0000000000000003 RBX: ffffb62c4120cb14 RCX: 0000000000000ec0 + RDX: 0000000000001000 RSI: ffffa06e5d7dc000 RDI: 0000000000000003 + RBP: ffffa06e5d7ddec0 R08: ffffa06e6120a800 R09: ffffa06e7a119900 + R10: 0000000000002310 R11: ffffa06e5d7dcec0 R12: ffffe4360575f740 + R13: ffffe43600000000 R14: 0000000000000002 R15: 0000000000000002 + FS: 0000000000000000(0000) GS:ffffa0755f700000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f147b76b0f8 CR3: 00000001615d4000 CR4: 00000000007506f0 + PKRU: 55555554 + Call Trace: + + ? __warn+0x84/0x130 + ? skb_try_coalesce+0x504/0x590 + ? report_bug+0x18a/0x1a0 + ? handle_bug+0x53/0x90 + ? exc_invalid_op+0x14/0x70 + ? asm_exc_invalid_op+0x16/0x20 + ? skb_try_coalesce+0x504/0x590 + inet_frag_reasm_finish+0x11f/0x2e0 + ip_defrag+0x37a/0x900 + ip_local_deliver+0x51/0x120 + ip_sublist_rcv_finish+0x64/0x70 + ip_sublist_rcv+0x179/0x210 + ip_list_rcv+0xf9/0x130 + +How to reproduce: + +ip link set $interface1 xdp obj xdp_pass.o +ip link set $interface1 mtu 9000 up +ip a a 10.0.0.1/24 dev $interface1 + +ip link set $interface2 mtu 9000 up +ip a a 10.0.0.2/24 dev $interface2 +ping 10.0.0.1 -s 65000 + +Following ping.py patch adds xdp-mb-pass case. so ping.py is going to be +able to reproduce this issue. 
+ +Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") +Signed-off-by: Taehee Yoo +Link: https://patch.msgid.link/20250309134219.91670-2-ap420073@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++++ + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 +- + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index b6f844cac80eb..6357126b87c3f 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -2039,6 +2039,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + struct rx_cmp_ext *rxcmp1; + u32 tmp_raw_cons = *raw_cons; + u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons); ++ struct skb_shared_info *sinfo; + struct bnxt_sw_rx_bd *rx_buf; + unsigned int len; + u8 *data_ptr, agg_bufs, cmp_type; +@@ -2165,6 +2166,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + false); + if (!frag_len) + goto oom_next_rx; ++ + } + xdp_active = true; + } +@@ -2174,6 +2176,12 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + rc = 1; + goto next_rx; + } ++ if (xdp_buff_has_frags(&xdp)) { ++ sinfo = xdp_get_shared_info_from_buff(&xdp); ++ agg_bufs = sinfo->nr_frags; ++ } else { ++ agg_bufs = 0; ++ } + } + + if (len <= bp->rx_copy_thresh) { +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +index dc51dce209d5f..f9e7e71b89485 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +@@ -472,7 +472,7 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, + } + xdp_update_skb_shared_info(skb, num_frags, + sinfo->xdp_frags_size, +- BNXT_RX_PAGE_SIZE * sinfo->nr_frags, ++ BNXT_RX_PAGE_SIZE * num_frags, + 
xdp_buff_is_frag_pfmemalloc(xdp)); + return skb; + } +-- +2.39.5 + diff --git a/queue-6.13/eth-bnxt-return-fail-if-interface-is-down-in-bnxt_qu.patch b/queue-6.13/eth-bnxt-return-fail-if-interface-is-down-in-bnxt_qu.patch new file mode 100644 index 0000000000..d9ca1b0890 --- /dev/null +++ b/queue-6.13/eth-bnxt-return-fail-if-interface-is-down-in-bnxt_qu.patch @@ -0,0 +1,84 @@ +From 2e63b15e418fd3573935155cb496be4a013c8e53 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 13:42:13 +0000 +Subject: eth: bnxt: return fail if interface is down in bnxt_queue_mem_alloc() + +From: Taehee Yoo + +[ Upstream commit ca2456e073957781e1184de68551c65161b2bd30 ] + +The bnxt_queue_mem_alloc() is called to allocate new queue memory when +a queue is restarted. +It internally accesses rx buffer descriptor corresponding to the index. +The rx buffer descriptor is allocated and set when the interface is up +and it's freed when the interface is down. +So, if queue is restarted if interface is down, kernel panic occurs. 
+ +Splat looks like: + BUG: unable to handle page fault for address: 000000000000b240 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI + CPU: 3 UID: 0 PID: 1563 Comm: ncdevmem2 Not tainted 6.14.0-rc2+ #9 844ddba6e7c459cafd0bf4db9a3198e + Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 + RIP: 0010:bnxt_queue_mem_alloc+0x3f/0x4e0 [bnxt_en] + Code: 41 54 4d 89 c4 4d 69 c0 c0 05 00 00 55 48 89 f5 53 48 89 fb 4c 8d b5 40 05 00 00 48 83 ec 15 + RSP: 0018:ffff9dcc83fef9e8 EFLAGS: 00010202 + RAX: ffffffffc0457720 RBX: ffff934ed8d40000 RCX: 0000000000000000 + RDX: 000000000000001f RSI: ffff934ea508f800 RDI: ffff934ea508f808 + RBP: ffff934ea508f800 R08: 000000000000b240 R09: ffff934e84f4b000 + R10: ffff9dcc83fefa30 R11: ffff934e84f4b000 R12: 000000000000001f + R13: ffff934ed8d40ac0 R14: ffff934ea508fd40 R15: ffff934e84f4b000 + FS: 00007fa73888c740(0000) GS:ffff93559f780000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000000000000b240 CR3: 0000000145a2e000 CR4: 00000000007506f0 + PKRU: 55555554 + Call Trace: + + ? __die+0x20/0x70 + ? page_fault_oops+0x15a/0x460 + ? exc_page_fault+0x6e/0x180 + ? asm_exc_page_fault+0x22/0x30 + ? __pfx_bnxt_queue_mem_alloc+0x10/0x10 [bnxt_en 7f85e76f4d724ba07471d7e39d9e773aea6597b7] + ? bnxt_queue_mem_alloc+0x3f/0x4e0 [bnxt_en 7f85e76f4d724ba07471d7e39d9e773aea6597b7] + netdev_rx_queue_restart+0xc5/0x240 + net_devmem_bind_dmabuf_to_queue+0xf8/0x200 + netdev_nl_bind_rx_doit+0x3a7/0x450 + genl_family_rcv_msg_doit+0xd9/0x130 + genl_rcv_msg+0x184/0x2b0 + ? __pfx_netdev_nl_bind_rx_doit+0x10/0x10 + ? __pfx_genl_rcv_msg+0x10/0x10 + netlink_rcv_skb+0x54/0x100 + genl_rcv+0x24/0x40 +... 
+ +Reviewed-by: Somnath Kotur +Reviewed-by: Jakub Kicinski +Fixes: 2d694c27d32e ("bnxt_en: implement netdev_queue_mgmt_ops") +Signed-off-by: Taehee Yoo +Reviewed-by: Mina Almasry +Link: https://patch.msgid.link/20250309134219.91670-3-ap420073@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 6357126b87c3f..d5d91bbc67924 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -15394,6 +15394,9 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) + struct bnxt_ring_struct *ring; + int rc; + ++ if (!bp->rx_ring) ++ return -ENETDOWN; ++ + rxr = &bp->rx_ring[idx]; + clone = qmem; + memcpy(clone, rxr, sizeof(*rxr)); +-- +2.39.5 + diff --git a/queue-6.13/fbdev-hyperv_fb-allow-graceful-removal-of-framebuffe.patch b/queue-6.13/fbdev-hyperv_fb-allow-graceful-removal-of-framebuffe.patch new file mode 100644 index 0000000000..58a0551be4 --- /dev/null +++ b/queue-6.13/fbdev-hyperv_fb-allow-graceful-removal-of-framebuffe.patch @@ -0,0 +1,117 @@ +From 9b9bf4776b3687a0240aa616cd9390c86b79a581 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 1 Mar 2025 08:16:31 -0800 +Subject: fbdev: hyperv_fb: Allow graceful removal of framebuffer + +From: Saurabh Sengar + +[ Upstream commit ea2f45ab0e53b255f72c85ccd99e2b394fc5fceb ] + +When a Hyper-V framebuffer device is unbind, hyperv_fb driver tries to +release the framebuffer forcefully. If this framebuffer is in use it +produce the following WARN and hence this framebuffer is never released. + +[ 44.111220] WARNING: CPU: 35 PID: 1882 at drivers/video/fbdev/core/fb_info.c:70 framebuffer_release+0x2c/0x40 +< snip > +[ 44.111289] Call Trace: +[ 44.111290] +[ 44.111291] ? show_regs+0x6c/0x80 +[ 44.111295] ? __warn+0x8d/0x150 +[ 44.111298] ? 
framebuffer_release+0x2c/0x40 +[ 44.111300] ? report_bug+0x182/0x1b0 +[ 44.111303] ? handle_bug+0x6e/0xb0 +[ 44.111306] ? exc_invalid_op+0x18/0x80 +[ 44.111308] ? asm_exc_invalid_op+0x1b/0x20 +[ 44.111311] ? framebuffer_release+0x2c/0x40 +[ 44.111313] ? hvfb_remove+0x86/0xa0 [hyperv_fb] +[ 44.111315] vmbus_remove+0x24/0x40 [hv_vmbus] +[ 44.111323] device_remove+0x40/0x80 +[ 44.111325] device_release_driver_internal+0x20b/0x270 +[ 44.111327] ? bus_find_device+0xb3/0xf0 + +Fix this by moving the release of framebuffer and assosiated memory +to fb_ops.fb_destroy function, so that framebuffer framework handles +it gracefully. + +While we fix this, also replace manual registrations/unregistration of +framebuffer with devm_register_framebuffer. + +Fixes: 68a2d20b79b1 ("drivers/video: add Hyper-V Synthetic Video Frame Buffer Driver") + +Signed-off-by: Saurabh Sengar +Reviewed-by: Michael Kelley +Tested-by: Michael Kelley +Link: https://lore.kernel.org/r/1740845791-19977-3-git-send-email-ssengar@linux.microsoft.com +Signed-off-by: Wei Liu +Message-ID: <1740845791-19977-3-git-send-email-ssengar@linux.microsoft.com> +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/hyperv_fb.c | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c +index 9798a34ac571f..75338ffc703fb 100644 +--- a/drivers/video/fbdev/hyperv_fb.c ++++ b/drivers/video/fbdev/hyperv_fb.c +@@ -282,6 +282,8 @@ static uint screen_depth; + static uint screen_fb_size; + static uint dio_fb_size; /* FB size for deferred IO */ + ++static void hvfb_putmem(struct fb_info *info); ++ + /* Send message to Hyper-V host */ + static inline int synthvid_send(struct hv_device *hdev, + struct synthvid_msg *msg) +@@ -862,6 +864,17 @@ static void hvfb_ops_damage_area(struct fb_info *info, u32 x, u32 y, u32 width, + hvfb_ondemand_refresh_throttle(par, x, y, width, height); + } + ++/* ++ * fb_ops.fb_destroy is called by the last 
put_fb_info() call at the end ++ * of unregister_framebuffer() or fb_release(). Do any cleanup related to ++ * framebuffer here. ++ */ ++static void hvfb_destroy(struct fb_info *info) ++{ ++ hvfb_putmem(info); ++ framebuffer_release(info); ++} ++ + /* + * TODO: GEN1 codepaths allocate from system or DMA-able memory. Fix the + * driver to use the _SYSMEM_ or _DMAMEM_ helpers in these cases. +@@ -877,6 +890,7 @@ static const struct fb_ops hvfb_ops = { + .fb_set_par = hvfb_set_par, + .fb_setcolreg = hvfb_setcolreg, + .fb_blank = hvfb_blank, ++ .fb_destroy = hvfb_destroy, + }; + + /* Get options from kernel paramenter "video=" */ +@@ -1178,7 +1192,7 @@ static int hvfb_probe(struct hv_device *hdev, + if (ret) + goto error; + +- ret = register_framebuffer(info); ++ ret = devm_register_framebuffer(&hdev->device, info); + if (ret) { + pr_err("Unable to register framebuffer\n"); + goto error; +@@ -1226,14 +1240,10 @@ static void hvfb_remove(struct hv_device *hdev) + + fb_deferred_io_cleanup(info); + +- unregister_framebuffer(info); + cancel_delayed_work_sync(&par->dwork); + + vmbus_close(hdev->channel); + hv_set_drvdata(hdev, NULL); +- +- hvfb_putmem(info); +- framebuffer_release(info); + } + + static int hvfb_suspend(struct hv_device *hdev) +-- +2.39.5 + diff --git a/queue-6.13/fbdev-hyperv_fb-fix-hang-in-kdump-kernel-when-on-hyp.patch b/queue-6.13/fbdev-hyperv_fb-fix-hang-in-kdump-kernel-when-on-hyp.patch new file mode 100644 index 0000000000..bbe17bb047 --- /dev/null +++ b/queue-6.13/fbdev-hyperv_fb-fix-hang-in-kdump-kernel-when-on-hyp.patch @@ -0,0 +1,143 @@ +From 9dbd7baa046540cbd885cf34f18d8a16be4689fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Feb 2025 15:01:30 -0800 +Subject: fbdev: hyperv_fb: Fix hang in kdump kernel when on Hyper-V Gen 2 VMs + +From: Michael Kelley + +[ Upstream commit 304386373007aaca9236a3f36afac0bbedcd2bf0 ] + +Gen 2 Hyper-V VMs boot via EFI and have a standard EFI framebuffer +device. 
When the kdump kernel runs in such a VM, loading the efifb +driver may hang because of accessing the framebuffer at the wrong +memory address. + +The scenario occurs when the hyperv_fb driver in the original kernel +moves the framebuffer to a different MMIO address because of conflicts +with an already-running efifb or simplefb driver. The hyperv_fb driver +then informs Hyper-V of the change, which is allowed by the Hyper-V FB +VMBus device protocol. However, when the kexec command loads the kdump +kernel into crash memory via the kexec_file_load() system call, the +system call doesn't know the framebuffer has moved, and it sets up the +kdump screen_info using the original framebuffer address. The transition +to the kdump kernel does not go through the Hyper-V host, so Hyper-V +does not reset the framebuffer address like it would do on a reboot. +When efifb tries to run, it accesses a non-existent framebuffer +address, which traps to the Hyper-V host. After many such accesses, +the Hyper-V host thinks the guest is being malicious, and throttles +the guest to the point that it runs very slowly or appears to have hung. + +When the kdump kernel is loaded into crash memory via the kexec_load() +system call, the problem does not occur. In this case, the kexec command +builds the screen_info table itself in user space from data returned +by the FBIOGET_FSCREENINFO ioctl against /dev/fb0, which gives it the +new framebuffer location. + +This problem was originally reported in 2020 [1], resulting in commit +3cb73bc3fa2a ("hyperv_fb: Update screen_info after removing old +framebuffer"). This commit solved the problem by setting orig_video_isVGA +to 0, so the kdump kernel was unaware of the EFI framebuffer. The efifb +driver did not try to load, and no hang occurred. But in 2024, commit +c25a19afb81c ("fbdev/hyperv_fb: Do not clear global screen_info") +effectively reverted 3cb73bc3fa2a. 
Commit c25a19afb81c has no reference +to 3cb73bc3fa2a, so perhaps it was done without knowing the implications +that were reported with 3cb73bc3fa2a. In any case, as of commit +c25a19afb81c, the original problem came back again. + +Interestingly, the hyperv_drm driver does not have this problem because +it never moves the framebuffer. The difference is that the hyperv_drm +driver removes any conflicting framebuffers *before* allocating an MMIO +address, while the hyperv_fb driver removes conflicting framebuffers +*after* allocating an MMIO address. With the "after" ordering, hyperv_fb +may encounter a conflict and move the framebuffer to a different MMIO +address. But the conflict is essentially bogus because it is removed +a few lines of code later. + +Rather than fix the problem with the approach from 2020 in commit +3cb73bc3fa2a, instead slightly reorder the steps in hyperv_fb so +conflicting framebuffers are removed before allocating an MMIO address. +Then the default framebuffer MMIO address should always be available, and +there's never any confusion about which framebuffer address the kdump +kernel should use -- it's always the original address provided by +the Hyper-V host. This approach is already used by the hyperv_drm +driver, and is consistent with the usage guidelines at the head of +the module with the function aperture_remove_conflicting_devices(). + +This approach also solves a related minor problem when kexec_load() +is used to load the kdump kernel. With current code, unbinding and +rebinding the hyperv_fb driver could result in the framebuffer moving +back to the default framebuffer address, because on the rebind there +are no conflicts. If such a move is done after the kdump kernel is +loaded with the new framebuffer address, at kdump time it could again +have the wrong address. + +This problem and fix are described in terms of the kdump kernel, but +it can also occur with any kernel started via kexec. 
+ +See extensive discussion of the problem and solution at [2]. + +[1] https://lore.kernel.org/linux-hyperv/20201014092429.1415040-1-kasong@redhat.com/ +[2] https://lore.kernel.org/linux-hyperv/BLAPR10MB521793485093FDB448F7B2E5FDE92@BLAPR10MB5217.namprd10.prod.outlook.com/ + +Reported-by: Thomas Tai +Fixes: c25a19afb81c ("fbdev/hyperv_fb: Do not clear global screen_info") +Signed-off-by: Michael Kelley +Link: https://lore.kernel.org/r/20250218230130.3207-1-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20250218230130.3207-1-mhklinux@outlook.com> +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/hyperv_fb.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c +index 363e4ccfcdb77..ce23d0ef5702a 100644 +--- a/drivers/video/fbdev/hyperv_fb.c ++++ b/drivers/video/fbdev/hyperv_fb.c +@@ -989,6 +989,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) + + base = pci_resource_start(pdev, 0); + size = pci_resource_len(pdev, 0); ++ aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME); + + /* + * For Gen 1 VM, we can directly use the contiguous memory +@@ -1010,11 +1011,21 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) + goto getmem_done; + } + pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n"); ++ } else { ++ aperture_remove_all_conflicting_devices(KBUILD_MODNAME); + } + + /* +- * Cannot use the contiguous physical memory. +- * Allocate mmio space for framebuffer. ++ * Cannot use contiguous physical memory, so allocate MMIO space for ++ * the framebuffer. At this point in the function, conflicting devices ++ * that might have claimed the framebuffer MMIO space based on ++ * screen_info.lfb_base must have already been removed so that ++ * vmbus_allocate_mmio() does not allocate different MMIO space. 
If the ++ * kdump image were to be loaded using kexec_file_load(), the ++ * framebuffer location in the kdump image would be set from ++ * screen_info.lfb_base at the time that kdump is enabled. If the ++ * framebuffer has moved elsewhere, this could be the wrong location, ++ * causing kdump to hang when efifb (for example) loads. + */ + dio_fb_size = + screen_width * screen_height * screen_depth / 8; +@@ -1051,11 +1062,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) + info->screen_size = dio_fb_size; + + getmem_done: +- if (base && size) +- aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME); +- else +- aperture_remove_all_conflicting_devices(KBUILD_MODNAME); +- + if (!gen2vm) + pci_dev_put(pdev); + +-- +2.39.5 + diff --git a/queue-6.13/fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch b/queue-6.13/fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch new file mode 100644 index 0000000000..e4a9577457 --- /dev/null +++ b/queue-6.13/fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch @@ -0,0 +1,53 @@ +From ad7f89b65b7cfaef6257f0c68341c71cf2169b1d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Feb 2025 15:52:52 -0800 +Subject: fbdev: hyperv_fb: iounmap() the correct memory when removing a device + +From: Michael Kelley + +[ Upstream commit 7241c886a71797cc51efc6fadec7076fcf6435c2 ] + +When a Hyper-V framebuffer device is removed, or the driver is unbound +from a device, any allocated and/or mapped memory must be released. In +particular, MMIO address space that was mapped to the framebuffer must +be unmapped. Current code unmaps the wrong address, resulting in an +error like: + +[ 4093.980597] iounmap: bad address 00000000c936c05c + +followed by a stack dump. + +Commit d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for +Hyper-V frame buffer driver") changed the kind of address stored in +info->screen_base, and the iounmap() call in hvfb_putmem() was not +updated accordingly. 
+ +Fix this by updating hvfb_putmem() to unmap the correct address. + +Fixes: d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver") +Signed-off-by: Michael Kelley +Reviewed-by: Saurabh Sengar +Link: https://lore.kernel.org/r/20250209235252.2987-1-mhklinux@outlook.com +Signed-off-by: Wei Liu +Message-ID: <20250209235252.2987-1-mhklinux@outlook.com> +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/hyperv_fb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c +index 7fdb5edd7e2e8..363e4ccfcdb77 100644 +--- a/drivers/video/fbdev/hyperv_fb.c ++++ b/drivers/video/fbdev/hyperv_fb.c +@@ -1080,7 +1080,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) + + if (par->need_docopy) { + vfree(par->dio_vp); +- iounmap(info->screen_base); ++ iounmap(par->mmio_vp); + vmbus_free_mmio(par->mem->start, screen_fb_size); + } else { + hvfb_release_phymem(hdev, info->fix.smem_start, +-- +2.39.5 + diff --git a/queue-6.13/fbdev-hyperv_fb-simplify-hvfb_putmem.patch b/queue-6.13/fbdev-hyperv_fb-simplify-hvfb_putmem.patch new file mode 100644 index 0000000000..65519fd7f8 --- /dev/null +++ b/queue-6.13/fbdev-hyperv_fb-simplify-hvfb_putmem.patch @@ -0,0 +1,87 @@ +From 1cc7be12c843b0a555a979fe209c4eb96771310a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 1 Mar 2025 08:16:30 -0800 +Subject: fbdev: hyperv_fb: Simplify hvfb_putmem + +From: Saurabh Sengar + +[ Upstream commit f5e728a50bb17336a20803dde488515b833ecd1d ] + +The device object required in 'hvfb_release_phymem' function +for 'dma_free_coherent' can also be obtained from the 'info' +pointer, making 'hdev' parameter in 'hvfb_putmem' redundant. +Remove the unnecessary 'hdev' argument from 'hvfb_putmem'. 
+ +Signed-off-by: Saurabh Sengar +Reviewed-by: Michael Kelley +Tested-by: Michael Kelley +Link: https://lore.kernel.org/r/1740845791-19977-2-git-send-email-ssengar@linux.microsoft.com +Signed-off-by: Wei Liu +Message-ID: <1740845791-19977-2-git-send-email-ssengar@linux.microsoft.com> +Stable-dep-of: ea2f45ab0e53 ("fbdev: hyperv_fb: Allow graceful removal of framebuffer") +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/hyperv_fb.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c +index ce23d0ef5702a..9798a34ac571f 100644 +--- a/drivers/video/fbdev/hyperv_fb.c ++++ b/drivers/video/fbdev/hyperv_fb.c +@@ -952,7 +952,7 @@ static phys_addr_t hvfb_get_phymem(struct hv_device *hdev, + } + + /* Release contiguous physical memory */ +-static void hvfb_release_phymem(struct hv_device *hdev, ++static void hvfb_release_phymem(struct device *device, + phys_addr_t paddr, unsigned int size) + { + unsigned int order = get_order(size); +@@ -960,7 +960,7 @@ static void hvfb_release_phymem(struct hv_device *hdev, + if (order <= MAX_PAGE_ORDER) + __free_pages(pfn_to_page(paddr >> PAGE_SHIFT), order); + else +- dma_free_coherent(&hdev->device, ++ dma_free_coherent(device, + round_up(size, PAGE_SIZE), + phys_to_virt(paddr), + paddr); +@@ -1080,7 +1080,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) + } + + /* Release the framebuffer */ +-static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) ++static void hvfb_putmem(struct fb_info *info) + { + struct hvfb_par *par = info->par; + +@@ -1089,7 +1089,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) + iounmap(par->mmio_vp); + vmbus_free_mmio(par->mem->start, screen_fb_size); + } else { +- hvfb_release_phymem(hdev, info->fix.smem_start, ++ hvfb_release_phymem(info->device, info->fix.smem_start, + screen_fb_size); + } + +@@ -1203,7 +1203,7 @@ static int hvfb_probe(struct 
hv_device *hdev, + + error: + fb_deferred_io_cleanup(info); +- hvfb_putmem(hdev, info); ++ hvfb_putmem(info); + error2: + vmbus_close(hdev->channel); + error1: +@@ -1232,7 +1232,7 @@ static void hvfb_remove(struct hv_device *hdev) + vmbus_close(hdev->channel); + hv_set_drvdata(hdev, NULL); + +- hvfb_putmem(hdev, info); ++ hvfb_putmem(info); + framebuffer_release(info); + } + +-- +2.39.5 + diff --git a/queue-6.13/gre-fix-ipv6-link-local-address-generation.patch b/queue-6.13/gre-fix-ipv6-link-local-address-generation.patch new file mode 100644 index 0000000000..4fad089af9 --- /dev/null +++ b/queue-6.13/gre-fix-ipv6-link-local-address-generation.patch @@ -0,0 +1,110 @@ +From f45f915cb86374c17243f11f339f8a0fd8e8a012 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Mar 2025 20:28:53 +0100 +Subject: gre: Fix IPv6 link-local address generation. + +From: Guillaume Nault + +[ Upstream commit 183185a18ff96751db52a46ccf93fff3a1f42815 ] + +Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE +devices in most cases and fall back to using add_v4_addrs() only in +case the GRE configuration is incompatible with addrconf_addr_gen(). + +GRE used to use addrconf_addr_gen() until commit e5dd729460ca +("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL +address") restricted this use to gretap and ip6gretap devices, and +created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones. + +The original problem came when commit 9af28511be10 ("addrconf: refuse +isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its +addr parameter was 0. The commit says that this would create an invalid +address, however, I couldn't find any RFC saying that the generated +interface identifier would be wrong. Anyway, since gre over IPv4 +devices pass their local tunnel address to __ipv6_isatap_ifid(), that +commit broke their IPv6 link-local address generation when the local +address was unspecified. 
+ +Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT +interfaces when computing v6LL address") tried to fix that case by +defining add_v4_addrs() and calling it to generate the IPv6 link-local +address instead of using addrconf_addr_gen() (apart for gretap and +ip6gretap devices, which would still use the regular +addrconf_addr_gen(), since they have a MAC address). + +That broke several use cases because add_v4_addrs() isn't properly +integrated into the rest of IPv6 Neighbor Discovery code. Several of +these shortcomings have been fixed over time, but add_v4_addrs() +remains broken on several aspects. In particular, it doesn't send any +Router Solicitations, so the SLAAC process doesn't start until the +interface receives a Router Advertisement. Also, add_v4_addrs() mostly +ignores the address generation mode of the interface +(/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the +IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases. + +Fix the situation by using add_v4_addrs() only in the specific scenario +where the normal method would fail. That is, for interfaces that have +all of the following characteristics: + + * run over IPv4, + * transport IP packets directly, not Ethernet (that is, not gretap + interfaces), + * tunnel endpoint is INADDR_ANY (that is, 0), + * device address generation mode is EUI64. + +In all other cases, revert back to the regular addrconf_addr_gen(). + +Also, remove the special case for ip6gre interfaces in add_v4_addrs(), +since ip6gre devices now always use addrconf_addr_gen() instead. 
+ +Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") +Signed-off-by: Guillaume Nault +Reviewed-by: Ido Schimmel +Link: https://patch.msgid.link/559c32ce5c9976b269e6337ac9abb6a96abe5096.1741375285.git.gnault@redhat.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/ipv6/addrconf.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index 0e765466d7f79..bdf39388e514b 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3237,16 +3237,13 @@ static void add_v4_addrs(struct inet6_dev *idev) + struct in6_addr addr; + struct net_device *dev; + struct net *net = dev_net(idev->dev); +- int scope, plen, offset = 0; ++ int scope, plen; + u32 pflags = 0; + + ASSERT_RTNL(); + + memset(&addr, 0, sizeof(struct in6_addr)); +- /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ +- if (idev->dev->addr_len == sizeof(struct in6_addr)) +- offset = sizeof(struct in6_addr) - 4; +- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); ++ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); + + if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { + scope = IPV6_ADDR_COMPATv4; +@@ -3557,7 +3554,13 @@ static void addrconf_gre_config(struct net_device *dev) + return; + } + +- if (dev->type == ARPHRD_ETHER) { ++ /* Generate the IPv6 link-local address using addrconf_addr_gen(), ++ * unless we have an IPv4 GRE device not bound to an IP address and ++ * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this ++ * case). Such devices fall back to add_v4_addrs() instead. 
++ */ ++ if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && ++ idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { + addrconf_addr_gen(idev, true); + return; + } +-- +2.39.5 + diff --git a/queue-6.13/ice-do-not-configure-destination-override-for-switch.patch b/queue-6.13/ice-do-not-configure-destination-override-for-switch.patch new file mode 100644 index 0000000000..cb7ab1389c --- /dev/null +++ b/queue-6.13/ice-do-not-configure-destination-override-for-switch.patch @@ -0,0 +1,119 @@ +From 5bfb144783281c4706f5d25b7f745efdadd454cf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Dec 2024 15:08:53 +0100 +Subject: ice: do not configure destination override for switchdev + +From: Larysa Zaremba + +[ Upstream commit 3be83ee9de0298f8321aa0b148d8f9995102e40f ] + +After switchdev is enabled and disabled later, LLDP packets sending stops, +despite working perfectly fine before and during switchdev state. +To reproduce (creating/destroying VF is what triggers the reconfiguration): + +devlink dev eswitch set pci/
mode switchdev +echo '2' > /sys/class/net//device/sriov_numvfs +echo '0' > /sys/class/net//device/sriov_numvfs + +This happens because LLDP relies on the destination override functionality. +It needs to 1) set a flag in the descriptor, 2) set the VSI permission to +make it valid. The permissions are set when the PF VSI is first configured, +but switchdev then enables it for the uplink VSI (which is always the PF) +once more when configured and disables when deconfigured, which leads to +software-generated LLDP packets being blocked. + +Do not modify the destination override permissions when configuring +switchdev, as the enabled state is the default configuration that is never +modified. + +Fixes: 1a1c40df2e80 ("ice: set and release switchdev environment") +Reviewed-by: Michal Swiatkowski +Signed-off-by: Larysa Zaremba +Reviewed-by: Simon Horman +Tested-by: Sujai Buvaneswaran +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_eswitch.c | 6 ------ + drivers/net/ethernet/intel/ice/ice_lib.c | 18 ------------------ + drivers/net/ethernet/intel/ice/ice_lib.h | 4 ---- + 3 files changed, 28 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c +index d649c197cf673..ed21d7f55ac11 100644 +--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c ++++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c +@@ -49,9 +49,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) + if (vlan_ops->dis_rx_filtering(uplink_vsi)) + goto err_vlan_filtering; + +- if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override)) +- goto err_override_uplink; +- + if (ice_vsi_update_local_lb(uplink_vsi, true)) + goto err_override_local_lb; + +@@ -63,8 +60,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) + err_up: + ice_vsi_update_local_lb(uplink_vsi, false); + err_override_local_lb: +- ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); 
+-err_override_uplink: + vlan_ops->ena_rx_filtering(uplink_vsi); + err_vlan_filtering: + ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, +@@ -275,7 +270,6 @@ static void ice_eswitch_release_env(struct ice_pf *pf) + vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); + + ice_vsi_update_local_lb(uplink_vsi, false); +- ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); + vlan_ops->ena_rx_filtering(uplink_vsi); + ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, + ICE_FLTR_TX); +diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c +index a7d45a8ce7ac0..e07fc8851e1dc 100644 +--- a/drivers/net/ethernet/intel/ice/ice_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_lib.c +@@ -3930,24 +3930,6 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx) + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + } + +-/** +- * ice_vsi_ctx_set_allow_override - allow destination override on VSI +- * @ctx: pointer to VSI ctx structure +- */ +-void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx) +-{ +- ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; +-} +- +-/** +- * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI +- * @ctx: pointer to VSI ctx structure +- */ +-void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx) +-{ +- ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; +-} +- + /** + * ice_vsi_update_local_lb - update sw block in VSI with local loopback bit + * @vsi: pointer to VSI structure +diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h +index 10d6fc479a321..6085039bac952 100644 +--- a/drivers/net/ethernet/intel/ice/ice_lib.h ++++ b/drivers/net/ethernet/intel/ice/ice_lib.h +@@ -104,10 +104,6 @@ ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)) + void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx); + + void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx); 
+- +-void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx); +- +-void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx); + int ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set); + int ice_vsi_add_vlan_zero(struct ice_vsi *vsi); + int ice_vsi_del_vlan_zero(struct ice_vsi *vsi); +-- +2.39.5 + diff --git a/queue-6.13/ice-fix-memory-leak-in-arfs-after-reset.patch b/queue-6.13/ice-fix-memory-leak-in-arfs-after-reset.patch new file mode 100644 index 0000000000..db3a61ea62 --- /dev/null +++ b/queue-6.13/ice-fix-memory-leak-in-arfs-after-reset.patch @@ -0,0 +1,68 @@ +From 3b499c8638e63b419f7d7b93558bf2678f5a94dd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Jan 2025 09:15:39 +0100 +Subject: ice: fix memory leak in aRFS after reset + +From: Grzegorz Nitka + +[ Upstream commit 23d97f18901ef5e4e264e3b1777fe65c760186b5 ] + +Fix aRFS (accelerated Receive Flow Steering) structures memory leak by +adding a checker to verify if aRFS memory is already allocated while +configuring VSI. aRFS objects are allocated in two cases: +- as part of VSI initialization (at probe), and +- as part of reset handling + +However, VSI reconfiguration executed during reset involves memory +allocation one more time, without prior releasing already allocated +resources. This led to the memory leak with the following signature: + +[root@os-delivery ~]# cat /sys/kernel/debug/kmemleak +unreferenced object 0xff3c1ca7252e6000 (size 8192): + comm "kworker/0:0", pid 8, jiffies 4296833052 + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ backtrace (crc 0): + [] __kmalloc_cache_noprof+0x275/0x340 + [] ice_init_arfs+0x3a/0xe0 [ice] + [] ice_vsi_cfg_def+0x607/0x850 [ice] + [] ice_vsi_setup+0x5b/0x130 [ice] + [] ice_init+0x1c1/0x460 [ice] + [] ice_probe+0x2af/0x520 [ice] + [] local_pci_probe+0x43/0xa0 + [] work_for_cpu_fn+0x13/0x20 + [] process_one_work+0x179/0x390 + [] worker_thread+0x239/0x340 + [] kthread+0xcc/0x100 + [] ret_from_fork+0x2d/0x50 + [] ret_from_fork_asm+0x1a/0x30 + ... + +Fixes: 28bf26724fdb ("ice: Implement aRFS") +Reviewed-by: Michal Swiatkowski +Signed-off-by: Grzegorz Nitka +Reviewed-by: Simon Horman +Tested-by: Rinitha S (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c +index 7cee365cc7d16..405ddd17de1bf 100644 +--- a/drivers/net/ethernet/intel/ice/ice_arfs.c ++++ b/drivers/net/ethernet/intel/ice/ice_arfs.c +@@ -511,7 +511,7 @@ void ice_init_arfs(struct ice_vsi *vsi) + struct hlist_head *arfs_fltr_list; + unsigned int i; + +- if (!vsi || vsi->type != ICE_VSI_PF) ++ if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi)) + return; + + arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list), +-- +2.39.5 + diff --git a/queue-6.13/ice-fix-switchdev-slow-path-in-lag.patch b/queue-6.13/ice-fix-switchdev-slow-path-in-lag.patch new file mode 100644 index 0000000000..ab3084af15 --- /dev/null +++ b/queue-6.13/ice-fix-switchdev-slow-path-in-lag.patch @@ -0,0 +1,116 @@ +From be4133abc418acc374e2ecd3a67ba28eea4acfc8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Jan 2025 20:07:52 +0100 +Subject: ice: Fix switchdev slow-path in LAG + +From: Marcin Szycik + +[ Upstream commit dce97cb0a3e34204c0b99345418a714eac85953f ] + +Ever since removing switchdev control VSI and using PF for port +representor Tx/Rx, switchdev slow-path 
has been working improperly after +failover in SR-IOV LAG. LAG assumes that the first uplink to be added to +the aggregate will own VFs and have switchdev configured. After +failing-over to the other uplink, representors are still configured to +Tx through the uplink they are set up on, which fails because that +uplink is now down. + +On failover, update all PRs on primary uplink to use the currently +active uplink for Tx. Call netif_keep_dst(), as the secondary uplink +might not be in switchdev mode. Also make sure to call +ice_eswitch_set_target_vsi() if uplink is in LAG. + +On the Rx path, representors are already working properly, because +default Tx from VFs is set to PF owning the eswitch. After failover the +same PF is receiving traffic from VFs, even though link is down. + +Fixes: defd52455aee ("ice: do Tx through PF netdev in slow-path") +Reviewed-by: Michal Swiatkowski +Signed-off-by: Marcin Szycik +Reviewed-by: Simon Horman +Tested-by: Rafal Romanowski +Tested-by: Sujai Buvaneswaran +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_lag.c | 27 +++++++++++++++++++++++ + drivers/net/ethernet/intel/ice/ice_txrx.c | 4 +++- + 2 files changed, 30 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c +index 1ccb572ce285d..22371011c2492 100644 +--- a/drivers/net/ethernet/intel/ice/ice_lag.c ++++ b/drivers/net/ethernet/intel/ice/ice_lag.c +@@ -1000,6 +1000,28 @@ static void ice_lag_link(struct ice_lag *lag) + netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n"); + } + ++/** ++ * ice_lag_config_eswitch - configure eswitch to work with LAG ++ * @lag: lag info struct ++ * @netdev: active network interface device struct ++ * ++ * Updates all port representors in eswitch to use @netdev for Tx. ++ * ++ * Configures the netdev to keep dst metadata (also used in representor Tx). 
++ * This is required for an uplink without switchdev mode configured. ++ */ ++static void ice_lag_config_eswitch(struct ice_lag *lag, ++ struct net_device *netdev) ++{ ++ struct ice_repr *repr; ++ unsigned long id; ++ ++ xa_for_each(&lag->pf->eswitch.reprs, id, repr) ++ repr->dst->u.port_info.lower_dev = netdev; ++ ++ netif_keep_dst(netdev); ++} ++ + /** + * ice_lag_unlink - handle unlink event + * @lag: LAG info struct +@@ -1021,6 +1043,9 @@ static void ice_lag_unlink(struct ice_lag *lag) + ice_lag_move_vf_nodes(lag, act_port, pri_port); + lag->primary = false; + lag->active_port = ICE_LAG_INVALID_PORT; ++ ++ /* Config primary's eswitch back to normal operation. */ ++ ice_lag_config_eswitch(lag, lag->netdev); + } else { + struct ice_lag *primary_lag; + +@@ -1419,6 +1444,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) + ice_lag_move_vf_nodes(lag, prim_port, + event_port); + lag->active_port = event_port; ++ ice_lag_config_eswitch(lag, event_netdev); + return; + } + +@@ -1428,6 +1454,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) + /* new active port */ + ice_lag_move_vf_nodes(lag, lag->active_port, event_port); + lag->active_port = event_port; ++ ice_lag_config_eswitch(lag, event_netdev); + } else { + /* port not set as currently active (e.g. 
new active port + * has already claimed the nodes and filters +diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c +index 9c9ea4c1b93b7..380ba1e8b3b2c 100644 +--- a/drivers/net/ethernet/intel/ice/ice_txrx.c ++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c +@@ -2424,7 +2424,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) + ICE_TXD_CTX_QW1_CMD_S); + + ice_tstamp(tx_ring, skb, first, &offload); +- if (ice_is_switchdev_running(vsi->back) && vsi->type != ICE_VSI_SF) ++ if ((ice_is_switchdev_running(vsi->back) || ++ ice_lag_is_switchdev_running(vsi->back)) && ++ vsi->type != ICE_VSI_SF) + ice_eswitch_set_target_vsi(skb, &offload); + + if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { +-- +2.39.5 + diff --git a/queue-6.13/ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch b/queue-6.13/ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch new file mode 100644 index 0000000000..a8bbfc40d3 --- /dev/null +++ b/queue-6.13/ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch @@ -0,0 +1,68 @@ +From 8841db5484df6465929870338ea21976c9561ecb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Mar 2025 10:45:53 +0300 +Subject: ipvs: prevent integer overflow in do_ip_vs_get_ctl() + +From: Dan Carpenter + +[ Upstream commit 80b78c39eb86e6b55f56363b709eb817527da5aa ] + +The get->num_services variable is an unsigned int which is controlled by +the user. The struct_size() function ensures that the size calculation +does not overflow an unsigned long, however, we are saving the result to +an int so the calculation can overflow. + +Both "len" and "get->num_services" come from the user. This check is +just a sanity check to help the user and ensure they are using the API +correctly. An integer overflow here is not a big deal. This has no +security impact. + +Save the result from struct_size() type size_t to fix this integer +overflow bug. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Dan Carpenter +Acked-by: Julian Anastasov +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c +index 7d13110ce1882..0633276d96bfb 100644 +--- a/net/netfilter/ipvs/ip_vs_ctl.c ++++ b/net/netfilter/ipvs/ip_vs_ctl.c +@@ -3091,12 +3091,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) + case IP_VS_SO_GET_SERVICES: + { + struct ip_vs_get_services *get; +- int size; ++ size_t size; + + get = (struct ip_vs_get_services *)arg; + size = struct_size(get, entrytable, get->num_services); + if (*len != size) { +- pr_err("length: %u != %u\n", *len, size); ++ pr_err("length: %u != %zu\n", *len, size); + ret = -EINVAL; + goto out; + } +@@ -3132,12 +3132,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) + case IP_VS_SO_GET_DESTS: + { + struct ip_vs_get_dests *get; +- int size; ++ size_t size; + + get = (struct ip_vs_get_dests *)arg; + size = struct_size(get, entrytable, get->num_dests); + if (*len != size) { +- pr_err("length: %u != %u\n", *len, size); ++ pr_err("length: %u != %zu\n", *len, size); + ret = -EINVAL; + goto out; + } +-- +2.39.5 + diff --git a/queue-6.13/net-dsa-mv88e6xxx-verify-after-atu-load-ops.patch b/queue-6.13/net-dsa-mv88e6xxx-verify-after-atu-load-ops.patch new file mode 100644 index 0000000000..f2a4fcf4b3 --- /dev/null +++ b/queue-6.13/net-dsa-mv88e6xxx-verify-after-atu-load-ops.patch @@ -0,0 +1,137 @@ +From 627c7fa956b0d9a12b62db32e013c7d39766109b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 12:23:05 -0500 +Subject: net: dsa: mv88e6xxx: Verify after ATU Load ops + +From: Joseph Huang + +[ Upstream commit dc5340c3133a3ebe54853fd299116149e528cfaa ] + +ATU Load operations could fail silently if there's not enough space +on the device to 
hold the new entry. When this happens, the symptom +depends on the unknown flood settings. If unknown multicast flood is +disabled, the multicast packets are dropped when the ATU table is +full. If unknown multicast flood is enabled, the multicast packets +will be flooded to all ports. Either way, IGMP snooping is broken +when the ATU Load operation fails silently. + +Do a Read-After-Write verification after each fdb/mdb add operation +to make sure that the operation was really successful, and return +-ENOSPC otherwise. + +Fixes: defb05b9b9b4 ("net: dsa: mv88e6xxx: Add support for fdb_add, fdb_del, and fdb_getnext") +Signed-off-by: Joseph Huang +Reviewed-by: Andrew Lunn +Link: https://patch.msgid.link/20250306172306.3859214-1-Joseph.Huang@garmin.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mv88e6xxx/chip.c | 59 ++++++++++++++++++++++++++------ + 1 file changed, 48 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index 3a792f79270d9..d6e8398c07608 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -2224,13 +2224,11 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, + return err; + } + +-static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, +- const unsigned char *addr, u16 vid, +- u8 state) ++static int mv88e6xxx_port_db_get(struct mv88e6xxx_chip *chip, ++ const unsigned char *addr, u16 vid, ++ u16 *fid, struct mv88e6xxx_atu_entry *entry) + { +- struct mv88e6xxx_atu_entry entry; + struct mv88e6xxx_vtu_entry vlan; +- u16 fid; + int err; + + /* Ports have two private address databases: one for when the port is +@@ -2241,7 +2239,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + * VLAN ID into the port's database used for VLAN-unaware bridging. 
+ */ + if (vid == 0) { +- fid = MV88E6XXX_FID_BRIDGED; ++ *fid = MV88E6XXX_FID_BRIDGED; + } else { + err = mv88e6xxx_vtu_get(chip, vid, &vlan); + if (err) +@@ -2251,14 +2249,39 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + if (!vlan.valid) + return -EOPNOTSUPP; + +- fid = vlan.fid; ++ *fid = vlan.fid; + } + +- entry.state = 0; +- ether_addr_copy(entry.mac, addr); +- eth_addr_dec(entry.mac); ++ entry->state = 0; ++ ether_addr_copy(entry->mac, addr); ++ eth_addr_dec(entry->mac); ++ ++ return mv88e6xxx_g1_atu_getnext(chip, *fid, entry); ++} ++ ++static bool mv88e6xxx_port_db_find(struct mv88e6xxx_chip *chip, ++ const unsigned char *addr, u16 vid) ++{ ++ struct mv88e6xxx_atu_entry entry; ++ u16 fid; ++ int err; + +- err = mv88e6xxx_g1_atu_getnext(chip, fid, &entry); ++ err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); ++ if (err) ++ return false; ++ ++ return entry.state && ether_addr_equal(entry.mac, addr); ++} ++ ++static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, ++ const unsigned char *addr, u16 vid, ++ u8 state) ++{ ++ struct mv88e6xxx_atu_entry entry; ++ u16 fid; ++ int err; ++ ++ err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry); + if (err) + return err; + +@@ -2862,6 +2885,13 @@ static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, + MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC); ++ if (err) ++ goto out; ++ ++ if (!mv88e6xxx_port_db_find(chip, addr, vid)) ++ err = -ENOSPC; ++ ++out: + mv88e6xxx_reg_unlock(chip); + + return err; +@@ -6596,6 +6626,13 @@ static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, + MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC); ++ if (err) ++ goto out; ++ ++ if (!mv88e6xxx_port_db_find(chip, mdb->addr, mdb->vid)) ++ err = -ENOSPC; ++ ++out: + 
mv88e6xxx_reg_unlock(chip); + + return err; +-- +2.39.5 + diff --git a/queue-6.13/net-mctp-i2c-copy-headers-if-cloned.patch b/queue-6.13/net-mctp-i2c-copy-headers-if-cloned.patch new file mode 100644 index 0000000000..9fe25e2c74 --- /dev/null +++ b/queue-6.13/net-mctp-i2c-copy-headers-if-cloned.patch @@ -0,0 +1,47 @@ +From e0e6c094703abb30f5c87c1dc329a01f288fa770 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 10:33:20 +0800 +Subject: net: mctp i2c: Copy headers if cloned + +From: Matt Johnston + +[ Upstream commit df8ce77ba8b7c012a3edd1ca7368b46831341466 ] + +Use skb_cow_head() prior to modifying the TX SKB. This is necessary +when the SKB has been cloned, to avoid modifying other shared clones. + +Signed-off-by: Matt Johnston +Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") +Link: https://patch.msgid.link/20250306-matt-mctp-i2c-cow-v1-1-293827212681@codeconstruct.com.au +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/mctp/mctp-i2c.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c +index d2b3f5a591418..a0dba912aecea 100644 +--- a/drivers/net/mctp/mctp-i2c.c ++++ b/drivers/net/mctp/mctp-i2c.c +@@ -584,6 +584,7 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, + struct mctp_i2c_hdr *hdr; + struct mctp_hdr *mhdr; + u8 lldst, llsrc; ++ int rc; + + if (len > MCTP_I2C_MAXMTU) + return -EMSGSIZE; +@@ -594,6 +595,10 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, + lldst = *((u8 *)daddr); + llsrc = *((u8 *)saddr); + ++ rc = skb_cow_head(skb, sizeof(struct mctp_i2c_hdr)); ++ if (rc) ++ return rc; ++ + skb_push(skb, sizeof(struct mctp_i2c_hdr)); + skb_reset_mac_header(skb); + hdr = (void *)skb_mac_header(skb); +-- +2.39.5 + diff --git a/queue-6.13/net-mctp-i3c-copy-headers-if-cloned.patch b/queue-6.13/net-mctp-i3c-copy-headers-if-cloned.patch new file mode 100644 index 
0000000000..6d1f81232b --- /dev/null +++ b/queue-6.13/net-mctp-i3c-copy-headers-if-cloned.patch @@ -0,0 +1,44 @@ +From e4c9e31b11cbe2cde4a09b19c654957a1d0606c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 18:24:18 +0800 +Subject: net: mctp i3c: Copy headers if cloned + +From: Matt Johnston + +[ Upstream commit 26db9c9ee19c36a97dbb1cfef007a3c189c4c874 ] + +Use skb_cow_head() prior to modifying the tx skb. This is necessary +when the skb has been cloned, to avoid modifying other shared clones. + +Signed-off-by: Matt Johnston +Fixes: c8755b29b58e ("mctp i3c: MCTP I3C driver") +Link: https://patch.msgid.link/20250306-matt-i3c-cow-head-v1-1-d5e6a5495227@codeconstruct.com.au +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/mctp/mctp-i3c.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c +index c1e72253063b5..c678f79aa3561 100644 +--- a/drivers/net/mctp/mctp-i3c.c ++++ b/drivers/net/mctp/mctp-i3c.c +@@ -506,10 +506,15 @@ static int mctp_i3c_header_create(struct sk_buff *skb, struct net_device *dev, + const void *saddr, unsigned int len) + { + struct mctp_i3c_internal_hdr *ihdr; ++ int rc; + + if (!daddr || !saddr) + return -EINVAL; + ++ rc = skb_cow_head(skb, sizeof(struct mctp_i3c_internal_hdr)); ++ if (rc) ++ return rc; ++ + skb_push(skb, sizeof(struct mctp_i3c_internal_hdr)); + skb_reset_mac_header(skb); + ihdr = (void *)skb_mac_header(skb); +-- +2.39.5 + diff --git a/queue-6.13/net-mctp-unshare-packets-when-reassembling.patch b/queue-6.13/net-mctp-unshare-packets-when-reassembling.patch new file mode 100644 index 0000000000..72def1d7be --- /dev/null +++ b/queue-6.13/net-mctp-unshare-packets-when-reassembling.patch @@ -0,0 +1,181 @@ +From 41807a22787e63b25ee89736a2c1cc4e65bcb70c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 10:32:45 +0800 +Subject: net: mctp: unshare packets when reassembling + +From: Matt Johnston + +[ Upstream 
commit f5d83cf0eeb90fade4d5c4d17d24b8bee9ceeecc ] + +Ensure that the frag_list used for reassembly isn't shared with other +packets. This avoids incorrect reassembly when packets are cloned, and +prevents a memory leak due to circular references between fragments and +their skb_shared_info. + +The upcoming MCTP-over-USB driver uses skb_clone which can trigger the +problem - other MCTP drivers don't share SKBs. + +A kunit test is added to reproduce the issue. + +Signed-off-by: Matt Johnston +Fixes: 4a992bbd3650 ("mctp: Implement message fragmentation & reassembly") +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250306-matt-mctp-usb-v1-1-085502b3dd28@codeconstruct.com.au +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/mctp/route.c | 10 +++- + net/mctp/test/route-test.c | 109 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 117 insertions(+), 2 deletions(-) + +diff --git a/net/mctp/route.c b/net/mctp/route.c +index 3f2bd65ff5e3c..4c460160914f0 100644 +--- a/net/mctp/route.c ++++ b/net/mctp/route.c +@@ -332,8 +332,14 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) + & MCTP_HDR_SEQ_MASK; + + if (!key->reasm_head) { +- key->reasm_head = skb; +- key->reasm_tailp = &(skb_shinfo(skb)->frag_list); ++ /* Since we're manipulating the shared frag_list, ensure it isn't ++ * shared with any other SKBs. ++ */ ++ key->reasm_head = skb_unshare(skb, GFP_ATOMIC); ++ if (!key->reasm_head) ++ return -ENOMEM; ++ ++ key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list); + key->last_seq = this_seq; + return 0; + } +diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c +index 17165b86ce22d..06c1897b685a8 100644 +--- a/net/mctp/test/route-test.c ++++ b/net/mctp/test/route-test.c +@@ -921,6 +921,114 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test) + __mctp_route_test_fini(test, dev, rt, sock); + } + ++/* Input route to socket, using a fragmented message created from clones. 
++ */ ++static void mctp_test_route_input_cloned_frag(struct kunit *test) ++{ ++ /* 5 packet fragments, forming 2 complete messages */ ++ const struct mctp_hdr hdrs[5] = { ++ RX_FRAG(FL_S, 0), ++ RX_FRAG(0, 1), ++ RX_FRAG(FL_E, 2), ++ RX_FRAG(FL_S, 0), ++ RX_FRAG(FL_E, 1), ++ }; ++ struct mctp_test_route *rt; ++ struct mctp_test_dev *dev; ++ struct sk_buff *skb[5]; ++ struct sk_buff *rx_skb; ++ struct socket *sock; ++ size_t data_len; ++ u8 compare[100]; ++ u8 flat[100]; ++ size_t total; ++ void *p; ++ int rc; ++ ++ /* Arbitrary length */ ++ data_len = 3; ++ total = data_len + sizeof(struct mctp_hdr); ++ ++ __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); ++ ++ /* Create a single skb initially with concatenated packets */ ++ skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total); ++ mctp_test_skb_set_dev(skb[0], dev); ++ memset(skb[0]->data, 0 * 0x11, skb[0]->len); ++ memcpy(skb[0]->data, &hdrs[0], sizeof(struct mctp_hdr)); ++ ++ /* Extract and populate packets */ ++ for (int i = 1; i < 5; i++) { ++ skb[i] = skb_clone(skb[i - 1], GFP_ATOMIC); ++ KUNIT_ASSERT_TRUE(test, skb[i]); ++ p = skb_pull(skb[i], total); ++ KUNIT_ASSERT_TRUE(test, p); ++ skb_reset_network_header(skb[i]); ++ memcpy(skb[i]->data, &hdrs[i], sizeof(struct mctp_hdr)); ++ memset(&skb[i]->data[sizeof(struct mctp_hdr)], i * 0x11, data_len); ++ } ++ for (int i = 0; i < 5; i++) ++ skb_trim(skb[i], total); ++ ++ /* SOM packets have a type byte to match the socket */ ++ skb[0]->data[4] = 0; ++ skb[3]->data[4] = 0; ++ ++ skb_dump("pkt1 ", skb[0], false); ++ skb_dump("pkt2 ", skb[1], false); ++ skb_dump("pkt3 ", skb[2], false); ++ skb_dump("pkt4 ", skb[3], false); ++ skb_dump("pkt5 ", skb[4], false); ++ ++ for (int i = 0; i < 5; i++) { ++ KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1); ++ /* Take a reference so we can check refcounts at the end */ ++ skb_get(skb[i]); ++ } ++ ++ /* Feed the fragments into MCTP core */ ++ for (int i = 0; i < 5; i++) { ++ rc = mctp_route_input(&rt->rt, 
skb[i]); ++ KUNIT_EXPECT_EQ(test, rc, 0); ++ } ++ ++ /* Receive first reassembled message */ ++ rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); ++ KUNIT_EXPECT_EQ(test, rc, 0); ++ KUNIT_EXPECT_EQ(test, rx_skb->len, 3 * data_len); ++ rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len); ++ for (int i = 0; i < rx_skb->len; i++) ++ compare[i] = (i / data_len) * 0x11; ++ /* Set type byte */ ++ compare[0] = 0; ++ ++ KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len); ++ KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1); ++ kfree_skb(rx_skb); ++ ++ /* Receive second reassembled message */ ++ rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); ++ KUNIT_EXPECT_EQ(test, rc, 0); ++ KUNIT_EXPECT_EQ(test, rx_skb->len, 2 * data_len); ++ rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len); ++ for (int i = 0; i < rx_skb->len; i++) ++ compare[i] = (i / data_len + 3) * 0x11; ++ /* Set type byte */ ++ compare[0] = 0; ++ ++ KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len); ++ KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1); ++ kfree_skb(rx_skb); ++ ++ /* Check input skb refcounts */ ++ for (int i = 0; i < 5; i++) { ++ KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1); ++ kfree_skb(skb[i]); ++ } ++ ++ __mctp_route_test_fini(test, dev, rt, sock); ++} ++ + #if IS_ENABLED(CONFIG_MCTP_FLOWS) + + static void mctp_test_flow_init(struct kunit *test, +@@ -1144,6 +1252,7 @@ static struct kunit_case mctp_test_cases[] = { + KUNIT_CASE(mctp_test_packet_flow), + KUNIT_CASE(mctp_test_fragment_flow), + KUNIT_CASE(mctp_test_route_output_key_create), ++ KUNIT_CASE(mctp_test_route_input_cloned_frag), + {} + }; + +-- +2.39.5 + diff --git a/queue-6.13/net-mlx5-bridge-fix-the-crash-caused-by-lag-state-ch.patch b/queue-6.13/net-mlx5-bridge-fix-the-crash-caused-by-lag-state-ch.patch new file mode 100644 index 0000000000..c850f675cc --- /dev/null +++ b/queue-6.13/net-mlx5-bridge-fix-the-crash-caused-by-lag-state-ch.patch @@ -0,0 +1,129 @@ +From 
dbe690771138a248ff053cd4865cda538cb1c0db Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Mar 2025 00:01:43 +0200 +Subject: net/mlx5: Bridge, fix the crash caused by LAG state check + +From: Jianbo Liu + +[ Upstream commit 4b8eeed4fb105770ce6dc84a2c6ef953c7b71cbb ] + +When removing LAG device from bridge, NETDEV_CHANGEUPPER event is +triggered. Driver finds the lower devices (PFs) to flush all the +offloaded entries. mlx5_lag_is_shared_fdb() is also checked, and it returns +false if one of the PFs is unloaded. In such a case, +mlx5_esw_bridge_lag_rep_get() and its caller return NULL, instead of +the alive PF, and the flush is skipped. + +Besides, the bridge fdb entry's lastuse is updated in mlx5 bridge +event handler. But this SWITCHDEV_FDB_ADD_TO_BRIDGE event can be +ignored in this case because the upper interface for bond is deleted, +and the entry will never be aged because lastuse is never updated. + +To make things worse, as the entry is alive, mlx5 bridge workqueue +keeps sending that event, which is then handled by kernel bridge +notifier. It causes the following crash when accessing the passed bond +netdev which is already destroyed. + +To fix this issue, remove such checks. LAG state is already checked in +commit 15f8f168952f ("net/mlx5: Bridge, verify LAG state when adding +bond to bridge"), but the driver still needs to skip offload if LAG enters an +invalid state after initialization. 
+ + Oops: stack segment: 0000 [#1] SMP + CPU: 3 UID: 0 PID: 23695 Comm: kworker/u40:3 Tainted: G OE 6.11.0_mlnx #1 + Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 + Workqueue: mlx5_bridge_wq mlx5_esw_bridge_update_work [mlx5_core] + RIP: 0010:br_switchdev_event+0x2c/0x110 [bridge] + Code: 44 00 00 48 8b 02 48 f7 00 00 02 00 00 74 69 41 54 55 53 48 83 ec 08 48 8b a8 08 01 00 00 48 85 ed 74 4a 48 83 fe 02 48 89 d3 <4c> 8b 65 00 74 23 76 49 48 83 fe 05 74 7e 48 83 fe 06 75 2f 0f b7 + RSP: 0018:ffffc900092cfda0 EFLAGS: 00010297 + RAX: ffff888123bfe000 RBX: ffffc900092cfe08 RCX: 00000000ffffffff + RDX: ffffc900092cfe08 RSI: 0000000000000001 RDI: ffffffffa0c585f0 + RBP: 6669746f6e690a30 R08: 0000000000000000 R09: ffff888123ae92c8 + R10: 0000000000000000 R11: fefefefefefefeff R12: ffff888123ae9c60 + R13: 0000000000000001 R14: ffffc900092cfe08 R15: 0000000000000000 + FS: 0000000000000000(0000) GS:ffff88852c980000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f15914c8734 CR3: 0000000002830005 CR4: 0000000000770ef0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + PKRU: 55555554 + Call Trace: + + ? __die_body+0x1a/0x60 + ? die+0x38/0x60 + ? do_trap+0x10b/0x120 + ? do_error_trap+0x64/0xa0 + ? exc_stack_segment+0x33/0x50 + ? asm_exc_stack_segment+0x22/0x30 + ? br_switchdev_event+0x2c/0x110 [bridge] + ? sched_balance_newidle.isra.149+0x248/0x390 + notifier_call_chain+0x4b/0xa0 + atomic_notifier_call_chain+0x16/0x20 + mlx5_esw_bridge_update+0xec/0x170 [mlx5_core] + mlx5_esw_bridge_update_work+0x19/0x40 [mlx5_core] + process_scheduled_works+0x81/0x390 + worker_thread+0x106/0x250 + ? bh_worker+0x110/0x110 + kthread+0xb7/0xe0 + ? kthread_park+0x80/0x80 + ret_from_fork+0x2d/0x50 + ? 
kthread_park+0x80/0x80 + ret_from_fork_asm+0x11/0x20 + + +Fixes: ff9b7521468b ("net/mlx5: Bridge, support LAG") +Signed-off-by: Jianbo Liu +Reviewed-by: Vlad Buslov +Signed-off-by: Tariq Toukan +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/1741644104-97767-6-git-send-email-tariqt@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +index 5d128c5b4529a..0f5d7ea8956f7 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +@@ -48,15 +48,10 @@ mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw) + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower, iter) { +- struct mlx5_core_dev *mdev; +- struct mlx5e_priv *priv; +- + if (!mlx5e_eswitch_rep(lower)) + continue; + +- priv = netdev_priv(lower); +- mdev = priv->mdev; +- if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw)) ++ if (mlx5_esw_bridge_dev_same_esw(lower, esw)) + return lower; + } + +@@ -125,7 +120,7 @@ static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device * + priv = netdev_priv(rep); + mdev = priv->mdev; + if (netif_is_lag_master(dev)) +- return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev); ++ return mlx5_lag_is_master(mdev); + return true; + } + +@@ -455,6 +450,9 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, + if (!rep) + return NOTIFY_DONE; + ++ if (netif_is_lag_master(dev) && !mlx5_lag_is_shared_fdb(esw->dev)) ++ return NOTIFY_DONE; ++ + switch (event) { + case SWITCHDEV_FDB_ADD_TO_BRIDGE: + fdb_info = container_of(info, +-- +2.39.5 + diff --git a/queue-6.13/net-mlx5-fill-out-devlink-dev-info-only-for-pfs.patch 
b/queue-6.13/net-mlx5-fill-out-devlink-dev-info-only-for-pfs.patch new file mode 100644 index 0000000000..7546c63212 --- /dev/null +++ b/queue-6.13/net-mlx5-fill-out-devlink-dev-info-only-for-pfs.patch @@ -0,0 +1,45 @@ +From bf1f08d7fc3aa13188b5b8b05f4f1f4a5a3f8a4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 23:25:29 +0200 +Subject: net/mlx5: Fill out devlink dev info only for PFs + +From: Jiri Pirko + +[ Upstream commit d749d901b2168389f060b654fdaa08acf6b367d2 ] + +Firmware version query is supported on the PFs. Due to this +following kernel warning log is observed: + +[ 188.590344] mlx5_core 0000:08:00.2: mlx5_fw_version_query:816:(pid 1453): fw query isn't supported by the FW + +Fix it by restricting the query and devlink info to the PF. + +Fixes: 8338d9378895 ("net/mlx5: Added devlink info callback") +Signed-off-by: Jiri Pirko +Reviewed-by: Kalesh AP +Signed-off-by: Tariq Toukan +Reviewed-by: Parav Pandit +Link: https://patch.msgid.link/20250306212529.429329-1-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +index 98d4306929f3e..a2cf3e79693dd 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +@@ -46,6 +46,9 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + u32 running_fw, stored_fw; + int err; + ++ if (!mlx5_core_is_pf(dev)) ++ return 0; ++ + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); + if (err) + return err; +-- +2.39.5 + diff --git a/queue-6.13/net-mlx5-fix-incorrect-irq-pool-usage-when-releasing.patch b/queue-6.13/net-mlx5-fix-incorrect-irq-pool-usage-when-releasing.patch new file mode 100644 index 0000000000..5de26c730a --- /dev/null +++ 
b/queue-6.13/net-mlx5-fix-incorrect-irq-pool-usage-when-releasing.patch @@ -0,0 +1,152 @@ +From 6d0973d4908224fb43b6507696d5d48f7332815b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Mar 2025 00:01:41 +0200 +Subject: net/mlx5: Fix incorrect IRQ pool usage when releasing IRQs + +From: Shay Drory + +[ Upstream commit 32d2724db5b2361ab293427ccd5c24f4f2bcca14 ] + +mlx5_irq_pool_get() is a getter for completion IRQ pool only. +However, after the cited commit, mlx5_irq_pool_get() is called during +ctrl IRQ release flow to retrieve the pool, resulting in the use of an +incorrect IRQ pool. + +Hence, use the newly introduced mlx5_irq_get_pool() getter to retrieve +the correct IRQ pool based on the IRQ itself. While at it, rename +mlx5_irq_pool_get() to mlx5_irq_table_get_comp_irq_pool() which +accurately reflects its purpose and improves code readability. + +Fixes: 0477d5168bbb ("net/mlx5: Expose SFs IRQs") +Signed-off-by: Shay Drory +Reviewed-by: Maher Sanalla +Signed-off-by: Tariq Toukan +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/1741644104-97767-4-git-send-email-tariqt@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- + .../net/ethernet/mellanox/mlx5/core/irq_affinity.c | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h | 4 +++- + drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 13 ++++++++++--- + drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h | 2 +- + 5 files changed, 16 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c +index 2b229b6226c6a..dfb079e59d858 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c +@@ -871,8 +871,8 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) + + static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) + { ++ struct mlx5_irq_pool *pool 
= mlx5_irq_table_get_comp_irq_pool(dev); + struct mlx5_eq_table *table = dev->priv.eq_table; +- struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + struct irq_affinity_desc af_desc = {}; + struct mlx5_irq *irq; + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +index 1477db7f5307e..2691d88cdee1f 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +@@ -175,7 +175,7 @@ mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool, + + void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) + { +- struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); ++ struct mlx5_irq_pool *pool = mlx5_irq_get_pool(irq); + int cpu; + + cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +index 0881e961d8b17..586688da9940e 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +@@ -10,12 +10,15 @@ + + struct mlx5_irq; + struct cpu_rmap; ++struct mlx5_irq_pool; + + int mlx5_irq_table_init(struct mlx5_core_dev *dev); + void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); + int mlx5_irq_table_create(struct mlx5_core_dev *dev); + void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); + void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev); ++struct mlx5_irq_pool * ++mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev); + int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table); + int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table); + struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev); +@@ -38,7 +41,6 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); + int mlx5_irq_get_index(struct mlx5_irq *irq); + int mlx5_irq_get_irq(const struct 
mlx5_irq *irq); + +-struct mlx5_irq_pool; + #ifdef CONFIG_MLX5_SF + struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, + struct cpumask *used_cpus, u16 vecidx); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +index d9362eabc6a1c..2c5f850c31f68 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +@@ -378,6 +378,11 @@ int mlx5_irq_get_index(struct mlx5_irq *irq) + return irq->map.index; + } + ++struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq) ++{ ++ return irq->pool; ++} ++ + /* irq_pool API */ + + /* requesting an irq from a given pool according to given index */ +@@ -405,18 +410,20 @@ static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_tab + return irq_table->sf_ctrl_pool; + } + +-static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table) ++static struct mlx5_irq_pool * ++sf_comp_irq_pool_get(struct mlx5_irq_table *irq_table) + { + return irq_table->sf_comp_pool; + } + +-struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) ++struct mlx5_irq_pool * ++mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev) + { + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; + + if (mlx5_core_is_sf(dev)) +- pool = sf_irq_pool_get(irq_table); ++ pool = sf_comp_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. 
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h +index c4d377f8df308..cc064425fe160 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h +@@ -28,7 +28,6 @@ struct mlx5_irq_pool { + struct mlx5_core_dev *dev; + }; + +-struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev); + static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) + { + return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); +@@ -40,5 +39,6 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + int mlx5_irq_get_locked(struct mlx5_irq *irq); + int mlx5_irq_read_locked(struct mlx5_irq *irq); + int mlx5_irq_put(struct mlx5_irq *irq); ++struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq); + + #endif /* __PCI_IRQ_H__ */ +-- +2.39.5 + diff --git a/queue-6.13/net-mlx5-handle-errors-in-mlx5_chains_create_table.patch b/queue-6.13/net-mlx5-handle-errors-in-mlx5_chains_create_table.patch new file mode 100644 index 0000000000..6c3e148c8d --- /dev/null +++ b/queue-6.13/net-mlx5-handle-errors-in-mlx5_chains_create_table.patch @@ -0,0 +1,46 @@ +From d9bb7db44262297322772594f540ec7cb084c2ca Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Mar 2025 10:18:20 +0800 +Subject: net/mlx5: handle errors in mlx5_chains_create_table() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Wentao Liang + +[ Upstream commit eab0396353be1c778eba1c0b5180176f04dd21ce ] + +In mlx5_chains_create_table(), the return value of mlx5_get_fdb_sub_ns() +and mlx5_get_flow_namespace() must be checked to prevent NULL pointer +dereferences. If either function fails, the function should log error +message with mlx5_core_warn() and return error pointer. 
+ +Fixes: 39ac237ce009 ("net/mlx5: E-Switch, Refactor chains and priorities") +Signed-off-by: Wentao Liang +Reviewed-by: Tariq Toukan +Link: https://patch.msgid.link/20250307021820.2646-1-vulab@iscas.ac.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +index a80ecb672f33d..711d14dea2485 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +@@ -196,6 +196,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, + ns = mlx5_get_flow_namespace(chains->dev, chains->ns); + } + ++ if (!ns) { ++ mlx5_core_warn(chains->dev, "Failed to get flow namespace\n"); ++ return ERR_PTR(-EOPNOTSUPP); ++ } ++ + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = chains->group_num; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); +-- +2.39.5 + diff --git a/queue-6.13/net-mlx5-hws-rightsize-bwc-matcher-priority.patch b/queue-6.13/net-mlx5-hws-rightsize-bwc-matcher-priority.patch new file mode 100644 index 0000000000..177c49751c --- /dev/null +++ b/queue-6.13/net-mlx5-hws-rightsize-bwc-matcher-priority.patch @@ -0,0 +1,44 @@ +From 9c2b44b88fa2fdd12be2abcc39e78a092baa3022 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Mar 2025 00:01:40 +0200 +Subject: net/mlx5: HWS, Rightsize bwc matcher priority + +From: Vlad Dogaru + +[ Upstream commit 521992337f67f71ce4436b98bc32563ddb1a5ce3 ] + +The bwc layer was clamping the matcher priority from 32 bits to 16 bits. +This didn't show up until a matcher was resized, since the initial +native matcher was created using the correct 32 bit value. + +The fix also reorders fields to avoid some padding. 
+ +Fixes: 2111bb970c78 ("net/mlx5: HWS, added backward-compatible API handling") +Signed-off-by: Vlad Dogaru +Reviewed-by: Yevgeny Kliteynik +Reviewed-by: Mark Bloch +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/1741644104-97767-3-git-send-email-tariqt@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +index 655fa7a22d84f..7c00740f1d130 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +@@ -16,8 +16,8 @@ struct mlx5hws_bwc_matcher { + struct mlx5hws_matcher *matcher; + struct mlx5hws_match_template *mt; + struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM]; ++ u32 priority; + u8 num_of_at; +- u16 priority; + u8 size_log; + atomic_t num_of_rules; + struct list_head *rules; +-- +2.39.5 + diff --git a/queue-6.13/net-mlx5-lag-check-shared-fdb-before-creating-multip.patch b/queue-6.13/net-mlx5-lag-check-shared-fdb-before-creating-multip.patch new file mode 100644 index 0000000000..e0042a17d5 --- /dev/null +++ b/queue-6.13/net-mlx5-lag-check-shared-fdb-before-creating-multip.patch @@ -0,0 +1,78 @@ +From 77dad285646a1f4fda842e05b8d0fef9363d16c8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Mar 2025 00:01:42 +0200 +Subject: net/mlx5: Lag, Check shared fdb before creating MultiPort E-Switch + +From: Shay Drory + +[ Upstream commit 32966984bee1defd9f5a8f9be274d7c32f911ba1 ] + +Currently, MultiPort E-Switch is requesting to create a LAG with shared +FDB without checking the LAG is supporting shared FDB. +Add the check. 
+ +Fixes: a32327a3a02c ("net/mlx5: Lag, Control MultiPort E-Switch single FDB mode") +Signed-off-by: Shay Drory +Reviewed-by: Mark Bloch +Signed-off-by: Tariq Toukan +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/1741644104-97767-5-git-send-email-tariqt@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 ++-- + drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 1 + + drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 3 ++- + 3 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +index 7f68468c2e759..4b3da7ebd6310 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +@@ -859,7 +859,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) + mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + } + +-static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) ++bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) + { + struct mlx5_core_dev *dev; + int i; +@@ -937,7 +937,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) + } + + if (do_bond && !__mlx5_lag_is_active(ldev)) { +- bool shared_fdb = mlx5_shared_fdb_supported(ldev); ++ bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev); + + roce_lag = mlx5_lag_is_roce_lag(ldev); + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +index 50fcb1eee5748..48a5f3e7b91a8 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +@@ -92,6 +92,7 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev) + return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); + } + ++bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev); + bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); + void mlx5_modify_lag(struct mlx5_lag *ldev, + 
struct lag_tracker *tracker); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +index 571ea26edd0ca..2381a0eec1900 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +@@ -81,7 +81,8 @@ static int enable_mpesw(struct mlx5_lag *ldev) + if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || + !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || + !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || +- !mlx5_lag_check_prereq(ldev)) ++ !mlx5_lag_check_prereq(ldev) || ++ !mlx5_lag_shared_fdb_supported(ldev)) + return -EOPNOTSUPP; + + err = mlx5_mpesw_metadata_set(ldev); +-- +2.39.5 + diff --git a/queue-6.13/net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch b/queue-6.13/net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch new file mode 100644 index 0000000000..2129ca3b53 --- /dev/null +++ b/queue-6.13/net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch @@ -0,0 +1,53 @@ +From d61b7cb202dfcd631c55088220d0f992677a6a06 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Mar 2025 00:01:44 +0200 +Subject: net/mlx5e: Prevent bridge link show failure for non-eswitch-allowed + devices + +From: Carolina Jubran + +[ Upstream commit e92df790d07a8eea873efcb84776e7b71f81c7d5 ] + +mlx5_eswitch_get_vepa returns -EPERM if the device lacks +eswitch_manager capability, blocking mlx5e_bridge_getlink from +retrieving VEPA mode. Since mlx5e_bridge_getlink implements +ndo_bridge_getlink, returning -EPERM causes bridge link show to fail +instead of skipping devices without this capability. + +To avoid this, return -EOPNOTSUPP from mlx5e_bridge_getlink when +mlx5_eswitch_get_vepa fails, ensuring the command continues processing +other devices while ignoring those without the necessary capability. 
+ +Fixes: 4b89251de024 ("net/mlx5: Support ndo bridge_setlink and getlink") +Signed-off-by: Carolina Jubran +Reviewed-by: Jianbo Liu +Signed-off-by: Tariq Toukan +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/1741644104-97767-7-git-send-email-tariqt@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index cb93f46eaa7c3..e02bb589104a5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -5131,11 +5131,9 @@ static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mode, setting; +- int err; + +- err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); +- if (err) +- return err; ++ if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting)) ++ return -EOPNOTSUPP; + mode = setting ? 
BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, + mode, +-- +2.39.5 + diff --git a/queue-6.13/net-openvswitch-remove-misbehaving-actions-length-ch.patch b/queue-6.13/net-openvswitch-remove-misbehaving-actions-length-ch.patch new file mode 100644 index 0000000000..b46e8a74f3 --- /dev/null +++ b/queue-6.13/net-openvswitch-remove-misbehaving-actions-length-ch.patch @@ -0,0 +1,154 @@ +From 7269e76fb45d9b3ff8d967f076368cff4b1a312d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 8 Mar 2025 01:45:59 +0100 +Subject: net: openvswitch: remove misbehaving actions length check + +From: Ilya Maximets + +[ Upstream commit a1e64addf3ff9257b45b78bc7d743781c3f41340 ] + +The actions length check is unreliable and produces different results +depending on the initial length of the provided netlink attribute and +the composition of the actual actions inside of it. For example, a +user can add 4088 empty clone() actions without triggering -EMSGSIZE, +on attempt to add 4089 such actions the operation will fail with the +-EMSGSIZE verdict. However, if another 16 KB of other actions will +be *appended* to the previous 4089 clone() actions, the check passes +and the flow is successfully installed into the openvswitch datapath. + +The reason for a such a weird behavior is the way memory is allocated. +When ovs_flow_cmd_new() is invoked, it calls ovs_nla_copy_actions(), +that in turn calls nla_alloc_flow_actions() with either the actual +length of the user-provided actions or the MAX_ACTIONS_BUFSIZE. The +function adds the size of the sw_flow_actions structure and then the +actually allocated memory is rounded up to the closest power of two. + +So, if the user-provided actions are larger than MAX_ACTIONS_BUFSIZE, +then MAX_ACTIONS_BUFSIZE + sizeof(*sfa) rounded up is 32K + 24 -> 64K. 
+Later, while copying individual actions, we look at ksize(), which is +64K, so this way the MAX_ACTIONS_BUFSIZE check is not actually +triggered and the user can easily allocate almost 64 KB of actions. + +However, when the initial size is less than MAX_ACTIONS_BUFSIZE, but +the actions contain ones that require size increase while copying +(such as clone() or sample()), then the limit check will be performed +during the reserve_sfa_size() and the user will not be allowed to +create actions that yield more than 32 KB internally. + +This is one part of the problem. The other part is that it's not +actually possible for the userspace application to know beforehand +if the particular set of actions will be rejected or not. + +Certain actions require more space in the internal representation, +e.g. an empty clone() takes 4 bytes in the action list passed in by +the user, but it takes 12 bytes in the internal representation due +to an extra nested attribute, and some actions require less space in +the internal representations, e.g. set(tunnel(..)) normally takes +64+ bytes in the action list provided by the user, but only needs to +store a single pointer in the internal implementation, since all the +data is stored in the tunnel_info structure instead. + +And the action size limit is applied to the internal representation, +not to the action list passed by the user. So, it's not possible for +the userspace application to predict if a certain combination of +actions will be rejected or not, because it is not possible for it to +calculate how much space these actions will take in the internal +representation without knowing kernel internals. + +All that is causing random failures in ovs-vswitchd in userspace and +inability to handle certain traffic patterns as a result. 
For example, +it is reported that adding a bit more than 1100 VMs in an OpenStack +setup breaks the network due to OVS not being able to handle ARP +traffic anymore in some cases (it tries to install a proper datapath +flow, but the kernel rejects it with -EMSGSIZE, even though the action +list isn't actually that large.) + +Kernel behavior must be consistent and predictable in order for the +userspace application to use it in a reasonable way. ovs-vswitchd has +a mechanism to re-direct parts of the traffic and partially handle it +in userspace if the required action list is oversized, but that doesn't +work properly if we can't actually tell if the action list is oversized +or not. + +Solution for this is to check the size of the user-provided actions +instead of the internal representation. This commit just removes the +check from the internal part because there is already an implicit size +check imposed by the netlink protocol. The attribute can't be larger +than 64 KB. Realistically, we could reduce the limit to 32 KB, but +we'll be risking to break some existing setups that rely on the fact +that it's possible to create nearly 64 KB action lists today. + +Vast majority of flows in real setups are below 100-ish bytes. So +removal of the limit will not change real memory consumption on the +system. The absolutely worst case scenario is if someone adds a flow +with 64 KB of empty clone() actions. That will yield 192 KB in the +internal representation consuming a 256 KB block of memory. However, +that list of actions is not meaningful and also a no-op. Real world +very large action lists (that can occur for rare cases of BUM +traffic handling) are unlikely to contain a large number of clones and +will likely have a lot of tunnel attributes making the internal +representation comparable in size to the original action list. +So, it should be fine to just remove the limit. 
+ +Commit in the 'Fixes' tag is the first one that introduced the +difference between internal representation and the user-provided action +lists, but there were many more afterwards that lead to the situation +we have today. + +Fixes: 7d5437c709de ("openvswitch: Add tunneling interface.") +Signed-off-by: Ilya Maximets +Reviewed-by: Aaron Conole +Link: https://patch.msgid.link/20250308004609.2881861-1-i.maximets@ovn.org +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/openvswitch/flow_netlink.c | 15 +-------------- + 1 file changed, 1 insertion(+), 14 deletions(-) + +diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c +index 881ddd3696d54..95e0dd14dc1a3 100644 +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -2317,14 +2317,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) + OVS_FLOW_ATTR_MASK, true, skb); + } + +-#define MAX_ACTIONS_BUFSIZE (32 * 1024) +- + static struct sw_flow_actions *nla_alloc_flow_actions(int size) + { + struct sw_flow_actions *sfa; + +- WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); +- + sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); + if (!sfa) + return ERR_PTR(-ENOMEM); +@@ -2480,15 +2476,6 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, + + new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); + +- if (new_acts_size > MAX_ACTIONS_BUFSIZE) { +- if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { +- OVS_NLERR(log, "Flow action size exceeds max %u", +- MAX_ACTIONS_BUFSIZE); +- return ERR_PTR(-EMSGSIZE); +- } +- new_acts_size = MAX_ACTIONS_BUFSIZE; +- } +- + acts = nla_alloc_flow_actions(new_acts_size); + if (IS_ERR(acts)) + return ERR_CAST(acts); +@@ -3545,7 +3532,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, + int err; + u32 mpls_label_count = 0; + +- *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); ++ *sfa = 
nla_alloc_flow_actions(nla_len(attr)); + if (IS_ERR(*sfa)) + return PTR_ERR(*sfa); + +-- +2.39.5 + diff --git a/queue-6.13/net-switchdev-convert-blocking-notification-chain-to.patch b/queue-6.13/net-switchdev-convert-blocking-notification-chain-to.patch new file mode 100644 index 0000000000..f9c1e16b9d --- /dev/null +++ b/queue-6.13/net-switchdev-convert-blocking-notification-chain-to.patch @@ -0,0 +1,150 @@ +From 152acebf7c34fb7d918b8823977e664fab5b84c8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Mar 2025 14:15:09 +0200 +Subject: net: switchdev: Convert blocking notification chain to a raw one + +From: Amit Cohen + +[ Upstream commit 62531a1effa87bdab12d5104015af72e60d926ff ] + +A blocking notification chain uses a read-write semaphore to protect the +integrity of the chain. The semaphore is acquired for writing when +adding / removing notifiers to / from the chain and acquired for reading +when traversing the chain and informing notifiers about an event. + +In case of the blocking switchdev notification chain, recursive +notifications are possible which leads to the semaphore being acquired +twice for reading and to lockdep warnings being generated [1]. + +Specifically, this can happen when the bridge driver processes a +SWITCHDEV_BRPORT_UNOFFLOADED event which causes it to emit notifications +about deferred events when calling switchdev_deferred_process(). + +Fix this by converting the notification chain to a raw notification +chain in a similar fashion to the netdev notification chain. Protect +the chain using the RTNL mutex by acquiring it when modifying the chain. +Events are always informed under the RTNL mutex, but add an assertion in +call_switchdev_blocking_notifiers() to make sure this is not violated in +the future. + +Maintain the "blocking" prefix as events are always emitted from process +context and listeners are allowed to block. 
+ +[1]: +WARNING: possible recursive locking detected +6.14.0-rc4-custom-g079270089484 #1 Not tainted +-------------------------------------------- +ip/52731 is trying to acquire lock: +ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 + +but task is already holding lock: +ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 + +other info that might help us debug this: +Possible unsafe locking scenario: +CPU0 +---- +lock((switchdev_blocking_notif_chain).rwsem); +lock((switchdev_blocking_notif_chain).rwsem); + +*** DEADLOCK *** +May be due to missing lock nesting notation +3 locks held by ip/52731: + #0: ffffffff84f795b0 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x727/0x1dc0 + #1: ffffffff8731f628 (&net->rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x790/0x1dc0 + #2: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0 + +stack backtrace: +... +? __pfx_down_read+0x10/0x10 +? __pfx_mark_lock+0x10/0x10 +? __pfx_switchdev_port_attr_set_deferred+0x10/0x10 +blocking_notifier_call_chain+0x58/0xa0 +switchdev_port_attr_notify.constprop.0+0xb3/0x1b0 +? __pfx_switchdev_port_attr_notify.constprop.0+0x10/0x10 +? mark_held_locks+0x94/0xe0 +? switchdev_deferred_process+0x11a/0x340 +switchdev_port_attr_set_deferred+0x27/0xd0 +switchdev_deferred_process+0x164/0x340 +br_switchdev_port_unoffload+0xc8/0x100 [bridge] +br_switchdev_blocking_event+0x29f/0x580 [bridge] +notifier_call_chain+0xa2/0x440 +blocking_notifier_call_chain+0x6e/0xa0 +switchdev_bridge_port_unoffload+0xde/0x1a0 +... 
+ +Fixes: f7a70d650b0b6 ("net: bridge: switchdev: Ensure deferred event delivery on unoffload") +Signed-off-by: Amit Cohen +Reviewed-by: Ido Schimmel +Reviewed-by: Simon Horman +Reviewed-by: Vladimir Oltean +Tested-by: Vladimir Oltean +Link: https://patch.msgid.link/20250305121509.631207-1-amcohen@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/switchdev/switchdev.c | 25 ++++++++++++++++++------- + 1 file changed, 18 insertions(+), 7 deletions(-) + +diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c +index 6488ead9e4645..4d5fbacef496f 100644 +--- a/net/switchdev/switchdev.c ++++ b/net/switchdev/switchdev.c +@@ -472,7 +472,7 @@ bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); + + static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); +-static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); ++static RAW_NOTIFIER_HEAD(switchdev_blocking_notif_chain); + + /** + * register_switchdev_notifier - Register notifier +@@ -518,17 +518,27 @@ EXPORT_SYMBOL_GPL(call_switchdev_notifiers); + + int register_switchdev_blocking_notifier(struct notifier_block *nb) + { +- struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; ++ struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; ++ int err; ++ ++ rtnl_lock(); ++ err = raw_notifier_chain_register(chain, nb); ++ rtnl_unlock(); + +- return blocking_notifier_chain_register(chain, nb); ++ return err; + } + EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier); + + int unregister_switchdev_blocking_notifier(struct notifier_block *nb) + { +- struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; ++ struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; ++ int err; + +- return blocking_notifier_chain_unregister(chain, nb); ++ rtnl_lock(); ++ err = raw_notifier_chain_unregister(chain, nb); ++ rtnl_unlock(); ++ ++ return err; + } + 
EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier); + +@@ -536,10 +546,11 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack) + { ++ ASSERT_RTNL(); + info->dev = dev; + info->extack = extack; +- return blocking_notifier_call_chain(&switchdev_blocking_notif_chain, +- val, info); ++ return raw_notifier_call_chain(&switchdev_blocking_notif_chain, ++ val, info); + } + EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); + +-- +2.39.5 + diff --git a/queue-6.13/net_sched-prevent-creation-of-classes-with-tc_h_root.patch b/queue-6.13/net_sched-prevent-creation-of-classes-with-tc_h_root.patch new file mode 100644 index 0000000000..1905eb45b9 --- /dev/null +++ b/queue-6.13/net_sched-prevent-creation-of-classes-with-tc_h_root.patch @@ -0,0 +1,50 @@ +From ce9d52da8329393169c8cae690525be506c623e6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 15:23:54 -0800 +Subject: net_sched: Prevent creation of classes with TC_H_ROOT + +From: Cong Wang + +[ Upstream commit 0c3057a5a04d07120b3d0ec9c79568fceb9c921e ] + +The function qdisc_tree_reduce_backlog() uses TC_H_ROOT as a termination +condition when traversing up the qdisc tree to update parent backlog +counters. However, if a class is created with classid TC_H_ROOT, the +traversal terminates prematurely at this class instead of reaching the +actual root qdisc, causing parent statistics to be incorrectly maintained. +In case of DRR, this could lead to a crash as reported by Mingi Cho. + +Prevent the creation of any Qdisc class with classid TC_H_ROOT +(0xFFFFFFFF) across all qdisc types, as suggested by Jamal. 
+ +Reported-by: Mingi Cho +Signed-off-by: Cong Wang +Reviewed-by: Simon Horman +Fixes: 066a3b5b2346 ("[NET_SCHED] sch_api: fix qdisc_tree_decrease_qlen() loop") +Link: https://patch.msgid.link/20250306232355.93864-2-xiyou.wangcong@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_api.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c +index fac9c946a4c75..778cf54df69b0 100644 +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -2254,6 +2254,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, + return -EOPNOTSUPP; + } + ++ /* Prevent creation of traffic classes with classid TC_H_ROOT */ ++ if (clid == TC_H_ROOT) { ++ NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT"); ++ return -EINVAL; ++ } ++ + new_cl = cl; + err = -EOPNOTSUPP; + if (cops->change) +-- +2.39.5 + diff --git a/queue-6.13/netfilter-nf_conncount-fully-initialize-struct-nf_co.patch b/queue-6.13/netfilter-nf_conncount-fully-initialize-struct-nf_co.patch new file mode 100644 index 0000000000..111d201654 --- /dev/null +++ b/queue-6.13/netfilter-nf_conncount-fully-initialize-struct-nf_co.patch @@ -0,0 +1,129 @@ +From 574db12219f4a0c2bf9ab5e6579c7cee5f78855a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Mar 2025 17:07:38 +0900 +Subject: netfilter: nf_conncount: Fully initialize struct nf_conncount_tuple + in insert_tree() + +From: Kohei Enju + +[ Upstream commit d653bfeb07ebb3499c403404c21ac58a16531607 ] + +Since commit b36e4523d4d5 ("netfilter: nf_conncount: fix garbage +collection confirm race"), `cpu` and `jiffies32` were introduced to +the struct nf_conncount_tuple. + +The commit made nf_conncount_add() initialize `conn->cpu` and +`conn->jiffies32` when allocating the struct. +In contrast, count_tree() was not changed to initialize them. 
+ +By commit 34848d5c896e ("netfilter: nf_conncount: Split insert and +traversal"), count_tree() was split and the relevant allocation +code now resides in insert_tree(). +Initialize `conn->cpu` and `conn->jiffies32` in insert_tree(). + +BUG: KMSAN: uninit-value in find_or_evict net/netfilter/nf_conncount.c:117 [inline] +BUG: KMSAN: uninit-value in __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 + find_or_evict net/netfilter/nf_conncount.c:117 [inline] + __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143 + count_tree net/netfilter/nf_conncount.c:438 [inline] + nf_conncount_count+0x82f/0x1e80 net/netfilter/nf_conncount.c:521 + connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 + __nft_match_eval net/netfilter/nft_compat.c:403 [inline] + nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 + expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] + nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 + nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 + nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] + nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 + nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 + NF_HOOK_LIST include/linux/netfilter.h:350 [inline] + ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 + ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 + __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] + __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 + __netif_receive_skb_list net/core/dev.c:6035 [inline] + netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 + netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 + xdp_recv_frames net/bpf/test_run.c:280 [inline] + xdp_test_run_batch net/bpf/test_run.c:361 [inline] + bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 + bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 + bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 + __sys_bpf+0x6aa/0xd90 
kernel/bpf/syscall.c:5813 + __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] + __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] + __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 + ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 + do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] + __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 + do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 + do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 + entry_SYSENTER_compat_after_hwframe+0x84/0x8e + +Uninit was created at: + slab_post_alloc_hook mm/slub.c:4121 [inline] + slab_alloc_node mm/slub.c:4164 [inline] + kmem_cache_alloc_noprof+0x915/0xe10 mm/slub.c:4171 + insert_tree net/netfilter/nf_conncount.c:372 [inline] + count_tree net/netfilter/nf_conncount.c:450 [inline] + nf_conncount_count+0x1415/0x1e80 net/netfilter/nf_conncount.c:521 + connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72 + __nft_match_eval net/netfilter/nft_compat.c:403 [inline] + nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433 + expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] + nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288 + nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23 + nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] + nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626 + nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663 + NF_HOOK_LIST include/linux/netfilter.h:350 [inline] + ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633 + ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669 + __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline] + __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983 + __netif_receive_skb_list net/core/dev.c:6035 [inline] + netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126 + netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178 + xdp_recv_frames net/bpf/test_run.c:280 [inline] + xdp_test_run_batch net/bpf/test_run.c:361 
[inline] + bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390 + bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316 + bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407 + __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813 + __do_sys_bpf kernel/bpf/syscall.c:5902 [inline] + __se_sys_bpf kernel/bpf/syscall.c:5900 [inline] + __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900 + ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358 + do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] + __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387 + do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412 + do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450 + entry_SYSENTER_compat_after_hwframe+0x84/0x8e + +Reported-by: syzbot+83fed965338b573115f7@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=83fed965338b573115f7 +Fixes: b36e4523d4d5 ("netfilter: nf_conncount: fix garbage collection confirm race") +Signed-off-by: Kohei Enju +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_conncount.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c +index ebe38ed2e6f4f..913ede2f57f9a 100644 +--- a/net/netfilter/nf_conncount.c ++++ b/net/netfilter/nf_conncount.c +@@ -377,6 +377,8 @@ insert_tree(struct net *net, + + conn->tuple = *tuple; + conn->zone = *zone; ++ conn->cpu = raw_smp_processor_id(); ++ conn->jiffies32 = (u32)jiffies; + memcpy(rbconn->key, key, sizeof(u32) * data->keylen); + + nf_conncount_list_init(&rbconn->list); +-- +2.39.5 + diff --git a/queue-6.13/netfilter-nf_conncount-garbage-collection-is-not-ski.patch b/queue-6.13/netfilter-nf_conncount-garbage-collection-is-not-ski.patch new file mode 100644 index 0000000000..f20804de0b --- /dev/null +++ b/queue-6.13/netfilter-nf_conncount-garbage-collection-is-not-ski.patch @@ -0,0 +1,55 @@ +From 
ddd2f2a7451aed0963e81825bc4b63b0c322ba67 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Feb 2025 13:32:34 +0000 +Subject: netfilter: nf_conncount: garbage collection is not skipped when + jiffies wrap around + +From: Nicklas Bo Jensen + +[ Upstream commit df08c94baafb001de6cf44bb7098bb557f36c335 ] + +nf_conncount is supposed to skip garbage collection if it has already +run garbage collection in the same jiffy. Unfortunately, this is broken +when jiffies wrap around which this patch fixes. + +The problem is that last_gc in the nf_conncount_list struct is an u32, +but jiffies is an unsigned long which is 8 bytes on my systems. When +those two are compared it only works until last_gc wraps around. + +See bug report: https://bugzilla.netfilter.org/show_bug.cgi?id=1778 +for more details. + +Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC") +Signed-off-by: Nicklas Bo Jensen +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_conncount.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c +index 4890af4dc263f..ebe38ed2e6f4f 100644 +--- a/net/netfilter/nf_conncount.c ++++ b/net/netfilter/nf_conncount.c +@@ -132,7 +132,7 @@ static int __nf_conncount_add(struct net *net, + struct nf_conn *found_ct; + unsigned int collect = 0; + +- if (time_is_after_eq_jiffies((unsigned long)list->last_gc)) ++ if ((u32)jiffies == list->last_gc) + goto add_new_node; + + /* check the saved connections */ +@@ -234,7 +234,7 @@ bool nf_conncount_gc_list(struct net *net, + bool ret = false; + + /* don't bother if we just did GC */ +- if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc))) ++ if ((u32)jiffies == READ_ONCE(list->last_gc)) + return false; + + /* don't bother if other cpu is already doing GC */ +-- +2.39.5 + diff --git a/queue-6.13/netfilter-nf_tables-make-destruction-work-queue-pern.patch 
b/queue-6.13/netfilter-nf_tables-make-destruction-work-queue-pern.patch new file mode 100644 index 0000000000..46871fe952 --- /dev/null +++ b/queue-6.13/netfilter-nf_tables-make-destruction-work-queue-pern.patch @@ -0,0 +1,235 @@ +From 758cc3f0e8f846fe1af033ebca36cbd4f5c96a6d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 04:05:26 +0100 +Subject: netfilter: nf_tables: make destruction work queue pernet + +From: Florian Westphal + +[ Upstream commit fb8286562ecfb585e26b033c5e32e6fb85efb0b3 ] + +The call to flush_work before tearing down a table from the netlink +notifier was supposed to make sure that all earlier updates (e.g. rule +add) that might reference that table have been processed. + +Unfortunately, flush_work() waits for the last queued instance. +This could be an instance that is different from the one that we must +wait for. + +This is because transactions are protected with a pernet mutex, but the +work item is global, so holding the transaction mutex doesn't prevent +another netns from queueing more work. + +Make the work item pernet so that flush_work() will wait for all +transactions queued from this netns. + +A welcome side effect is that we no longer need to wait for transaction +objects from foreign netns. + +The gc work queue is still global. This seems to be ok because nft_set +structures are reference counted and each container structure owns a +reference on the net namespace. + +The destroy_list is still protected by a global spinlock rather than +pernet one but the hold time is very short anyway. + +v2: call cancel_work_sync before reaping the remaining tables (Pablo). 
+ +Fixes: 9f6958ba2e90 ("netfilter: nf_tables: unconditionally flush pending work before notifier") +Reported-by: syzbot+5d8c5789c8cb076b2c25@syzkaller.appspotmail.com +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_tables.h | 4 +++- + net/netfilter/nf_tables_api.c | 24 ++++++++++++++---------- + net/netfilter/nft_compat.c | 8 ++++---- + 3 files changed, 21 insertions(+), 15 deletions(-) + +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index f6958118986ac..ea0236c938d8f 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -1891,7 +1891,7 @@ void nft_chain_filter_fini(void); + void __init nft_chain_route_init(void); + void nft_chain_route_fini(void); + +-void nf_tables_trans_destroy_flush_work(void); ++void nf_tables_trans_destroy_flush_work(struct net *net); + + int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); + __be64 nf_jiffies64_to_msecs(u64 input); +@@ -1905,6 +1905,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) 
{ re + struct nftables_pernet { + struct list_head tables; + struct list_head commit_list; ++ struct list_head destroy_list; + struct list_head commit_set_list; + struct list_head binding_list; + struct list_head module_list; +@@ -1915,6 +1916,7 @@ struct nftables_pernet { + unsigned int base_seq; + unsigned int gc_seq; + u8 validate_state; ++ struct work_struct destroy_work; + }; + + extern unsigned int nf_tables_net_id; +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 73e37861ff11f..99f6b2530b96e 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -34,7 +34,6 @@ unsigned int nf_tables_net_id __read_mostly; + static LIST_HEAD(nf_tables_expressions); + static LIST_HEAD(nf_tables_objects); + static LIST_HEAD(nf_tables_flowtables); +-static LIST_HEAD(nf_tables_destroy_list); + static LIST_HEAD(nf_tables_gc_list); + static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); + static DEFINE_SPINLOCK(nf_tables_gc_list_lock); +@@ -125,7 +124,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s + table->validate_state = new_validate_state; + } + static void nf_tables_trans_destroy_work(struct work_struct *w); +-static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); + + static void nft_trans_gc_work(struct work_struct *work); + static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); +@@ -10004,11 +10002,12 @@ static void nft_commit_release(struct nft_trans *trans) + + static void nf_tables_trans_destroy_work(struct work_struct *w) + { ++ struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work); + struct nft_trans *trans, *next; + LIST_HEAD(head); + + spin_lock(&nf_tables_destroy_list_lock); +- list_splice_init(&nf_tables_destroy_list, &head); ++ list_splice_init(&nft_net->destroy_list, &head); + spin_unlock(&nf_tables_destroy_list_lock); + + if (list_empty(&head)) +@@ -10022,9 +10021,11 @@ static void 
nf_tables_trans_destroy_work(struct work_struct *w) + } + } + +-void nf_tables_trans_destroy_flush_work(void) ++void nf_tables_trans_destroy_flush_work(struct net *net) + { +- flush_work(&trans_destroy_work); ++ struct nftables_pernet *nft_net = nft_pernet(net); ++ ++ flush_work(&nft_net->destroy_work); + } + EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); + +@@ -10482,11 +10483,11 @@ static void nf_tables_commit_release(struct net *net) + + trans->put_net = true; + spin_lock(&nf_tables_destroy_list_lock); +- list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); ++ list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list); + spin_unlock(&nf_tables_destroy_list_lock); + + nf_tables_module_autoload_cleanup(net); +- schedule_work(&trans_destroy_work); ++ schedule_work(&nft_net->destroy_work); + + mutex_unlock(&nft_net->commit_mutex); + } +@@ -11892,7 +11893,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, + + gc_seq = nft_gc_seq_begin(nft_net); + +- nf_tables_trans_destroy_flush_work(); ++ nf_tables_trans_destroy_flush_work(net); + again: + list_for_each_entry(table, &nft_net->tables, list) { + if (nft_table_has_owner(table) && +@@ -11934,6 +11935,7 @@ static int __net_init nf_tables_init_net(struct net *net) + + INIT_LIST_HEAD(&nft_net->tables); + INIT_LIST_HEAD(&nft_net->commit_list); ++ INIT_LIST_HEAD(&nft_net->destroy_list); + INIT_LIST_HEAD(&nft_net->commit_set_list); + INIT_LIST_HEAD(&nft_net->binding_list); + INIT_LIST_HEAD(&nft_net->module_list); +@@ -11942,6 +11944,7 @@ static int __net_init nf_tables_init_net(struct net *net) + nft_net->base_seq = 1; + nft_net->gc_seq = 0; + nft_net->validate_state = NFT_VALIDATE_SKIP; ++ INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work); + + return 0; + } +@@ -11970,14 +11973,17 @@ static void __net_exit nf_tables_exit_net(struct net *net) + if (!list_empty(&nft_net->module_list)) + nf_tables_module_autoload_cleanup(net); + ++ 
cancel_work_sync(&nft_net->destroy_work); + __nft_release_tables(net); + + nft_gc_seq_end(nft_net, gc_seq); + + mutex_unlock(&nft_net->commit_mutex); ++ + WARN_ON_ONCE(!list_empty(&nft_net->tables)); + WARN_ON_ONCE(!list_empty(&nft_net->module_list)); + WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); ++ WARN_ON_ONCE(!list_empty(&nft_net->destroy_list)); + } + + static void nf_tables_exit_batch(struct list_head *net_exit_list) +@@ -12068,10 +12074,8 @@ static void __exit nf_tables_module_exit(void) + unregister_netdevice_notifier(&nf_tables_flowtable_notifier); + nft_chain_filter_fini(); + nft_chain_route_fini(); +- nf_tables_trans_destroy_flush_work(); + unregister_pernet_subsys(&nf_tables_net_ops); + cancel_work_sync(&trans_gc_work); +- cancel_work_sync(&trans_destroy_work); + rcu_barrier(); + rhltable_destroy(&nft_objname_ht); + nf_tables_core_module_exit(); +diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c +index 7ca4f0d21fe2a..72711d62fddfa 100644 +--- a/net/netfilter/nft_compat.c ++++ b/net/netfilter/nft_compat.c +@@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) + return 0; + } + +-static void nft_compat_wait_for_destructors(void) ++static void nft_compat_wait_for_destructors(struct net *net) + { + /* xtables matches or targets can have side effects, e.g. + * creation/destruction of /proc files. +@@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void) + * work queue. If we have pending invocations we thus + * need to wait for those to finish. 
+ */ +- nf_tables_trans_destroy_flush_work(); ++ nf_tables_trans_destroy_flush_work(net); + } + + static int +@@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + + nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); + +- nft_compat_wait_for_destructors(); ++ nft_compat_wait_for_destructors(ctx->net); + + ret = xt_check_target(&par, size, proto, inv); + if (ret < 0) { +@@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + + nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); + +- nft_compat_wait_for_destructors(); ++ nft_compat_wait_for_destructors(ctx->net); + + return xt_check_match(&par, size, proto, inv); + } +-- +2.39.5 + diff --git a/queue-6.13/netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch b/queue-6.13/netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch new file mode 100644 index 0000000000..5d3f2ec3f4 --- /dev/null +++ b/queue-6.13/netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch @@ -0,0 +1,63 @@ +From 4d273229d05b47c44f03e9288058698ebeae94f4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Feb 2025 17:02:42 +0100 +Subject: netfilter: nft_ct: Use __refcount_inc() for per-CPU + nft_ct_pcpu_template. + +From: Sebastian Andrzej Siewior + +[ Upstream commit 5cfe5612ca9590db69b9be29dc83041dbf001108 ] + +nft_ct_pcpu_template is a per-CPU variable and relies on disabled BH for its +locking. The refcounter is read and if its value is set to one then the +refcounter is incremented and variable is used - otherwise it is already +in use and left untouched. + +Without per-CPU locking in local_bh_disable() on PREEMPT_RT the +read-then-increment operation is not atomic and therefore racy. + +This can be avoided by using unconditionally __refcount_inc() which will +increment counter and return the old value as an atomic operation. 
+In case the returned counter is not one, the variable is in use and we +need to decrement counter. Otherwise we can use it. + +Use __refcount_inc() instead of read and a conditional increment. + +Fixes: edee4f1e9245 ("netfilter: nft_ct: add zone id set support") +Signed-off-by: Sebastian Andrzej Siewior +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_ct.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c +index 67a41cd2baaff..a1b373b99f7b8 100644 +--- a/net/netfilter/nft_ct.c ++++ b/net/netfilter/nft_ct.c +@@ -230,6 +230,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, + enum ip_conntrack_info ctinfo; + u16 value = nft_reg_load16(&regs->data[priv->sreg]); + struct nf_conn *ct; ++ int oldcnt; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) /* already tracked */ +@@ -250,10 +251,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, + + ct = this_cpu_read(nft_ct_pcpu_template); + +- if (likely(refcount_read(&ct->ct_general.use) == 1)) { +- refcount_inc(&ct->ct_general.use); ++ __refcount_inc(&ct->ct_general.use, &oldcnt); ++ if (likely(oldcnt == 1)) { + nf_ct_zone_add(ct, &zone); + } else { ++ refcount_dec(&ct->ct_general.use); + /* previous skb got queued to userspace, allocate temporary + * one until percpu template can be reused.
+ */ +-- +2.39.5 + diff --git a/queue-6.13/netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch b/queue-6.13/netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch new file mode 100644 index 0000000000..602f058b4e --- /dev/null +++ b/queue-6.13/netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch @@ -0,0 +1,78 @@ +From 23b8f4d15643e9aef8f9f67db901342e93dffff4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 2 Mar 2025 00:14:36 +0300 +Subject: netfilter: nft_exthdr: fix offset with ipv4_find_option() + +From: Alexey Kashavkin + +[ Upstream commit 6edd78af9506bb182518da7f6feebd75655d9a0e ] + +There is an incorrect calculation in the offset variable which causes +the nft_skb_copy_to_reg() function to always return -EFAULT. Adding the +start variable is redundant. In the __ip_options_compile() function the +correct offset is specified when finding the function. There is no need +to add the size of the iphdr structure to the offset. + +Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options") +Signed-off-by: Alexey Kashavkin +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_exthdr.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c +index b8d03364566c1..c74012c991255 100644 +--- a/net/netfilter/nft_exthdr.c ++++ b/net/netfilter/nft_exthdr.c +@@ -85,7 +85,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + unsigned char optbuf[sizeof(struct ip_options) + 40]; + struct ip_options *opt = (struct ip_options *)optbuf; + struct iphdr *iph, _iph; +- unsigned int start; + bool found = false; + __be32 info; + int optlen; +@@ -93,7 +92,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (!iph) + return -EBADMSG; +- start = sizeof(struct iphdr); + + optlen = iph->ihl * 4 - 
(int)sizeof(struct iphdr); + if (optlen <= 0) +@@ -103,7 +101,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + /* Copy the options since __ip_options_compile() modifies + * the options. + */ +- if (skb_copy_bits(skb, start, opt->__data, optlen)) ++ if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen)) + return -EBADMSG; + opt->optlen = optlen; + +@@ -118,18 +116,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, + found = target == IPOPT_SSRR ? opt->is_strictroute : + !opt->is_strictroute; + if (found) +- *offset = opt->srr + start; ++ *offset = opt->srr; + break; + case IPOPT_RR: + if (!opt->rr) + break; +- *offset = opt->rr + start; ++ *offset = opt->rr; + found = true; + break; + case IPOPT_RA: + if (!opt->router_alert) + break; +- *offset = opt->router_alert + start; ++ *offset = opt->router_alert; + found = true; + break; + default: +-- +2.39.5 + diff --git a/queue-6.13/netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch b/queue-6.13/netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch new file mode 100644 index 0000000000..a0ebdd717b --- /dev/null +++ b/queue-6.13/netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch @@ -0,0 +1,76 @@ +From 1adac3c566118eff1327e8fb686890ae141815c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 05:16:18 -0800 +Subject: netpoll: hold rcu read lock in __netpoll_send_skb() + +From: Breno Leitao + +[ Upstream commit 505ead7ab77f289f12d8a68ac83da068e4d4408b ] + +The function __netpoll_send_skb() is being invoked without holding the +RCU read lock. This oversight triggers a warning message when +CONFIG_PROVE_RCU_LIST is enabled: + + net/core/netpoll.c:330 suspicious rcu_dereference_check() usage! + + netpoll_send_skb + netpoll_send_udp + write_ext_msg + console_flush_all + console_unlock + vprintk_emit + +To prevent npinfo from disappearing unexpectedly, ensure that +__netpoll_send_skb() is protected with the RCU read lock. 
+ +Fixes: 2899656b494dcd1 ("netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()") +Signed-off-by: Breno Leitao +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250306-netpoll_rcu_v2-v2-1-bc4f5c51742a@debian.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/netpoll.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/net/core/netpoll.c b/net/core/netpoll.c +index 96a6ed37d4ccb..7ee754cd2e2f0 100644 +--- a/net/core/netpoll.c ++++ b/net/core/netpoll.c +@@ -319,6 +319,7 @@ static int netpoll_owner_active(struct net_device *dev) + static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) + { + netdev_tx_t status = NETDEV_TX_BUSY; ++ netdev_tx_t ret = NET_XMIT_DROP; + struct net_device *dev; + unsigned long tries; + /* It is up to the caller to keep npinfo alive. */ +@@ -327,11 +328,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) + lockdep_assert_irqs_disabled(); + + dev = np->dev; ++ rcu_read_lock(); + npinfo = rcu_dereference_bh(dev->npinfo); + + if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { + dev_kfree_skb_irq(skb); +- return NET_XMIT_DROP; ++ goto out; + } + + /* don't get messages out of order, and no recursion */ +@@ -370,7 +372,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) + skb_queue_tail(&npinfo->txq, skb); + schedule_delayed_work(&npinfo->tx_work,0); + } +- return NETDEV_TX_OK; ++ ret = NETDEV_TX_OK; ++out: ++ rcu_read_unlock(); ++ return ret; + } + + netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +-- +2.39.5 + diff --git a/queue-6.13/pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch b/queue-6.13/pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch new file mode 100644 index 0000000000..c431846b52 --- /dev/null +++ b/queue-6.13/pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch @@ -0,0 +1,40 @@ +From 
be7bc307bac079bc5b62a5213c4272e35ca24697 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Feb 2025 21:02:41 +0100 +Subject: pinctrl: bcm281xx: Fix incorrect regmap max_registers value + +From: Artur Weber + +[ Upstream commit 68283c1cb573143c0b7515e93206f3503616bc10 ] + +The max_registers value does not take into consideration the stride; +currently, it's set to the number of the last pin, but this does not +accurately represent the final register. + +Fix this by multiplying the current value by 4. + +Fixes: 54b1aa5a5b16 ("ARM: pinctrl: Add Broadcom Capri pinctrl driver") +Signed-off-by: Artur Weber +Link: https://lore.kernel.org/20250207-bcm21664-pinctrl-v1-2-e7cfac9b2d3b@gmail.com +Signed-off-by: Linus Walleij +Signed-off-by: Sasha Levin +--- + drivers/pinctrl/bcm/pinctrl-bcm281xx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c +index 73dbf29c002f3..cf6efa9c0364a 100644 +--- a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c ++++ b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c +@@ -974,7 +974,7 @@ static const struct regmap_config bcm281xx_pinctrl_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, +- .max_register = BCM281XX_PIN_VC_CAM3_SDA, ++ .max_register = BCM281XX_PIN_VC_CAM3_SDA * 4, + }; + + static int bcm281xx_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) +-- +2.39.5 + diff --git a/queue-6.13/pinctrl-nuvoton-npcm8xx-add-null-check-in-npcm8xx_gp.patch b/queue-6.13/pinctrl-nuvoton-npcm8xx-add-null-check-in-npcm8xx_gp.patch new file mode 100644 index 0000000000..f706730fb0 --- /dev/null +++ b/queue-6.13/pinctrl-nuvoton-npcm8xx-add-null-check-in-npcm8xx_gp.patch @@ -0,0 +1,40 @@ +From 9dac10bceb4b8b626bf22893e3556f794760f967 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Feb 2025 18:05:32 +0800 +Subject: pinctrl: nuvoton: npcm8xx: Add NULL check in npcm8xx_gpio_fw + +From: Charles Han + +[ Upstream commit 
acf40ab42799e4ae1397ee6f5c5941092d66f999 ] + +devm_kasprintf() calls can return null pointers on failure. +But the return values were not checked in npcm8xx_gpio_fw(). +Add NULL check in npcm8xx_gpio_fw(), to handle kernel NULL +pointer dereference error. + +Fixes: acf4884a5717 ("pinctrl: nuvoton: add NPCM8XX pinctrl and GPIO driver") +Signed-off-by: Charles Han +Link: https://lore.kernel.org/20250212100532.4317-1-hanchunchao@inspur.com +Signed-off-by: Linus Walleij +Signed-off-by: Sasha Levin +--- + drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c +index 471f644c5eef2..d09a5e9b2eca5 100644 +--- a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c ++++ b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c +@@ -2374,6 +2374,9 @@ static int npcm8xx_gpio_fw(struct npcm8xx_pinctrl *pctrl) + pctrl->gpio_bank[id].gc.parent = dev; + pctrl->gpio_bank[id].gc.fwnode = child; + pctrl->gpio_bank[id].gc.label = devm_kasprintf(dev, GFP_KERNEL, "%pfw", child); ++ if (pctrl->gpio_bank[id].gc.label == NULL) ++ return -ENOMEM; ++ + pctrl->gpio_bank[id].gc.dbg_show = npcmgpio_dbg_show; + pctrl->gpio_bank[id].direction_input = pctrl->gpio_bank[id].gc.direction_input; + pctrl->gpio_bank[id].gc.direction_input = npcmgpio_direction_input; +-- +2.39.5 + diff --git a/queue-6.13/revert-bluetooth-hci_core-fix-sleeping-function-call.patch b/queue-6.13/revert-bluetooth-hci_core-fix-sleeping-function-call.patch new file mode 100644 index 0000000000..13428d3e77 --- /dev/null +++ b/queue-6.13/revert-bluetooth-hci_core-fix-sleeping-function-call.patch @@ -0,0 +1,389 @@ +From 64837dad2699cdb8b382d44e665ae99dd5715808 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Mar 2025 10:06:10 -0500 +Subject: Revert "Bluetooth: hci_core: Fix sleeping function called from + invalid context" + +From: Luiz Augusto von Dentz + +[ Upstream commit ab6ab707a4d060a51c45fc13e3b2228d5f7c0b87 ] + 
+This reverts commit 4d94f05558271654670d18c26c912da0c1c15549 which has +problems (see [1]) and is no longer needed since 581dd2dc168f +("Bluetooth: hci_event: Fix using rcu_read_(un)lock while iterating") +has reworked the code where the original bug has been found. + +[1] Link: https://lore.kernel.org/linux-bluetooth/877c55ci1r.wl-tiwai@suse.de/T/#t +Fixes: 4d94f0555827 ("Bluetooth: hci_core: Fix sleeping function called from invalid context") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + include/net/bluetooth/hci_core.h | 108 +++++++++++-------------------- + net/bluetooth/hci_core.c | 10 ++- + net/bluetooth/iso.c | 6 -- + net/bluetooth/l2cap_core.c | 12 ++-- + net/bluetooth/rfcomm/core.c | 6 -- + net/bluetooth/sco.c | 12 ++-- + 6 files changed, 57 insertions(+), 97 deletions(-) + +diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h +index ca22ead85dbe0..ea798f07c5a2d 100644 +--- a/include/net/bluetooth/hci_core.h ++++ b/include/net/bluetooth/hci_core.h +@@ -804,6 +804,7 @@ struct hci_conn_params { + extern struct list_head hci_dev_list; + extern struct list_head hci_cb_list; + extern rwlock_t hci_dev_list_lock; ++extern struct mutex hci_cb_list_lock; + + #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags) + #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags) +@@ -2016,47 +2017,24 @@ struct hci_cb { + + char *name; + +- bool (*match) (struct hci_conn *conn); + void (*connect_cfm) (struct hci_conn *conn, __u8 status); + void (*disconn_cfm) (struct hci_conn *conn, __u8 status); + void (*security_cfm) (struct hci_conn *conn, __u8 status, +- __u8 encrypt); ++ __u8 encrypt); + void (*key_change_cfm) (struct hci_conn *conn, __u8 status); + void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role); + }; + +-static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list) +-{ +- struct hci_cb *cb, *cpy; +- +- rcu_read_lock(); +- 
list_for_each_entry_rcu(cb, &hci_cb_list, list) { +- if (cb->match && cb->match(conn)) { +- cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC); +- if (!cpy) +- break; +- +- *cpy = *cb; +- INIT_LIST_HEAD(&cpy->list); +- list_add_rcu(&cpy->list, list); +- } +- } +- rcu_read_unlock(); +-} +- + static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) + { +- struct list_head list; +- struct hci_cb *cb, *tmp; +- +- INIT_LIST_HEAD(&list); +- hci_cb_lookup(conn, &list); ++ struct hci_cb *cb; + +- list_for_each_entry_safe(cb, tmp, &list, list) { ++ mutex_lock(&hci_cb_list_lock); ++ list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->connect_cfm) + cb->connect_cfm(conn, status); +- kfree(cb); + } ++ mutex_unlock(&hci_cb_list_lock); + + if (conn->connect_cfm_cb) + conn->connect_cfm_cb(conn, status); +@@ -2064,43 +2042,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status) + + static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason) + { +- struct list_head list; +- struct hci_cb *cb, *tmp; +- +- INIT_LIST_HEAD(&list); +- hci_cb_lookup(conn, &list); ++ struct hci_cb *cb; + +- list_for_each_entry_safe(cb, tmp, &list, list) { ++ mutex_lock(&hci_cb_list_lock); ++ list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->disconn_cfm) + cb->disconn_cfm(conn, reason); +- kfree(cb); + } ++ mutex_unlock(&hci_cb_list_lock); + + if (conn->disconn_cfm_cb) + conn->disconn_cfm_cb(conn, reason); + } + +-static inline void hci_security_cfm(struct hci_conn *conn, __u8 status, +- __u8 encrypt) +-{ +- struct list_head list; +- struct hci_cb *cb, *tmp; +- +- INIT_LIST_HEAD(&list); +- hci_cb_lookup(conn, &list); +- +- list_for_each_entry_safe(cb, tmp, &list, list) { +- if (cb->security_cfm) +- cb->security_cfm(conn, status, encrypt); +- kfree(cb); +- } +- +- if (conn->security_cfm_cb) +- conn->security_cfm_cb(conn, status); +-} +- + static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) + { ++ struct hci_cb *cb; + __u8 encrypt; + + if 
(test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) +@@ -2108,11 +2065,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) + + encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00; + +- hci_security_cfm(conn, status, encrypt); ++ mutex_lock(&hci_cb_list_lock); ++ list_for_each_entry(cb, &hci_cb_list, list) { ++ if (cb->security_cfm) ++ cb->security_cfm(conn, status, encrypt); ++ } ++ mutex_unlock(&hci_cb_list_lock); ++ ++ if (conn->security_cfm_cb) ++ conn->security_cfm_cb(conn, status); + } + + static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) + { ++ struct hci_cb *cb; + __u8 encrypt; + + if (conn->state == BT_CONFIG) { +@@ -2139,38 +2105,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) + conn->sec_level = conn->pending_sec_level; + } + +- hci_security_cfm(conn, status, encrypt); ++ mutex_lock(&hci_cb_list_lock); ++ list_for_each_entry(cb, &hci_cb_list, list) { ++ if (cb->security_cfm) ++ cb->security_cfm(conn, status, encrypt); ++ } ++ mutex_unlock(&hci_cb_list_lock); ++ ++ if (conn->security_cfm_cb) ++ conn->security_cfm_cb(conn, status); + } + + static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status) + { +- struct list_head list; +- struct hci_cb *cb, *tmp; +- +- INIT_LIST_HEAD(&list); +- hci_cb_lookup(conn, &list); ++ struct hci_cb *cb; + +- list_for_each_entry_safe(cb, tmp, &list, list) { ++ mutex_lock(&hci_cb_list_lock); ++ list_for_each_entry(cb, &hci_cb_list, list) { + if (cb->key_change_cfm) + cb->key_change_cfm(conn, status); +- kfree(cb); + } ++ mutex_unlock(&hci_cb_list_lock); + } + + static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, + __u8 role) + { +- struct list_head list; +- struct hci_cb *cb, *tmp; +- +- INIT_LIST_HEAD(&list); +- hci_cb_lookup(conn, &list); ++ struct hci_cb *cb; + +- list_for_each_entry_safe(cb, tmp, &list, list) { ++ mutex_lock(&hci_cb_list_lock); ++ list_for_each_entry(cb, &hci_cb_list, list) { + if 
(cb->role_switch_cfm) + cb->role_switch_cfm(conn, status, role); +- kfree(cb); + } ++ mutex_unlock(&hci_cb_list_lock); + } + + static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index 18ab5628f85ad..f9e19f9cb5a38 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -57,6 +57,7 @@ DEFINE_RWLOCK(hci_dev_list_lock); + + /* HCI callback list */ + LIST_HEAD(hci_cb_list); ++DEFINE_MUTEX(hci_cb_list_lock); + + /* HCI ID Numbering */ + static DEFINE_IDA(hci_index_ida); +@@ -2992,7 +2993,9 @@ int hci_register_cb(struct hci_cb *cb) + { + BT_DBG("%p name %s", cb, cb->name); + +- list_add_tail_rcu(&cb->list, &hci_cb_list); ++ mutex_lock(&hci_cb_list_lock); ++ list_add_tail(&cb->list, &hci_cb_list); ++ mutex_unlock(&hci_cb_list_lock); + + return 0; + } +@@ -3002,8 +3005,9 @@ int hci_unregister_cb(struct hci_cb *cb) + { + BT_DBG("%p name %s", cb, cb->name); + +- list_del_rcu(&cb->list); +- synchronize_rcu(); ++ mutex_lock(&hci_cb_list_lock); ++ list_del(&cb->list); ++ mutex_unlock(&hci_cb_list_lock); + + return 0; + } +diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c +index 43d0ebe111000..73724439fe718 100644 +--- a/net/bluetooth/iso.c ++++ b/net/bluetooth/iso.c +@@ -2151,11 +2151,6 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) + return HCI_LM_ACCEPT; + } + +-static bool iso_match(struct hci_conn *hcon) +-{ +- return hcon->type == ISO_LINK || hcon->type == LE_LINK; +-} +- + static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) + { + if (hcon->type != ISO_LINK) { +@@ -2337,7 +2332,6 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) + + static struct hci_cb iso_cb = { + .name = "ISO", +- .match = iso_match, + .connect_cfm = iso_connect_cfm, + .disconn_cfm = iso_disconn_cfm, + }; +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index 728a5ce9b5058..4db2d6363bbb5 100644 +--- 
a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -7222,11 +7222,6 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, + return NULL; + } + +-static bool l2cap_match(struct hci_conn *hcon) +-{ +- return hcon->type == ACL_LINK || hcon->type == LE_LINK; +-} +- + static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) + { + struct hci_dev *hdev = hcon->hdev; +@@ -7234,6 +7229,9 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) + struct l2cap_chan *pchan; + u8 dst_type; + ++ if (hcon->type != ACL_LINK && hcon->type != LE_LINK) ++ return; ++ + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); + + if (status) { +@@ -7298,6 +7296,9 @@ int l2cap_disconn_ind(struct hci_conn *hcon) + + static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) + { ++ if (hcon->type != ACL_LINK && hcon->type != LE_LINK) ++ return; ++ + BT_DBG("hcon %p reason %d", hcon, reason); + + l2cap_conn_del(hcon, bt_to_errno(reason)); +@@ -7576,7 +7577,6 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) + + static struct hci_cb l2cap_cb = { + .name = "L2CAP", +- .match = l2cap_match, + .connect_cfm = l2cap_connect_cfm, + .disconn_cfm = l2cap_disconn_cfm, + .security_cfm = l2cap_security_cfm, +diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c +index 4c56ca5a216c6..ad5177e3a69b7 100644 +--- a/net/bluetooth/rfcomm/core.c ++++ b/net/bluetooth/rfcomm/core.c +@@ -2134,11 +2134,6 @@ static int rfcomm_run(void *unused) + return 0; + } + +-static bool rfcomm_match(struct hci_conn *hcon) +-{ +- return hcon->type == ACL_LINK; +-} +- + static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) + { + struct rfcomm_session *s; +@@ -2185,7 +2180,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) + + static struct hci_cb rfcomm_cb = { + .name = "RFCOMM", +- .match = rfcomm_match, + .security_cfm = rfcomm_security_cfm + }; + +diff 
--git a/net/bluetooth/sco.c b/net/bluetooth/sco.c +index ed6846864ea93..5d1bc0d6aee03 100644 +--- a/net/bluetooth/sco.c ++++ b/net/bluetooth/sco.c +@@ -1407,13 +1407,11 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) + return lm; + } + +-static bool sco_match(struct hci_conn *hcon) +-{ +- return hcon->type == SCO_LINK || hcon->type == ESCO_LINK; +-} +- + static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) + { ++ if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) ++ return; ++ + BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status); + + if (!status) { +@@ -1430,6 +1428,9 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) + + static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) + { ++ if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) ++ return; ++ + BT_DBG("hcon %p reason %d", hcon, reason); + + sco_conn_del(hcon, bt_to_errno(reason)); +@@ -1455,7 +1456,6 @@ void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) + + static struct hci_cb sco_cb = { + .name = "SCO", +- .match = sco_match, + .connect_cfm = sco_connect_cfm, + .disconn_cfm = sco_disconn_cfm, + }; +-- +2.39.5 + diff --git a/queue-6.13/revert-openvswitch-switch-to-per-action-label-counti.patch b/queue-6.13/revert-openvswitch-switch-to-per-action-label-counti.patch new file mode 100644 index 0000000000..41bde2053f --- /dev/null +++ b/queue-6.13/revert-openvswitch-switch-to-per-action-label-counti.patch @@ -0,0 +1,139 @@ +From d86f934c4e4a42f344f0c01e6480fd2fccab2d5b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 8 Mar 2025 13:05:43 -0500 +Subject: Revert "openvswitch: switch to per-action label counting in + conntrack" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Xin Long + +[ Upstream commit 1063ae07383c0ddc5bcce170260c143825846b03 ] + +Currently, ovs_ct_set_labels() is only called for confirmed conntrack +entries (ct) within ovs_ct_commit(). 
However, if the conntrack entry +does not have the labels_ext extension, attempting to allocate it in +ovs_ct_get_conn_labels() for a confirmed entry triggers a warning in +nf_ct_ext_add(): + + WARN_ON(nf_ct_is_confirmed(ct)); + +This happens when the conntrack entry is created externally before OVS +increments net->ct.labels_used. The issue has become more likely since +commit fcb1aa5163b1 ("openvswitch: switch to per-action label counting +in conntrack"), which changed to use per-action label counting and +increment net->ct.labels_used when a flow with ct action is added. + +Since there’s no straightforward way to fully resolve this issue at the +moment, this reverts the commit to avoid breaking existing use cases. + +Fixes: fcb1aa5163b1 ("openvswitch: switch to per-action label counting in conntrack") +Reported-by: Jianbo Liu +Signed-off-by: Xin Long +Acked-by: Aaron Conole +Link: https://patch.msgid.link/1bdeb2f3a812bca016a225d3de714427b2cd4772.1741457143.git.lucien.xin@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/openvswitch/conntrack.c | 30 ++++++++++++++++++------------ + net/openvswitch/datapath.h | 3 +++ + 2 files changed, 21 insertions(+), 12 deletions(-) + +diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c +index 3bb4810234aac..e573e92213029 100644 +--- a/net/openvswitch/conntrack.c ++++ b/net/openvswitch/conntrack.c +@@ -1368,8 +1368,11 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) + attr == OVS_KEY_ATTR_CT_MARK) + return true; + if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && +- attr == OVS_KEY_ATTR_CT_LABELS) +- return true; ++ attr == OVS_KEY_ATTR_CT_LABELS) { ++ struct ovs_net *ovs_net = net_generic(net, ovs_net_id); ++ ++ return ovs_net->xt_label; ++ } + + return false; + } +@@ -1378,7 +1381,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa, bool log) + { +- unsigned int n_bits = sizeof(struct 
ovs_key_ct_labels) * BITS_PER_BYTE; + struct ovs_conntrack_info ct_info; + const char *helper = NULL; + u16 family; +@@ -1407,12 +1409,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, + return -ENOMEM; + } + +- if (nf_connlabels_get(net, n_bits - 1)) { +- nf_ct_tmpl_free(ct_info.ct); +- OVS_NLERR(log, "Failed to set connlabel length"); +- return -EOPNOTSUPP; +- } +- + if (ct_info.timeout[0]) { + if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, + ct_info.timeout)) +@@ -1581,7 +1577,6 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) + if (ct_info->ct) { + if (ct_info->timeout[0]) + nf_ct_destroy_timeout(ct_info->ct); +- nf_connlabels_put(nf_ct_net(ct_info->ct)); + nf_ct_tmpl_free(ct_info->ct); + } + } +@@ -2006,9 +2001,17 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = { + + int ovs_ct_init(struct net *net) + { +-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) ++ unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + ++ if (nf_connlabels_get(net, n_bits - 1)) { ++ ovs_net->xt_label = false; ++ OVS_NLERR(true, "Failed to set connlabel length"); ++ } else { ++ ovs_net->xt_label = true; ++ } ++ ++#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) + return ovs_ct_limit_init(net, ovs_net); + #else + return 0; +@@ -2017,9 +2020,12 @@ int ovs_ct_init(struct net *net) + + void ovs_ct_exit(struct net *net) + { +-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + ++#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) + ovs_ct_limit_exit(net, ovs_net); + #endif ++ ++ if (ovs_net->xt_label) ++ nf_connlabels_put(net); + } +diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h +index 365b9bb7f546e..9ca6231ea6470 100644 +--- a/net/openvswitch/datapath.h ++++ b/net/openvswitch/datapath.h +@@ -160,6 +160,9 @@ struct ovs_net { + #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) + struct 
ovs_ct_limit_info *ct_limit_info; + #endif ++ ++ /* Module reference for configuring conntrack. */ ++ bool xt_label; + }; + + /** +-- +2.39.5 + diff --git a/queue-6.13/rtase-fix-improper-release-of-ring-list-entries-in-r.patch b/queue-6.13/rtase-fix-improper-release-of-ring-list-entries-in-r.patch new file mode 100644 index 0000000000..969e73323a --- /dev/null +++ b/queue-6.13/rtase-fix-improper-release-of-ring-list-entries-in-r.patch @@ -0,0 +1,58 @@ +From 3bceff974745eb34f4da7a430ba9e4f72584470a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 15:05:10 +0800 +Subject: rtase: Fix improper release of ring list entries in rtase_sw_reset + +From: Justin Lai + +[ Upstream commit 415f135ace7fd824cde083184a922e39156055b5 ] + +Since rtase_init_ring, which is called within rtase_sw_reset, adds ring +entries already present in the ring list back into the list, it causes +the ring list to form a cycle. This results in list_for_each_entry_safe +failing to find an endpoint during traversal, leading to an error. +Therefore, it is necessary to remove the previously added ring_list nodes +before calling rtase_init_ring. 
+ +Fixes: 079600489960 ("rtase: Implement net_device_ops") +Signed-off-by: Justin Lai +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250306070510.18129-1-justinlai0215@realtek.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/realtek/rtase/rtase_main.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c +index c42c0516656b8..bb8f1bc215cdd 100644 +--- a/drivers/net/ethernet/realtek/rtase/rtase_main.c ++++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c +@@ -1501,7 +1501,10 @@ static void rtase_wait_for_quiescence(const struct net_device *dev) + static void rtase_sw_reset(struct net_device *dev) + { + struct rtase_private *tp = netdev_priv(dev); ++ struct rtase_ring *ring, *tmp; ++ struct rtase_int_vector *ivec; + int ret; ++ u32 i; + + netif_stop_queue(dev); + netif_carrier_off(dev); +@@ -1512,6 +1515,13 @@ static void rtase_sw_reset(struct net_device *dev) + rtase_tx_clear(tp); + rtase_rx_clear(tp); + ++ for (i = 0; i < tp->int_nums; i++) { ++ ivec = &tp->int_vector[i]; ++ list_for_each_entry_safe(ring, tmp, &ivec->ring_list, ++ ring_entry) ++ list_del(&ring->ring_entry); ++ } ++ + ret = rtase_init_ring(dev); + if (ret) { + netdev_err(dev, "unable to init ring\n"); +-- +2.39.5 + diff --git a/queue-6.13/sched-address-a-potential-null-pointer-dereference-i.patch b/queue-6.13/sched-address-a-potential-null-pointer-dereference-i.patch new file mode 100644 index 0000000000..f5d285957a --- /dev/null +++ b/queue-6.13/sched-address-a-potential-null-pointer-dereference-i.patch @@ -0,0 +1,46 @@ +From f7f0c50461992e62239c1527178424d274782927 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Mar 2025 23:44:10 +0800 +Subject: sched: address a potential NULL pointer dereference in the GRED + scheduler. 
+ +From: Jun Yang + +[ Upstream commit 115ef44a98220fddfab37a39a19370497cd718b9 ] + +If kzalloc in gred_init returns a NULL pointer, the code follows the +error handling path, invoking gred_destroy. This, in turn, calls +gred_offload, where memset could receive a NULL pointer as input, +potentially leading to a kernel crash. + +When table->opt is NULL in gred_init(), gred_change_table_def() +is not called yet, so it is not necessary to call ->ndo_setup_tc() +in gred_offload(). + +Signed-off-by: Jun Yang +Reviewed-by: Cong Wang +Fixes: f25c0515c521 ("net: sched: gred: dynamically allocate tc_gred_qopt_offload") +Link: https://patch.msgid.link/20250305154410.3505642-1-juny24602@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_gred.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c +index 7d2151c62c4a1..85a32b3f6585e 100644 +--- a/net/sched/sch_gred.c ++++ b/net/sched/sch_gred.c +@@ -913,7 +913,8 @@ static void gred_destroy(struct Qdisc *sch) + for (i = 0; i < table->DPs; i++) + gred_destroy_vq(table->tab[i]); + +- gred_offload(sch, TC_GRED_DESTROY); ++ if (table->opt) ++ gred_offload(sch, TC_GRED_DESTROY); + kfree(table->opt); + } + +-- +2.39.5 + diff --git a/queue-6.13/selftests-bonding-fix-incorrect-mac-address.patch b/queue-6.13/selftests-bonding-fix-incorrect-mac-address.patch new file mode 100644 index 0000000000..d35bf41569 --- /dev/null +++ b/queue-6.13/selftests-bonding-fix-incorrect-mac-address.patch @@ -0,0 +1,42 @@ +From 95e2d8817164178148274e66992f149eeb595386 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 02:39:23 +0000 +Subject: selftests: bonding: fix incorrect mac address + +From: Hangbin Liu + +[ Upstream commit 9318dc2357b6b8b2ea1200ab7f2d5877851b7382 ] + +The correct mac address for NS target 2001:db8::254 is 33:33:ff:00:02:54, +not 33:33:00:00:02:54. The same with client maddress. 
+ +Fixes: 86fb6173d11e ("selftests: bonding: add ns multicast group testing") +Acked-by: Jay Vosburgh +Reviewed-by: Nikolay Aleksandrov +Signed-off-by: Hangbin Liu +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250306023923.38777-3-liuhangbin@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/drivers/net/bonding/bond_options.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh +index edc56e2cc6069..7bc148889ca72 100755 +--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh ++++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh +@@ -11,8 +11,8 @@ ALL_TESTS=" + + lib_dir=$(dirname "$0") + source ${lib_dir}/bond_topo_3d1c.sh +-c_maddr="33:33:00:00:00:10" +-g_maddr="33:33:00:00:02:54" ++c_maddr="33:33:ff:00:00:10" ++g_maddr="33:33:ff:00:02:54" + + skip_prio() + { +-- +2.39.5 + diff --git a/queue-6.13/series b/queue-6.13/series index 958c55e54f..d3e318cf5a 100644 --- a/queue-6.13/series +++ b/queue-6.13/series @@ -10,3 +10,54 @@ mm-fix-kernel-bug-when-userfaultfd_move-encounters-swapcache.patch userfaultfd-fix-pte-unmapping-stack-allocated-pte-copies.patch mm-slab-kvfree_rcu-switch-to-wq_mem_reclaim-wq.patch virt-sev-guest-move-snp-guest-request-data-pages-handling-under-snp_cmd_mutex.patch +fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch +pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch +pinctrl-nuvoton-npcm8xx-add-null-check-in-npcm8xx_gp.patch +netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch +ice-do-not-configure-destination-override-for-switch.patch +ice-fix-memory-leak-in-arfs-after-reset.patch +ice-fix-switchdev-slow-path-in-lag.patch +netfilter-nf_conncount-garbage-collection-is-not-ski.patch +netfilter-nf_tables-make-destruction-work-queue-pern.patch 
+sched-address-a-potential-null-pointer-dereference-i.patch +wifi-iwlwifi-mvm-fix-pnvm-timeout-for-non-msi-x-plat.patch +wifi-mac80211-don-t-queue-sdata-work-for-a-non-runni.patch +wifi-cfg80211-cancel-wiphy_work-before-freeing-wiphy.patch +bluetooth-sco-fix-sco_conn-refcounting-on-sco_conn_r.patch +bluetooth-hci_event-fix-enabling-passive-scanning.patch +revert-bluetooth-hci_core-fix-sleeping-function-call.patch +net-mlx5-fill-out-devlink-dev-info-only-for-pfs.patch +net-dsa-mv88e6xxx-verify-after-atu-load-ops.patch +net-mctp-i3c-copy-headers-if-cloned.patch +net-mctp-i2c-copy-headers-if-cloned.patch +netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch +drm-hyperv-fix-address-space-leak-when-hyper-v-drm-d.patch +fbdev-hyperv_fb-fix-hang-in-kdump-kernel-when-on-hyp.patch +fbdev-hyperv_fb-simplify-hvfb_putmem.patch +fbdev-hyperv_fb-allow-graceful-removal-of-framebuffe.patch +drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch +net-mlx5-handle-errors-in-mlx5_chains_create_table.patch +eth-bnxt-fix-truesize-for-mb-xdp-pass-case.patch +eth-bnxt-return-fail-if-interface-is-down-in-bnxt_qu.patch +eth-bnxt-do-not-use-bnxt_vnic_ntuple-unconditionally.patch +eth-bnxt-do-not-update-checksum-in-bnxt_xdp_build_sk.patch +eth-bnxt-fix-kernel-panic-in-the-bnxt_get_queue_stat.patch +eth-bnxt-fix-memory-leak-in-queue-reset.patch +net-switchdev-convert-blocking-notification-chain-to.patch +net-mctp-unshare-packets-when-reassembling.patch +bonding-fix-incorrect-mac-address-setting-to-receive.patch +selftests-bonding-fix-incorrect-mac-address.patch +rtase-fix-improper-release-of-ring-list-entries-in-r.patch +wifi-mac80211-fix-mpdu-length-parsing-for-eht-5-6-gh.patch +netfilter-nf_conncount-fully-initialize-struct-nf_co.patch +ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch +net_sched-prevent-creation-of-classes-with-tc_h_root.patch +netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch +gre-fix-ipv6-link-local-address-generation.patch 
+net-openvswitch-remove-misbehaving-actions-length-ch.patch +revert-openvswitch-switch-to-per-action-label-counti.patch +net-mlx5-hws-rightsize-bwc-matcher-priority.patch +net-mlx5-fix-incorrect-irq-pool-usage-when-releasing.patch +net-mlx5-lag-check-shared-fdb-before-creating-multip.patch +net-mlx5-bridge-fix-the-crash-caused-by-lag-state-ch.patch +net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch diff --git a/queue-6.13/wifi-cfg80211-cancel-wiphy_work-before-freeing-wiphy.patch b/queue-6.13/wifi-cfg80211-cancel-wiphy_work-before-freeing-wiphy.patch new file mode 100644 index 0000000000..dbb6ac7a79 --- /dev/null +++ b/queue-6.13/wifi-cfg80211-cancel-wiphy_work-before-freeing-wiphy.patch @@ -0,0 +1,50 @@ +From 7d9b5be5bd612cf1e8b674fefeba4036edc1ce2b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 12:37:59 +0200 +Subject: wifi: cfg80211: cancel wiphy_work before freeing wiphy + +From: Miri Korenblit + +[ Upstream commit 72d520476a2fab6f3489e8388ab524985d6c4b90 ] + +A wiphy_work can be queued from the moment the wiphy is allocated and +initialized (i.e. wiphy_new_nm). When a wiphy_work is queued, the +rdev::wiphy_work is getting queued. + +If wiphy_free is called before the rdev::wiphy_work had a chance to run, +the wiphy memory will be freed, and then when it eventually gets to run +it'll use invalid memory. + +Fix this by canceling the work before freeing the wiphy. 
+ +Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") +Signed-off-by: Miri Korenblit +Reviewed-by: Johannes Berg +Link: https://patch.msgid.link/20250306123626.efd1d19f6e07.I48229f96f4067ef73f5b87302335e2fd750136c9@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/wireless/core.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/net/wireless/core.c b/net/wireless/core.c +index afbdc549fb4a5..8baf22758ac12 100644 +--- a/net/wireless/core.c ++++ b/net/wireless/core.c +@@ -1198,6 +1198,13 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) + { + struct cfg80211_internal_bss *scan, *tmp; + struct cfg80211_beacon_registration *reg, *treg; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&rdev->wiphy_work_lock, flags); ++ WARN_ON(!list_empty(&rdev->wiphy_work_list)); ++ spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); ++ cancel_work_sync(&rdev->wiphy_work); ++ + rfkill_destroy(rdev->wiphy.rfkill); + list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { + list_del(&reg->list); +-- +2.39.5 + diff --git a/queue-6.13/wifi-iwlwifi-mvm-fix-pnvm-timeout-for-non-msi-x-plat.patch b/queue-6.13/wifi-iwlwifi-mvm-fix-pnvm-timeout-for-non-msi-x-plat.patch new file mode 100644 index 0000000000..3b8253d12f --- /dev/null +++ b/queue-6.13/wifi-iwlwifi-mvm-fix-pnvm-timeout-for-non-msi-x-plat.patch @@ -0,0 +1,80 @@ +From f87b486dd218d546ad903d0ba5993c28b5b12d88 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 12:25:47 +0200 +Subject: wifi: iwlwifi: mvm: fix PNVM timeout for non-MSI-X platforms + +From: Emmanuel Grumbach + +[ Upstream commit b8c8a03e9b7bfc06f366b75daf3d0812400e7123 ] + +When MSI-X is not enabled, we mask all the interrupts in the interrupt +handler and re-enable them when the interrupt thread runs. If +STATUS_INT_ENABLED is not set, we won't re-enable in the thread. 
+In order to get the ALIVE interrupt, we allow the ALIVE interrupt +itself, and RX as well in order to receive the ALIVE notification (which +is received as an RX from the firmware). + +The problem is that STATUS_INT_ENABLED is clear until the op_mode calls +trans_fw_alive which means that until trans_fw_alive is called, any +notification from the firmware will not be received. + +This became a problem when we inserted the pnvm_load exactly between the +ALIVE and trans_fw_alive. + +Fix that by calling trans_fw_alive before loading the PNVM. This will +allow to get the notification from the firmware about PNVM load being +complete and continue the flow normally. + +This didn't happen on MSI-X because we don't disable the interrupts in +the ISR when MSI-X is available. + +The error in the log looks like this: + +iwlwifi 0000:00:03.0: Timeout waiting for PNVM load! +iwlwifi 0000:00:03.0: Failed to start RT ucode: -110 +iwlwifi 0000:00:03.0: WRT: Collecting data: ini trigger 13 fired (delay=0ms). 
+ +Fixes: 70d3ca86b025 ("iwlwifi: mvm: ring the doorbell and wait for PNVM load completion") +Signed-off-by: Emmanuel Grumbach +Reviewed-by: Johannes Berg +Signed-off-by: Miri Korenblit +Link: https://patch.msgid.link/20250306122425.0f2cf207aae1.I025d8f724b44f52eadf6c19069352eb9275613a8@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +index 5ea684802ad17..d4bc1e85b9305 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +@@ -1,6 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + /* +- * Copyright (C) 2012-2014, 2018-2024 Intel Corporation ++ * Copyright (C) 2012-2014, 2018-2025 Intel Corporation + * Copyright (C) 2013-2015 Intel Mobile Communications GmbH + * Copyright (C) 2016-2017 Intel Deutschland GmbH + */ +@@ -422,6 +422,8 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, + /* if reached this point, Alive notification was received */ + iwl_mei_alive_notif(true); + ++ iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr); ++ + ret = iwl_pnvm_load(mvm->trans, &mvm->notif_wait, + &mvm->fw->ucode_capa); + if (ret) { +@@ -430,8 +432,6 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, + return ret; + } + +- iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr); +- + /* + * Note: all the queues are enabled as part of the interface + * initialization, but in firmware restart scenarios they +-- +2.39.5 + diff --git a/queue-6.13/wifi-mac80211-don-t-queue-sdata-work-for-a-non-runni.patch b/queue-6.13/wifi-mac80211-don-t-queue-sdata-work-for-a-non-runni.patch new file mode 100644 index 0000000000..9388835cc1 --- /dev/null +++ b/queue-6.13/wifi-mac80211-don-t-queue-sdata-work-for-a-non-runni.patch @@ -0,0 +1,52 @@ +From 
80d194bbff436385151d29ffddcb0a8dd9d1d689 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Mar 2025 12:37:56 +0200 +Subject: wifi: mac80211: don't queue sdata::work for a non-running sdata + +From: Miri Korenblit + +[ Upstream commit 20d5a0b9cd0ccb32e886cf6baecf14936325bf10 ] + +The worker really shouldn't be queued for a non-running interface. +Also, if ieee80211_setup_sdata is called between queueing and executing +the wk, it will be initialized, which will corrupt wiphy_work_list. + +Fixes: f8891461a277 ("mac80211: do not start any work during reconfigure flow") +Signed-off-by: Miri Korenblit +Reviewed-by: Johannes Berg +Link: https://patch.msgid.link/20250306123626.1e02caf82640.I4949e71ed56e7186ed4968fa9ddff477473fa2f4@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/util.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/net/mac80211/util.c b/net/mac80211/util.c +index dc0b74443c8d1..5ee7fc81ff8cf 100644 +--- a/net/mac80211/util.c ++++ b/net/mac80211/util.c +@@ -6,7 +6,7 @@ + * Copyright 2007 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright (C) 2015-2017 Intel Deutschland GmbH +- * Copyright (C) 2018-2024 Intel Corporation ++ * Copyright (C) 2018-2025 Intel Corporation + * + * utilities for mac80211 + */ +@@ -2190,8 +2190,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) + ieee80211_reconfig_roc(local); + + /* Requeue all works */ +- list_for_each_entry(sdata, &local->interfaces, list) +- wiphy_work_queue(local->hw.wiphy, &sdata->work); ++ list_for_each_entry(sdata, &local->interfaces, list) { ++ if (ieee80211_sdata_running(sdata)) ++ wiphy_work_queue(local->hw.wiphy, &sdata->work); ++ } + } + + ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, +-- +2.39.5 + diff --git a/queue-6.13/wifi-mac80211-fix-mpdu-length-parsing-for-eht-5-6-gh.patch b/queue-6.13/wifi-mac80211-fix-mpdu-length-parsing-for-eht-5-6-gh.patch new file mode 
100644 index 0000000000..23c72118d6 --- /dev/null +++ b/queue-6.13/wifi-mac80211-fix-mpdu-length-parsing-for-eht-5-6-gh.patch @@ -0,0 +1,53 @@ +From aa8a8541b56090f215712a9463d3f8406350b9f9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Mar 2025 12:17:04 +0100 +Subject: wifi: mac80211: fix MPDU length parsing for EHT 5/6 GHz + +From: Benjamin Berg + +[ Upstream commit 8ae227f8a7749eec92fc381dfbe213429c852278 ] + +The MPDU length is only configured using the EHT capabilities element on +2.4 GHz. On 5/6 GHz it is configured using the VHT or HE capabilities +respectively. + +Fixes: cf0079279727 ("wifi: mac80211: parse A-MSDU len from EHT capabilities") +Reviewed-by: Miriam Rachel Korenblit +Signed-off-by: Benjamin Berg +Link: https://patch.msgid.link/20250311121704.0634d31f0883.I28063e4d3ef7d296b7e8a1c303460346a30bf09c@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/eht.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c +index 7a3116c36df9f..fd41046e3b681 100644 +--- a/net/mac80211/eht.c ++++ b/net/mac80211/eht.c +@@ -2,7 +2,7 @@ + /* + * EHT handling + * +- * Copyright(c) 2021-2024 Intel Corporation ++ * Copyright(c) 2021-2025 Intel Corporation + */ + + #include "ieee80211_i.h" +@@ -76,6 +76,13 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, + link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta); + link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + ++ /* ++ * The MPDU length bits are reserved on all but 2.4 GHz and get set via ++ * VHT (5 GHz) or HE (6 GHz) capabilities. ++ */ ++ if (sband->band != NL80211_BAND_2GHZ) ++ return; ++ + switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0], + IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) { + case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454: +-- +2.39.5 +