From: Sasha Levin Date: Sun, 17 Nov 2024 14:35:17 +0000 (-0500) Subject: Fixes for 6.11 X-Git-Tag: v6.12.1~66 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fed4fdfcce98940da210bfa6a99ec5d49fc9a45e;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.11 Signed-off-by: Sasha Levin --- diff --git a/queue-6.11/arm-9419-1-mm-fix-kernel-memory-mapping-for-xip-kern.patch b/queue-6.11/arm-9419-1-mm-fix-kernel-memory-mapping-for-xip-kern.patch new file mode 100644 index 00000000000..1e90dd8b5c3 --- /dev/null +++ b/queue-6.11/arm-9419-1-mm-fix-kernel-memory-mapping-for-xip-kern.patch @@ -0,0 +1,144 @@ +From 6a2e510d640889782ddf61274d0066a970c6ba30 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Sep 2024 06:57:11 +0100 +Subject: ARM: 9419/1: mm: Fix kernel memory mapping for xip kernels + +From: Harith G + +[ Upstream commit ed6cbe6e5563452f305e89c15846820f2874e431 ] + +The patchset introducing kernel_sec_start/end variables to separate the +kernel/lowmem memory mappings, broke the mapping of the kernel memory +for xipkernels. + +kernel_sec_start/end variables are in RO area before the MMU is switched +on for xipkernels. +So these cannot be set early in boot in head.S. Fix this by setting these +after MMU is switched on. +xipkernels need two different mappings for kernel text (starting at +CONFIG_XIP_PHYS_ADDR) and data (starting at CONFIG_PHYS_OFFSET). +Also, move the kernel code mapping from devicemaps_init() to map_kernel(). 
+ +Fixes: a91da5457085 ("ARM: 9089/1: Define kernel physical section start and end") +Signed-off-by: Harith George +Reviewed-by: Linus Walleij +Signed-off-by: Russell King (Oracle) +Signed-off-by: Sasha Levin +--- + arch/arm/kernel/head.S | 8 ++++++-- + arch/arm/mm/mmu.c | 34 +++++++++++++++++++++------------- + 2 files changed, 27 insertions(+), 15 deletions(-) + +diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S +index 1ec35f065617e..28873cda464f5 100644 +--- a/arch/arm/kernel/head.S ++++ b/arch/arm/kernel/head.S +@@ -252,11 +252,15 @@ __create_page_tables: + */ + add r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ENTRY_ORDER) + ldr r6, =(_end - 1) ++ ++ /* For XIP, kernel_sec_start/kernel_sec_end are currently in RO memory */ ++#ifndef CONFIG_XIP_KERNEL + adr_l r5, kernel_sec_start @ _pa(kernel_sec_start) + #if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32 + str r8, [r5, #4] @ Save physical start of kernel (BE) + #else + str r8, [r5] @ Save physical start of kernel (LE) ++#endif + #endif + orr r3, r8, r7 @ Add the MMU flags + add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ENTRY_ORDER) +@@ -264,6 +268,7 @@ __create_page_tables: + add r3, r3, #1 << SECTION_SHIFT + cmp r0, r6 + bls 1b ++#ifndef CONFIG_XIP_KERNEL + eor r3, r3, r7 @ Remove the MMU flags + adr_l r5, kernel_sec_end @ _pa(kernel_sec_end) + #if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32 +@@ -271,8 +276,7 @@ __create_page_tables: + #else + str r3, [r5] @ Save physical end of kernel (LE) + #endif +- +-#ifdef CONFIG_XIP_KERNEL ++#else + /* + * Map the kernel image separately as it is not located in RAM. + */ +diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c +index 3f774856ca676..b6ad0a8810e42 100644 +--- a/arch/arm/mm/mmu.c ++++ b/arch/arm/mm/mmu.c +@@ -1402,18 +1402,6 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) + create_mapping(&map); + } + +- /* +- * Map the kernel if it is XIP. +- * It is always first in the modulearea. 
+- */ +-#ifdef CONFIG_XIP_KERNEL +- map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); +- map.virtual = MODULES_VADDR; +- map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK; +- map.type = MT_ROM; +- create_mapping(&map); +-#endif +- + /* + * Map the cache flushing regions. + */ +@@ -1603,12 +1591,27 @@ static void __init map_kernel(void) + * This will only persist until we turn on proper memory management later on + * and we remap the whole kernel with page granularity. + */ ++#ifdef CONFIG_XIP_KERNEL ++ phys_addr_t kernel_nx_start = kernel_sec_start; ++#else + phys_addr_t kernel_x_start = kernel_sec_start; + phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + phys_addr_t kernel_nx_start = kernel_x_end; ++#endif + phys_addr_t kernel_nx_end = kernel_sec_end; + struct map_desc map; + ++ /* ++ * Map the kernel if it is XIP. ++ * It is always first in the modulearea. ++ */ ++#ifdef CONFIG_XIP_KERNEL ++ map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); ++ map.virtual = MODULES_VADDR; ++ map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK; ++ map.type = MT_ROM; ++ create_mapping(&map); ++#else + map.pfn = __phys_to_pfn(kernel_x_start); + map.virtual = __phys_to_virt(kernel_x_start); + map.length = kernel_x_end - kernel_x_start; +@@ -1618,7 +1621,7 @@ static void __init map_kernel(void) + /* If the nx part is small it may end up covered by the tail of the RWX section */ + if (kernel_x_end == kernel_nx_end) + return; +- ++#endif + map.pfn = __phys_to_pfn(kernel_nx_start); + map.virtual = __phys_to_virt(kernel_nx_start); + map.length = kernel_nx_end - kernel_nx_start; +@@ -1762,6 +1765,11 @@ void __init paging_init(const struct machine_desc *mdesc) + { + void *zero_page; + ++#ifdef CONFIG_XIP_KERNEL ++ /* Store the kernel RW RAM region start/end in these variables */ ++ kernel_sec_start = CONFIG_PHYS_OFFSET & SECTION_MASK; ++ kernel_sec_end = round_up(__pa(_end), 
SECTION_SIZE); ++#endif + pr_debug("physical kernel sections: 0x%08llx-0x%08llx\n", + kernel_sec_start, kernel_sec_end); + +-- +2.43.0 + diff --git a/queue-6.11/arm-fix-cacheflush-with-pan.patch b/queue-6.11/arm-fix-cacheflush-with-pan.patch new file mode 100644 index 00000000000..35071c92496 --- /dev/null +++ b/queue-6.11/arm-fix-cacheflush-with-pan.patch @@ -0,0 +1,50 @@ +From e34e26fd70f46e095238fcb902f33ee36c34824b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Nov 2024 10:16:13 +0000 +Subject: ARM: fix cacheflush with PAN +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Russell King (Oracle) + +[ Upstream commit ca29cfcc4a21083d671522ad384532e28a43f033 ] + +It seems that the cacheflush syscall got broken when PAN for LPAE was +implemented. User access was not enabled around the cache maintenance +instructions, causing them to fault. + +Fixes: 7af5b901e847 ("ARM: 9358/2: Implement PAN for LPAE by TTBR0 page table walks disablement") +Reported-by: Michał Pecio +Tested-by: Michał Pecio +Signed-off-by: Russell King (Oracle) +Signed-off-by: Sasha Levin +--- + arch/arm/kernel/traps.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c +index 480e307501bb4..6ea645939573f 100644 +--- a/arch/arm/kernel/traps.c ++++ b/arch/arm/kernel/traps.c +@@ -570,6 +570,7 @@ static int bad_syscall(int n, struct pt_regs *regs) + static inline int + __do_cache_op(unsigned long start, unsigned long end) + { ++ unsigned int ua_flags; + int ret; + + do { +@@ -578,7 +579,9 @@ __do_cache_op(unsigned long start, unsigned long end) + if (fatal_signal_pending(current)) + return 0; + ++ ua_flags = uaccess_save_and_enable(); + ret = flush_icache_user_range(start, start + chunk); ++ uaccess_restore(ua_flags); + if (ret) + return ret; + +-- +2.43.0 + diff --git a/queue-6.11/bluetooth-btintel-direct-exception-event-to-bluetoot.patch 
b/queue-6.11/bluetooth-btintel-direct-exception-event-to-bluetoot.patch new file mode 100644 index 00000000000..8c88feb5e15 --- /dev/null +++ b/queue-6.11/bluetooth-btintel-direct-exception-event-to-bluetoot.patch @@ -0,0 +1,55 @@ +From 20ee5a56a35e3508a6079be355690c081c1eb426 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Oct 2024 14:41:34 +0530 +Subject: Bluetooth: btintel: Direct exception event to bluetooth stack + +From: Kiran K + +[ Upstream commit d5359a7f583ab9b7706915213b54deac065bcb81 ] + +Have exception event part of HCI traces which helps for debug. + +snoop traces: +> HCI Event: Vendor (0xff) plen 79 + Vendor Prefix (0x8780) + Intel Extended Telemetry (0x03) + Unknown extended telemetry event type (0xde) + 01 01 de + Unknown extended subevent 0x07 + 01 01 de 07 01 de 06 1c ef be ad de ef be ad de + ef be ad de ef be ad de ef be ad de ef be ad de + ef be ad de 05 14 ef be ad de ef be ad de ef be + ad de ef be ad de ef be ad de 43 10 ef be ad de + ef be ad de ef be ad de ef be ad de + +Fixes: af395330abed ("Bluetooth: btintel: Add Intel devcoredump support") +Signed-off-by: Kiran K +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + drivers/bluetooth/btintel.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c +index 1ccbb51575153..24d2f4f37d0fd 100644 +--- a/drivers/bluetooth/btintel.c ++++ b/drivers/bluetooth/btintel.c +@@ -3288,13 +3288,12 @@ static int btintel_diagnostics(struct hci_dev *hdev, struct sk_buff *skb) + case INTEL_TLV_TEST_EXCEPTION: + /* Generate devcoredump from exception */ + if (!hci_devcd_init(hdev, skb->len)) { +- hci_devcd_append(hdev, skb); ++ hci_devcd_append(hdev, skb_clone(skb, GFP_ATOMIC)); + hci_devcd_complete(hdev); + } else { + bt_dev_err(hdev, "Failed to generate devcoredump"); +- kfree_skb(skb); + } +- return 0; ++ break; + default: + bt_dev_err(hdev, "Invalid exception type %02X", tlv->val[0]); + } +-- 
+2.43.0 + diff --git a/queue-6.11/bluetooth-hci_core-fix-calling-mgmt_device_connected.patch b/queue-6.11/bluetooth-hci_core-fix-calling-mgmt_device_connected.patch new file mode 100644 index 00000000000..0ed93cc0cd4 --- /dev/null +++ b/queue-6.11/bluetooth-hci_core-fix-calling-mgmt_device_connected.patch @@ -0,0 +1,39 @@ +From e11f93c09eda82398af305b8a71bb76a93961093 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Nov 2024 11:19:54 -0500 +Subject: Bluetooth: hci_core: Fix calling mgmt_device_connected + +From: Luiz Augusto von Dentz + +[ Upstream commit 7967dc8f797f454d4f4acec15c7df0cdf4801617 ] + +Since 61a939c68ee0 ("Bluetooth: Queue incoming ACL data until +BT_CONNECTED state is reached") there is no longer the need to call +mgmt_device_connected as ACL data will be queued until BT_CONNECTED +state. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=219458 +Link: https://github.com/bluez/bluez/issues/1014 +Fixes: 333b4fd11e89 ("Bluetooth: L2CAP: Fix uaf in l2cap_connect") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_core.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c +index 6e07350817bec..eeb4f025ca3bf 100644 +--- a/net/bluetooth/hci_core.c ++++ b/net/bluetooth/hci_core.c +@@ -3788,8 +3788,6 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) + + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_handle(hdev, handle); +- if (conn && hci_dev_test_flag(hdev, HCI_MGMT)) +- mgmt_device_connected(hdev, conn, NULL, 0); + hci_dev_unlock(hdev); + + if (conn) { +-- +2.43.0 + diff --git a/queue-6.11/bonding-add-ns-target-multicast-address-to-slave-dev.patch b/queue-6.11/bonding-add-ns-target-multicast-address-to-slave-dev.patch new file mode 100644 index 00000000000..1178fe773b1 --- /dev/null +++ b/queue-6.11/bonding-add-ns-target-multicast-address-to-slave-dev.patch @@ -0,0 +1,237 @@ +From 
f4a999b3fdfaf85c91e781393376e376b9fbbc5f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Nov 2024 10:16:49 +0000 +Subject: bonding: add ns target multicast address to slave device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Hangbin Liu + +[ Upstream commit 8eb36164d1a6769a20ed43033510067ff3dab9ee ] + +Commit 4598380f9c54 ("bonding: fix ns validation on backup slaves") +tried to resolve the issue where backup slaves couldn't be brought up when +receiving IPv6 Neighbor Solicitation (NS) messages. However, this fix only +worked for drivers that receive all multicast messages, such as the veth +interface. + +For standard drivers, the NS multicast message is silently dropped because +the slave device is not a member of the NS target multicast group. + +To address this, we need to make the slave device join the NS target +multicast group, ensuring it can receive these IPv6 NS messages to validate +the slave’s status properly. + +There are three policies before joining the multicast group: +1. All settings must be under active-backup mode (alb and tlb do not support + arp_validate), with backup slaves and slaves supporting multicast. +2. We can add or remove multicast groups when arp_validate changes. +3. Other operations, such as enslaving, releasing, or setting NS targets, + need to be guarded by arp_validate. 
+ +Fixes: 4e24be018eb9 ("bonding: add new parameter ns_targets") +Signed-off-by: Hangbin Liu +Reviewed-by: Nikolay Aleksandrov +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_main.c | 16 +++++- + drivers/net/bonding/bond_options.c | 82 +++++++++++++++++++++++++++++- + include/net/bond_options.h | 2 + + 3 files changed, 98 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index e20bee1bdffd7..66725c1632635 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -934,6 +934,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, + + if (bond->dev->flags & IFF_UP) + bond_hw_addr_flush(bond->dev, old_active->dev); ++ ++ bond_slave_ns_maddrs_add(bond, old_active); + } + + if (new_active) { +@@ -950,6 +952,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, + dev_mc_sync(new_active->dev, bond->dev); + netif_addr_unlock_bh(bond->dev); + } ++ ++ bond_slave_ns_maddrs_del(bond, new_active); + } + } + +@@ -2267,6 +2271,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, + bond_compute_features(bond); + bond_set_carrier(bond); + ++ /* Needs to be called before bond_select_active_slave(), which will ++ * remove the maddrs if the slave is selected as active slave. 
++ */ ++ bond_slave_ns_maddrs_add(bond, new_slave); ++ + if (bond_uses_primary(bond)) { + block_netpoll_tx(); + bond_select_active_slave(bond); +@@ -2276,7 +2285,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, + if (bond_mode_can_use_xmit_hash(bond)) + bond_update_slave_arr(bond, NULL); + +- + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + if (bond->xdp_prog) { +@@ -2474,6 +2482,12 @@ static int __bond_release_one(struct net_device *bond_dev, + if (oldcurrent == slave) + bond_change_active_slave(bond, NULL); + ++ /* Must be called after bond_change_active_slave () as the slave ++ * might change from an active slave to a backup slave. Then it is ++ * necessary to clear the maddrs on the backup slave. ++ */ ++ bond_slave_ns_maddrs_del(bond, slave); ++ + if (bond_is_lb(bond)) { + /* Must be called only after the slave has been + * detached from the list and the curr_active_slave +diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c +index 95d59a18c0223..327b6ecdc77e0 100644 +--- a/drivers/net/bonding/bond_options.c ++++ b/drivers/net/bonding/bond_options.c +@@ -15,6 +15,7 @@ + #include + + #include ++#include + + static int bond_option_active_slave_set(struct bonding *bond, + const struct bond_opt_value *newval); +@@ -1234,6 +1235,68 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond, + } + + #if IS_ENABLED(CONFIG_IPV6) ++static bool slave_can_set_ns_maddr(const struct bonding *bond, struct slave *slave) ++{ ++ return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && ++ !bond_is_active_slave(slave) && ++ slave->dev->flags & IFF_MULTICAST; ++} ++ ++static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add) ++{ ++ struct in6_addr *targets = bond->params.ns_targets; ++ char slot_maddr[MAX_ADDR_LEN]; ++ int i; ++ ++ if (!slave_can_set_ns_maddr(bond, slave)) ++ return; ++ ++ for (i = 0; i < BOND_MAX_NS_TARGETS; i++) { ++ if 
(ipv6_addr_any(&targets[i])) ++ break; ++ ++ if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) { ++ if (add) ++ dev_mc_add(slave->dev, slot_maddr); ++ else ++ dev_mc_del(slave->dev, slot_maddr); ++ } ++ } ++} ++ ++void bond_slave_ns_maddrs_add(struct bonding *bond, struct slave *slave) ++{ ++ if (!bond->params.arp_validate) ++ return; ++ slave_set_ns_maddrs(bond, slave, true); ++} ++ ++void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave) ++{ ++ if (!bond->params.arp_validate) ++ return; ++ slave_set_ns_maddrs(bond, slave, false); ++} ++ ++static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave, ++ struct in6_addr *target, struct in6_addr *slot) ++{ ++ char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN]; ++ ++ if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave)) ++ return; ++ ++ /* remove the previous maddr from slave */ ++ if (!ipv6_addr_any(slot) && ++ !ndisc_mc_map(slot, slot_maddr, slave->dev, 0)) ++ dev_mc_del(slave->dev, slot_maddr); ++ ++ /* add new maddr on slave if target is set */ ++ if (!ipv6_addr_any(target) && ++ !ndisc_mc_map(target, target_maddr, slave->dev, 0)) ++ dev_mc_add(slave->dev, target_maddr); ++} ++ + static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot, + struct in6_addr *target, + unsigned long last_rx) +@@ -1243,8 +1306,10 @@ static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot, + struct slave *slave; + + if (slot >= 0 && slot < BOND_MAX_NS_TARGETS) { +- bond_for_each_slave(bond, slave, iter) ++ bond_for_each_slave(bond, slave, iter) { + slave->target_last_arp_rx[slot] = last_rx; ++ slave_set_ns_maddr(bond, slave, target, &targets[slot]); ++ } + targets[slot] = *target; + } + } +@@ -1296,15 +1361,30 @@ static int bond_option_ns_ip6_targets_set(struct bonding *bond, + { + return -EPERM; + } ++ ++static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add) {} ++ ++void bond_slave_ns_maddrs_add(struct 
bonding *bond, struct slave *slave) {} ++ ++void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave) {} + #endif + + static int bond_option_arp_validate_set(struct bonding *bond, + const struct bond_opt_value *newval) + { ++ bool changed = !!bond->params.arp_validate != !!newval->value; ++ struct list_head *iter; ++ struct slave *slave; ++ + netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n", + newval->string, newval->value); + bond->params.arp_validate = newval->value; + ++ if (changed) { ++ bond_for_each_slave(bond, slave, iter) ++ slave_set_ns_maddrs(bond, slave, !!bond->params.arp_validate); ++ } ++ + return 0; + } + +diff --git a/include/net/bond_options.h b/include/net/bond_options.h +index 473a0147769eb..18687ccf06383 100644 +--- a/include/net/bond_options.h ++++ b/include/net/bond_options.h +@@ -161,5 +161,7 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond); + #if IS_ENABLED(CONFIG_IPV6) + void bond_option_ns_ip6_targets_clear(struct bonding *bond); + #endif ++void bond_slave_ns_maddrs_add(struct bonding *bond, struct slave *slave); ++void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave); + + #endif /* _NET_BOND_OPTIONS_H */ +-- +2.43.0 + diff --git a/queue-6.11/drivers-perf-fix-wrong-put_cpu-placement.patch b/queue-6.11/drivers-perf-fix-wrong-put_cpu-placement.patch new file mode 100644 index 00000000000..ea96c5b1e29 --- /dev/null +++ b/queue-6.11/drivers-perf-fix-wrong-put_cpu-placement.patch @@ -0,0 +1,50 @@ +From f65d34dc605413f12ef026475132085372e29a7d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Nov 2024 12:34:22 +0100 +Subject: drivers: perf: Fix wrong put_cpu() placement + +From: Alexandre Ghiti + +[ Upstream commit 57f7c7dc78cd09622b12920d92b40c1ce11b234e ] + +Unfortunately, the wrong patch version was merged which places the +put_cpu() after enabling a static key, which is not safe as pointed by +Will [1], so move put_cpu() before to avoid this. 
+ +Fixes: 2840dadf0dde ("drivers: perf: Fix smp_processor_id() use in preemptible code") +Reported-by: Atish Patra +Link: https://lore.kernel.org/all/20240827125335.GD4772@willie-the-truck/ [1] +Signed-off-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20241112113422.617954-1-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + drivers/perf/riscv_pmu_sbi.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c +index 671dc55cbd3a8..bc562c759e1e9 100644 +--- a/drivers/perf/riscv_pmu_sbi.c ++++ b/drivers/perf/riscv_pmu_sbi.c +@@ -1380,8 +1380,9 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) + goto out_unregister; + + cpu = get_cpu(); +- + ret = pmu_sbi_snapshot_setup(pmu, cpu); ++ put_cpu(); ++ + if (ret) { + /* Snapshot is an optional feature. Continue if not available */ + pmu_sbi_snapshot_free(pmu); +@@ -1395,7 +1396,6 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) + */ + static_branch_enable(&sbi_pmu_snapshot_available); + } +- put_cpu(); + } + + register_sysctl("kernel", sbi_pmu_sysctl_table); +-- +2.43.0 + diff --git a/queue-6.11/drm-i915-gsc-arl-h-and-arl-u-need-a-newer-gsc-fw.patch b/queue-6.11/drm-i915-gsc-arl-h-and-arl-u-need-a-newer-gsc-fw.patch new file mode 100644 index 00000000000..1fc4deaa8a0 --- /dev/null +++ b/queue-6.11/drm-i915-gsc-arl-h-and-arl-u-need-a-newer-gsc-fw.patch @@ -0,0 +1,224 @@ +From 5a71f5adfa683a7d2aa27efc8a0126bf58348740 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Oct 2024 16:31:32 -0700 +Subject: drm/i915/gsc: ARL-H and ARL-U need a newer GSC FW. + +From: Daniele Ceraolo Spurio + +[ Upstream commit db0fc586edde83ff7ff65fea56c4f72dae511764 ] + +All MTL and ARL SKUs share the same GSC FW, but the newer platforms are +only supported in newer blobs. 
In particular, ARL-S is supported +starting from 102.0.10.1878 (which is already the minimum required +version for ARL in the code), while ARL-H and ARL-U are supported from +102.1.15.1926. Therefore, the driver needs to check which specific ARL +subplatform its running on when verifying that the GSC FW is new enough +for it. + +Fixes: 2955ae8186c8 ("drm/i915: ARL requires a newer GSC firmware") +Signed-off-by: Daniele Ceraolo Spurio +Cc: John Harrison +Cc: Rodrigo Vivi +Reviewed-by: Rodrigo Vivi +Reviewed-by: John Harrison +Link: https://patchwork.freedesktop.org/patch/msgid/20241028233132.149745-1-daniele.ceraolospurio@intel.com +(cherry picked from commit 3c1d5ced18db8a67251c8436cf9bdc061f972bdb) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 50 +++++++++++++++-------- + drivers/gpu/drm/i915/i915_drv.h | 8 +++- + drivers/gpu/drm/i915/intel_device_info.c | 24 ++++++++--- + drivers/gpu/drm/i915/intel_device_info.h | 4 +- + include/drm/intel/i915_pciids.h | 19 +++++++-- + 5 files changed, 75 insertions(+), 30 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +index 551b0d7974ff1..5dc0ccd076363 100644 +--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c ++++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +@@ -80,6 +80,7 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s + const struct intel_gsc_cpd_header_v2 *cpd_header = NULL; + const struct intel_gsc_cpd_entry *cpd_entry = NULL; + const struct intel_gsc_manifest_header *manifest; ++ struct intel_uc_fw_ver min_ver = { 0 }; + size_t min_size = sizeof(*layout); + int i; + +@@ -212,33 +213,46 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s + } + } + +- if (IS_ARROWLAKE(gt->i915)) { ++ /* ++ * ARL SKUs require newer firmwares, but the blob is actually common ++ * across all MTL and ARL SKUs, so we need to do an explicit version 
check ++ * here rather than using a separate table entry. If a too old version ++ * is found, then just don't use GSC rather than aborting the driver load. ++ * Note that the major number in the GSC FW version is used to indicate ++ * the platform, so we expect it to always be 102 for MTL/ARL binaries. ++ */ ++ if (IS_ARROWLAKE_S(gt->i915)) ++ min_ver = (struct intel_uc_fw_ver){ 102, 0, 10, 1878 }; ++ else if (IS_ARROWLAKE_H(gt->i915) || IS_ARROWLAKE_U(gt->i915)) ++ min_ver = (struct intel_uc_fw_ver){ 102, 1, 15, 1926 }; ++ ++ if (IS_METEORLAKE(gt->i915) && gsc->release.major != 102) { ++ gt_info(gt, "Invalid GSC firmware for MTL/ARL, got %d.%d.%d.%d but need 102.x.x.x", ++ gsc->release.major, gsc->release.minor, ++ gsc->release.patch, gsc->release.build); ++ return -EINVAL; ++ } ++ ++ if (min_ver.major) { + bool too_old = false; + +- /* +- * ARL requires a newer firmware than MTL did (102.0.10.1878) but the +- * firmware is actually common. So, need to do an explicit version check +- * here rather than using a separate table entry. And if the older +- * MTL-only version is found, then just don't use GSC rather than aborting +- * the driver load. 
+- */ +- if (gsc->release.major < 102) { ++ if (gsc->release.minor < min_ver.minor) { + too_old = true; +- } else if (gsc->release.major == 102) { +- if (gsc->release.minor == 0) { +- if (gsc->release.patch < 10) { ++ } else if (gsc->release.minor == min_ver.minor) { ++ if (gsc->release.patch < min_ver.patch) { ++ too_old = true; ++ } else if (gsc->release.patch == min_ver.patch) { ++ if (gsc->release.build < min_ver.build) + too_old = true; +- } else if (gsc->release.patch == 10) { +- if (gsc->release.build < 1878) +- too_old = true; +- } + } + } + + if (too_old) { +- gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least 102.0.10.1878", ++ gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least %d.%d.%d.%d", + gsc->release.major, gsc->release.minor, +- gsc->release.patch, gsc->release.build); ++ gsc->release.patch, gsc->release.build, ++ min_ver.major, min_ver.minor, ++ min_ver.patch, min_ver.build); + return -EINVAL; + } + } +diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h +index 110340e02a021..0c0c666f11ea2 100644 +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -546,8 +546,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, + #define IS_LUNARLAKE(i915) (0 && i915) + #define IS_BATTLEMAGE(i915) (0 && i915) + +-#define IS_ARROWLAKE(i915) \ +- IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL) ++#define IS_ARROWLAKE_H(i915) \ ++ IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL_H) ++#define IS_ARROWLAKE_U(i915) \ ++ IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL_U) ++#define IS_ARROWLAKE_S(i915) \ ++ IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL_S) + #define IS_DG2_G10(i915) \ + IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10) + #define IS_DG2_G11(i915) \ +diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c +index 01a6502530501..bd0cb707e9d49 100644 
+--- a/drivers/gpu/drm/i915/intel_device_info.c ++++ b/drivers/gpu/drm/i915/intel_device_info.c +@@ -202,8 +202,16 @@ static const u16 subplatform_g12_ids[] = { + INTEL_DG2_G12_IDS(ID), + }; + +-static const u16 subplatform_arl_ids[] = { +- INTEL_ARL_IDS(ID), ++static const u16 subplatform_arl_h_ids[] = { ++ INTEL_ARL_H_IDS(ID), ++}; ++ ++static const u16 subplatform_arl_u_ids[] = { ++ INTEL_ARL_U_IDS(ID), ++}; ++ ++static const u16 subplatform_arl_s_ids[] = { ++ INTEL_ARL_S_IDS(ID), + }; + + static bool find_devid(u16 id, const u16 *p, unsigned int num) +@@ -263,9 +271,15 @@ static void intel_device_info_subplatform_init(struct drm_i915_private *i915) + } else if (find_devid(devid, subplatform_g12_ids, + ARRAY_SIZE(subplatform_g12_ids))) { + mask = BIT(INTEL_SUBPLATFORM_G12); +- } else if (find_devid(devid, subplatform_arl_ids, +- ARRAY_SIZE(subplatform_arl_ids))) { +- mask = BIT(INTEL_SUBPLATFORM_ARL); ++ } else if (find_devid(devid, subplatform_arl_h_ids, ++ ARRAY_SIZE(subplatform_arl_h_ids))) { ++ mask = BIT(INTEL_SUBPLATFORM_ARL_H); ++ } else if (find_devid(devid, subplatform_arl_u_ids, ++ ARRAY_SIZE(subplatform_arl_u_ids))) { ++ mask = BIT(INTEL_SUBPLATFORM_ARL_U); ++ } else if (find_devid(devid, subplatform_arl_s_ids, ++ ARRAY_SIZE(subplatform_arl_s_ids))) { ++ mask = BIT(INTEL_SUBPLATFORM_ARL_S); + } + + GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); +diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h +index 643ff1bf74eeb..a9fcaf33df9e2 100644 +--- a/drivers/gpu/drm/i915/intel_device_info.h ++++ b/drivers/gpu/drm/i915/intel_device_info.h +@@ -128,7 +128,9 @@ enum intel_platform { + #define INTEL_SUBPLATFORM_RPLU 2 + + /* MTL */ +-#define INTEL_SUBPLATFORM_ARL 0 ++#define INTEL_SUBPLATFORM_ARL_H 0 ++#define INTEL_SUBPLATFORM_ARL_U 1 ++#define INTEL_SUBPLATFORM_ARL_S 2 + + enum intel_ppgtt_type { + INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, +diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h 
+index 2bf03ebfcf73d..f35534522d333 100644 +--- a/include/drm/intel/i915_pciids.h ++++ b/include/drm/intel/i915_pciids.h +@@ -771,13 +771,24 @@ + INTEL_ATS_M150_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) + +-/* MTL */ +-#define INTEL_ARL_IDS(MACRO__, ...) \ +- MACRO__(0x7D41, ## __VA_ARGS__), \ ++/* ARL */ ++#define INTEL_ARL_H_IDS(MACRO__, ...) \ + MACRO__(0x7D51, ## __VA_ARGS__), \ +- MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__) + ++#define INTEL_ARL_U_IDS(MACRO__, ...) \ ++ MACRO__(0x7D41, ## __VA_ARGS__) \ ++ ++#define INTEL_ARL_S_IDS(MACRO__, ...) \ ++ MACRO__(0x7D67, ## __VA_ARGS__), \ ++ MACRO__(0xB640, ## __VA_ARGS__) ++ ++#define INTEL_ARL_IDS(MACRO__, ...) \ ++ INTEL_ARL_H_IDS(MACRO__, ## __VA_ARGS__), \ ++ INTEL_ARL_U_IDS(MACRO__, ## __VA_ARGS__), \ ++ INTEL_ARL_S_IDS(MACRO__, ## __VA_ARGS__) ++ ++/* MTL */ + #define INTEL_MTL_IDS(MACRO__, ...) \ + INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \ + MACRO__(0x7D40, ## __VA_ARGS__), \ +-- +2.43.0 + diff --git a/queue-6.11/drm-panthor-fix-handling-of-partial-gpu-mapping-of-b.patch b/queue-6.11/drm-panthor-fix-handling-of-partial-gpu-mapping-of-b.patch new file mode 100644 index 00000000000..1aadc9b5742 --- /dev/null +++ b/queue-6.11/drm-panthor-fix-handling-of-partial-gpu-mapping-of-b.patch @@ -0,0 +1,83 @@ +From baaed99024a5808d9e83e6826b3ff945f9478783 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Nov 2024 13:47:20 +0000 +Subject: drm/panthor: Fix handling of partial GPU mapping of BOs + +From: Akash Goel + +[ Upstream commit 3387e043918e154ca08d83954966a8b087fe2835 ] + +This commit fixes the bug in the handling of partial mapping of the +buffer objects to the GPU, which caused kernel warnings. + +Panthor didn't correctly handle the case where the partial mapping +spanned multiple scatterlists and the mapping offset didn't point +to the 1st page of starting scatterlist. 
The offset variable was +not cleared after reaching the starting scatterlist. + +Following warning messages were seen. +WARNING: CPU: 1 PID: 650 at drivers/iommu/io-pgtable-arm.c:659 __arm_lpae_unmap+0x254/0x5a0 + +pc : __arm_lpae_unmap+0x254/0x5a0 +lr : __arm_lpae_unmap+0x2cc/0x5a0 + +Call trace: + __arm_lpae_unmap+0x254/0x5a0 + __arm_lpae_unmap+0x108/0x5a0 + __arm_lpae_unmap+0x108/0x5a0 + __arm_lpae_unmap+0x108/0x5a0 + arm_lpae_unmap_pages+0x80/0xa0 + panthor_vm_unmap_pages+0xac/0x1c8 [panthor] + panthor_gpuva_sm_step_unmap+0x4c/0xc8 [panthor] + op_unmap_cb.isra.23.constprop.30+0x54/0x80 + __drm_gpuvm_sm_unmap+0x184/0x1c8 + drm_gpuvm_sm_unmap+0x40/0x60 + panthor_vm_exec_op+0xa8/0x120 [panthor] + panthor_vm_bind_exec_sync_op+0xc4/0xe8 [panthor] + panthor_ioctl_vm_bind+0x10c/0x170 [panthor] + drm_ioctl_kernel+0xbc/0x138 + drm_ioctl+0x210/0x4b0 + __arm64_sys_ioctl+0xb0/0xf8 + invoke_syscall+0x4c/0x110 + el0_svc_common.constprop.1+0x98/0xf8 + do_el0_svc+0x24/0x38 + el0_svc+0x34/0xc8 + el0t_64_sync_handler+0xa0/0xc8 + el0t_64_sync+0x174/0x178 + +panthor : [drm] drm_WARN_ON(unmapped_sz != pgsize * pgcount) +WARNING: CPU: 1 PID: 650 at drivers/gpu/drm/panthor/panthor_mmu.c:922 panthor_vm_unmap_pages+0x124/0x1c8 [panthor] + +pc : panthor_vm_unmap_pages+0x124/0x1c8 [panthor] +lr : panthor_vm_unmap_pages+0x124/0x1c8 [panthor] + +panthor : [drm] *ERROR* failed to unmap range ffffa388f000-ffffa3890000 (requested range ffffa388c000-ffffa3890000) + +Fixes: 647810ec2476 ("drm/panthor: Add the MMU/VM logical block") +Signed-off-by: Akash Goel +Reviewed-by: Liviu Dudau +Reviewed-by: Steven Price +Reviewed-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20241111134720.780403-1-akash.goel@arm.com +Signed-off-by: Liviu Dudau +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/panthor/panthor_mmu.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c +index 
d18f32640a79f..64378e8f124bd 100644 +--- a/drivers/gpu/drm/panthor/panthor_mmu.c ++++ b/drivers/gpu/drm/panthor/panthor_mmu.c +@@ -990,6 +990,8 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, + + if (!size) + break; ++ ++ offset = 0; + } + + return panthor_vm_flush_range(vm, start_iova, iova - start_iova); +-- +2.43.0 + diff --git a/queue-6.11/drm-rockchip-vop-fix-a-dereferenced-before-check-war.patch b/queue-6.11/drm-rockchip-vop-fix-a-dereferenced-before-check-war.patch new file mode 100644 index 00000000000..328e7d6e47a --- /dev/null +++ b/queue-6.11/drm-rockchip-vop-fix-a-dereferenced-before-check-war.patch @@ -0,0 +1,47 @@ +From 3bccc7c32fa2f74c17e387efba94f0358edfbc27 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Oct 2024 15:28:06 +0800 +Subject: drm/rockchip: vop: Fix a dereferenced before check warning + +From: Andy Yan + +[ Upstream commit ab1c793f457f740ab7108cc0b1340a402dbf484d ] + +The 'state' can't be NULL, we should check crtc_state. + +Fix warning: +drivers/gpu/drm/rockchip/rockchip_drm_vop.c:1096 +vop_plane_atomic_async_check() warn: variable dereferenced before check +'state' (see line 1077) + +Fixes: 5ddb0bd4ddc3 ("drm/atomic: Pass the full state to planes async atomic check and update") +Signed-off-by: Andy Yan +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20241021072818.61621-1-andyshrk@163.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +index f161f40d8ce4c..69900138295bf 100644 +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +@@ -1093,10 +1093,10 @@ static int vop_plane_atomic_async_check(struct drm_plane *plane, + if (!plane->state->fb) + return -EINVAL; + +- if (state) +- crtc_state = 
drm_atomic_get_existing_crtc_state(state, +- new_plane_state->crtc); +- else /* Special case for asynchronous cursor updates. */ ++ crtc_state = drm_atomic_get_existing_crtc_state(state, new_plane_state->crtc); ++ ++ /* Special case for asynchronous cursor updates. */ ++ if (!crtc_state) + crtc_state = plane->crtc->state; + + return drm_atomic_helper_check_plane_state(plane->state, crtc_state, +-- +2.43.0 + diff --git a/queue-6.11/drm-vmwgfx-avoid-null_ptr_deref-in-vmw_framebuffer_s.patch b/queue-6.11/drm-vmwgfx-avoid-null_ptr_deref-in-vmw_framebuffer_s.patch new file mode 100644 index 00000000000..772d1fcff25 --- /dev/null +++ b/queue-6.11/drm-vmwgfx-avoid-null_ptr_deref-in-vmw_framebuffer_s.patch @@ -0,0 +1,39 @@ +From 432eb812ea621b9c4f0b2f8d57936dfc1cabc02d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 29 Oct 2024 08:34:29 +0000 +Subject: drm/vmwgfx: avoid null_ptr_deref in + vmw_framebuffer_surface_create_handle + +From: Chen Ridong + +[ Upstream commit 93d1f41a82de382845af460bf03bcb17dcbf08c5 ] + +The 'vmw_user_object_buffer' function may return NULL with incorrect +inputs. To avoid possible null pointer dereference, add a check whether +the 'bo' is NULL in the vmw_framebuffer_surface_create_handle. 
+ +Fixes: d6667f0ddf46 ("drm/vmwgfx: Fix handling of dumb buffers") +Signed-off-by: Chen Ridong +Signed-off-by: Zack Rusin +Link: https://patchwork.freedesktop.org/patch/msgid/20241029083429.1185479-1-chenridong@huaweicloud.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +index 63b8d7591253c..10d596cb4b402 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +@@ -1265,6 +1265,8 @@ static int vmw_framebuffer_surface_create_handle(struct drm_framebuffer *fb, + struct vmw_framebuffer_surface *vfbs = vmw_framebuffer_to_vfbs(fb); + struct vmw_bo *bo = vmw_user_object_buffer(&vfbs->uo); + ++ if (WARN_ON(!bo)) ++ return -EINVAL; + return drm_gem_handle_create(file_priv, &bo->tbo.base, handle); + } + +-- +2.43.0 + diff --git a/queue-6.11/mptcp-cope-racing-subflow-creation-in-mptcp_rcv_spac.patch b/queue-6.11/mptcp-cope-racing-subflow-creation-in-mptcp_rcv_spac.patch new file mode 100644 index 00000000000..507aa14bd51 --- /dev/null +++ b/queue-6.11/mptcp-cope-racing-subflow-creation-in-mptcp_rcv_spac.patch @@ -0,0 +1,48 @@ +From 60353edb7d3155bec78c9b9a4bf6a942ddf32783 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Nov 2024 11:58:17 +0100 +Subject: mptcp: cope racing subflow creation in mptcp_rcv_space_adjust + +From: Paolo Abeni + +[ Upstream commit ce7356ae35943cc6494cc692e62d51a734062b7d ] + +Additional active subflows - i.e. created by the in kernel path +manager - are included into the subflow list before starting the +3whs. + +A racing recvmsg() spooling data received on an already established +subflow would unconditionally call tcp_cleanup_rbuf() on all the +current subflows, potentially hitting a divide by zero error on +the newly created ones. + +Explicitly check that the subflow is in a suitable state before +invoking tcp_cleanup_rbuf(). 
+ +Fixes: c76c6956566f ("mptcp: call tcp_cleanup_rbuf on subflows") +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/02374660836e1b52afc91966b7535c8c5f7bafb0.1731060874.git.pabeni@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/mptcp/protocol.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index f3e54c836ba56..7913ba6b5daa3 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2082,7 +2082,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) + slow = lock_sock_fast(ssk); + WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf); + WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp); +- tcp_cleanup_rbuf(ssk, 1); ++ if (tcp_can_send_ack(ssk)) ++ tcp_cleanup_rbuf(ssk, 1); + unlock_sock_fast(ssk, slow); + } + } +-- +2.43.0 + diff --git a/queue-6.11/mptcp-error-out-earlier-on-disconnect.patch b/queue-6.11/mptcp-error-out-earlier-on-disconnect.patch new file mode 100644 index 00000000000..011506ce799 --- /dev/null +++ b/queue-6.11/mptcp-error-out-earlier-on-disconnect.patch @@ -0,0 +1,120 @@ +From 1e172628152d659edcf1aa7d75c808e071e679c6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Nov 2024 11:58:16 +0100 +Subject: mptcp: error out earlier on disconnect + +From: Paolo Abeni + +[ Upstream commit 581302298524e9d77c4c44ff5156a6cd112227ae ] + +Eric reported a division by zero splat in the MPTCP protocol: + +Oops: divide error: 0000 [#1] PREEMPT SMP KASAN PTI +CPU: 1 UID: 0 PID: 6094 Comm: syz-executor317 Not tainted +6.12.0-rc5-syzkaller-00291-g05b92660cdfe #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, +BIOS Google 09/13/2024 +RIP: 0010:__tcp_select_window+0x5b4/0x1310 net/ipv4/tcp_output.c:3163 +Code: f6 44 01 e3 89 df e8 9b 75 09 f8 44 39 f3 0f 8d 11 ff ff ff e8 +0d 74 09 f8 45 89 f4 e9 04 ff ff ff e8 00 74 09 f8 44 89 f0 99 7c +24 14 41 29 d6 45 89 f4 e9 ec fe ff ff e8 
e8 73 09 f8 48 89 +RSP: 0018:ffffc900041f7930 EFLAGS: 00010293 +RAX: 0000000000017e67 RBX: 0000000000017e67 RCX: ffffffff8983314b +RDX: 0000000000000000 RSI: ffffffff898331b0 RDI: 0000000000000004 +RBP: 00000000005d6000 R08: 0000000000000004 R09: 0000000000017e67 +R10: 0000000000003e80 R11: 0000000000000000 R12: 0000000000003e80 +R13: ffff888031d9b440 R14: 0000000000017e67 R15: 00000000002eb000 +FS: 00007feb5d7f16c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007feb5d8adbb8 CR3: 0000000074e4c000 CR4: 00000000003526f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + +__tcp_cleanup_rbuf+0x3e7/0x4b0 net/ipv4/tcp.c:1493 +mptcp_rcv_space_adjust net/mptcp/protocol.c:2085 [inline] +mptcp_recvmsg+0x2156/0x2600 net/mptcp/protocol.c:2289 +inet_recvmsg+0x469/0x6a0 net/ipv4/af_inet.c:885 +sock_recvmsg_nosec net/socket.c:1051 [inline] +sock_recvmsg+0x1b2/0x250 net/socket.c:1073 +__sys_recvfrom+0x1a5/0x2e0 net/socket.c:2265 +__do_sys_recvfrom net/socket.c:2283 [inline] +__se_sys_recvfrom net/socket.c:2279 [inline] +__x64_sys_recvfrom+0xe0/0x1c0 net/socket.c:2279 +do_syscall_x64 arch/x86/entry/common.c:52 [inline] +do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 +entry_SYSCALL_64_after_hwframe+0x77/0x7f +RIP: 0033:0x7feb5d857559 +Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48 +89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d +01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007feb5d7f1208 EFLAGS: 00000246 ORIG_RAX: 000000000000002d +RAX: ffffffffffffffda RBX: 00007feb5d8e1318 RCX: 00007feb5d857559 +RDX: 000000800000000e RSI: 0000000000000000 RDI: 0000000000000003 +RBP: 00007feb5d8e1310 R08: 0000000000000000 R09: ffffffff81000000 +R10: 0000000000000100 R11: 0000000000000246 R12: 00007feb5d8e131c +R13: 00007feb5d8ae074 R14: 000000800000000e R15: 
00000000fffffdef + +and provided a nice reproducer. + +The root cause is the current bad handling of racing disconnect. +After the blamed commit below, sk_wait_data() can return (with +error) with the underlying socket disconnected and a zero rcv_mss. + +Catch the error and return without performing any additional +operations on the current socket. + +Reported-by: Eric Dumazet +Fixes: 419ce133ab92 ("tcp: allow again tcp_disconnect() when threads are waiting") +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/8c82ecf71662ecbc47bf390f9905de70884c9f2d.1731060874.git.pabeni@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/mptcp/protocol.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index ec87b36f0d451..f3e54c836ba56 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2205,7 +2205,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + cmsg_flags = MPTCP_CMSG_INQ; + + while (copied < len) { +- int bytes_read; ++ int err, bytes_read; + + bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags); + if (unlikely(bytes_read < 0)) { +@@ -2267,9 +2267,16 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + } + + pr_debug("block timeout %ld\n", timeo); +- sk_wait_data(sk, &timeo, NULL); ++ mptcp_rcv_space_adjust(msk, copied); ++ err = sk_wait_data(sk, &timeo, NULL); ++ if (err < 0) { ++ err = copied ? 
: err; ++ goto out_err; ++ } + } + ++ mptcp_rcv_space_adjust(msk, copied); ++ + out_err: + if (cmsg_flags && copied >= 0) { + if (cmsg_flags & MPTCP_CMSG_TS) +@@ -2285,8 +2292,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n", + msk, skb_queue_empty_lockless(&sk->sk_receive_queue), + skb_queue_empty(&msk->receive_queue), copied); +- if (!(flags & MSG_PEEK)) +- mptcp_rcv_space_adjust(msk, copied); + + release_sock(sk); + return copied; +-- +2.43.0 + diff --git a/queue-6.11/net-fix-data-races-around-sk-sk_forward_alloc.patch b/queue-6.11/net-fix-data-races-around-sk-sk_forward_alloc.patch new file mode 100644 index 00000000000..cbd9b2e9070 --- /dev/null +++ b/queue-6.11/net-fix-data-races-around-sk-sk_forward_alloc.patch @@ -0,0 +1,147 @@ +From 170d0354dc8915ee9466b347d4afee9f5e741524 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 10:34:05 +0800 +Subject: net: fix data-races around sk->sk_forward_alloc + +From: Wang Liang + +[ Upstream commit 073d89808c065ac4c672c0a613a71b27a80691cb ] + +Syzkaller reported this warning: + ------------[ cut here ]------------ + WARNING: CPU: 0 PID: 16 at net/ipv4/af_inet.c:156 inet_sock_destruct+0x1c5/0x1e0 + Modules linked in: + CPU: 0 UID: 0 PID: 16 Comm: ksoftirqd/0 Not tainted 6.12.0-rc5 #26 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 + RIP: 0010:inet_sock_destruct+0x1c5/0x1e0 + Code: 24 12 4c 89 e2 5b 48 c7 c7 98 ec bb 82 41 5c e9 d1 18 17 ff 4c 89 e6 5b 48 c7 c7 d0 ec bb 82 41 5c e9 bf 18 17 ff 0f 0b eb 83 <0f> 0b eb 97 0f 0b eb 87 0f 0b e9 68 ff ff ff 66 66 2e 0f 1f 84 00 + RSP: 0018:ffffc9000008bd90 EFLAGS: 00010206 + RAX: 0000000000000300 RBX: ffff88810b172a90 RCX: 0000000000000007 + RDX: 0000000000000002 RSI: 0000000000000300 RDI: ffff88810b172a00 + RBP: ffff88810b172a00 R08: ffff888104273c00 R09: 0000000000100007 + R10: 0000000000020000 R11: 0000000000000006 R12: ffff88810b172a00 + R13: 
0000000000000004 R14: 0000000000000000 R15: ffff888237c31f78 + FS: 0000000000000000(0000) GS:ffff888237c00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007ffc63fecac8 CR3: 000000000342e000 CR4: 00000000000006f0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + + ? __warn+0x88/0x130 + ? inet_sock_destruct+0x1c5/0x1e0 + ? report_bug+0x18e/0x1a0 + ? handle_bug+0x53/0x90 + ? exc_invalid_op+0x18/0x70 + ? asm_exc_invalid_op+0x1a/0x20 + ? inet_sock_destruct+0x1c5/0x1e0 + __sk_destruct+0x2a/0x200 + rcu_do_batch+0x1aa/0x530 + ? rcu_do_batch+0x13b/0x530 + rcu_core+0x159/0x2f0 + handle_softirqs+0xd3/0x2b0 + ? __pfx_smpboot_thread_fn+0x10/0x10 + run_ksoftirqd+0x25/0x30 + smpboot_thread_fn+0xdd/0x1d0 + kthread+0xd3/0x100 + ? __pfx_kthread+0x10/0x10 + ret_from_fork+0x34/0x50 + ? __pfx_kthread+0x10/0x10 + ret_from_fork_asm+0x1a/0x30 + + ---[ end trace 0000000000000000 ]--- + +It's possible that two threads call tcp_v6_do_rcv()/sk_forward_alloc_add() +concurrently when sk->sk_state == TCP_LISTEN with sk->sk_lock unlocked, +which triggers a data-race around sk->sk_forward_alloc: +tcp_v6_rcv + tcp_v6_do_rcv + skb_clone_and_charge_r + sk_rmem_schedule + __sk_mem_schedule + sk_forward_alloc_add() + skb_set_owner_r + sk_mem_charge + sk_forward_alloc_add() + __kfree_skb + skb_release_all + skb_release_head_state + sock_rfree + sk_mem_uncharge + sk_forward_alloc_add() + sk_mem_reclaim + // set local var reclaimable + __sk_mem_reclaim + sk_forward_alloc_add() + +In this syzkaller testcase, two threads call +tcp_v6_do_rcv() with skb->truesize=768, the sk_forward_alloc changes like +this: + (cpu 1) | (cpu 2) | sk_forward_alloc + ... | ... | 0 + __sk_mem_schedule() | | +4096 = 4096 + | __sk_mem_schedule() | +4096 = 8192 + sk_mem_charge() | | -768 = 7424 + | sk_mem_charge() | -768 = 6656 + ... | ... 
| + sk_mem_uncharge() | | +768 = 7424 + reclaimable=7424 | | + | sk_mem_uncharge() | +768 = 8192 + | reclaimable=8192 | + __sk_mem_reclaim() | | -4096 = 4096 + | __sk_mem_reclaim() | -8192 = -4096 != 0 + +The skb_clone_and_charge_r() should not be called in tcp_v6_do_rcv() when +sk->sk_state is TCP_LISTEN, it happens later in tcp_v6_syn_recv_sock(). +Fix the same issue in dccp_v6_do_rcv(). + +Suggested-by: Eric Dumazet +Reviewed-by: Eric Dumazet +Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") +Signed-off-by: Wang Liang +Link: https://patch.msgid.link/20241107023405.889239-1-wangliang74@huawei.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/dccp/ipv6.c | 2 +- + net/ipv6/tcp_ipv6.c | 4 +--- + 2 files changed, 2 insertions(+), 4 deletions(-) + +diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c +index da5dba120bc9a..d6649246188d7 100644 +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -618,7 +618,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) + by tcp. Feel free to propose better solution. + --ANK (980728) + */ +- if (np->rxopt.all) ++ if (np->rxopt.all && sk->sk_state != DCCP_LISTEN) + opt_skb = skb_clone_and_charge_r(skb, sk); + + if (sk->sk_state == DCCP_OPEN) { /* Fast path */ +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 200fea92f12fc..84cd46311da09 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1617,7 +1617,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) + by tcp. Feel free to propose better solution. 
+ --ANK (980728) + */ +- if (np->rxopt.all) ++ if (np->rxopt.all && sk->sk_state != TCP_LISTEN) + opt_skb = skb_clone_and_charge_r(skb, sk); + + if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ +@@ -1655,8 +1655,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) + if (reason) + goto reset; + } +- if (opt_skb) +- __kfree_skb(opt_skb); + return 0; + } + } else +-- +2.43.0 + diff --git a/queue-6.11/net-make-copy_safe_from_sockptr-match-documentation.patch b/queue-6.11/net-make-copy_safe_from_sockptr-match-documentation.patch new file mode 100644 index 00000000000..3c9cfbb53df --- /dev/null +++ b/queue-6.11/net-make-copy_safe_from_sockptr-match-documentation.patch @@ -0,0 +1,47 @@ +From fcc5b1c3d1590c3f379e48b6b06e9db588ee1c8d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Nov 2024 00:17:34 +0100 +Subject: net: Make copy_safe_from_sockptr() match documentation + +From: Michal Luczaj + +[ Upstream commit eb94b7bb10109a14a5431a67e5d8e31cfa06b395 ] + +copy_safe_from_sockptr() + return copy_from_sockptr() + return copy_from_sockptr_offset() + return copy_from_user() + +copy_from_user() does not return an error on fault. Instead, it returns a +number of bytes that were not copied. Have it handled. + +Patch has a side effect: it un-breaks garbage input handling of +nfc_llcp_setsockopt() and mISDN's data_sock_setsockopt(). 
+ +Fixes: 6309863b31dd ("net: add copy_safe_from_sockptr() helper") +Signed-off-by: Michal Luczaj +Link: https://patch.msgid.link/20241111-sockptr-copy-ret-fix-v1-1-a520083a93fb@rbox.co +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/linux/sockptr.h | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h +index fc5a206c40435..195debe2b1dbc 100644 +--- a/include/linux/sockptr.h ++++ b/include/linux/sockptr.h +@@ -77,7 +77,9 @@ static inline int copy_safe_from_sockptr(void *dst, size_t ksize, + { + if (optlen < ksize) + return -EINVAL; +- return copy_from_sockptr(dst, optval, ksize); ++ if (copy_from_sockptr(dst, optval, ksize)) ++ return -EFAULT; ++ return 0; + } + + static inline int copy_struct_from_sockptr(void *dst, size_t ksize, +-- +2.43.0 + diff --git a/queue-6.11/net-mlx5-fix-msix-vectors-to-respect-platform-limit.patch b/queue-6.11/net-mlx5-fix-msix-vectors-to-respect-platform-limit.patch new file mode 100644 index 00000000000..c8b68d2a27c --- /dev/null +++ b/queue-6.11/net-mlx5-fix-msix-vectors-to-respect-platform-limit.patch @@ -0,0 +1,147 @@ +From d50adf97fddf6df5e7d6d12e2bbbe7302da463f2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 20:35:22 +0200 +Subject: net/mlx5: Fix msix vectors to respect platform limit + +From: Parav Pandit + +[ Upstream commit d0989c9d2b3a89ae5e4ad45fe6d7bbe449fc49fe ] + +The number of PCI vectors allocated by the platform (which may be fewer +than requested) is currently not honored when creating the SF pool; +only the PCI MSI-X capability is considered. + +As a result, when a platform allocates fewer vectors +(in non-dynamic mode) than requested, the PF and SF pools end up +with an invalid vector range. + +This causes incorrect SF vector accounting, which leads to the +following call trace when an invalid IRQ vector is allocated. 
+ +This issue is resolved by ensuring that the platform's vector +limit is respected for both the SF and PF pools. + +Workqueue: mlx5_vhca_event0 mlx5_sf_dev_add_active_work [mlx5_core] +RIP: 0010:pci_irq_vector+0x23/0x80 +RSP: 0018:ffffabd5cebd7248 EFLAGS: 00010246 +RAX: ffff980880e7f308 RBX: ffff9808932fb880 RCX: 0000000000000001 +RDX: 00000000000001ff RSI: 0000000000000200 RDI: ffff980880e7f308 +RBP: 0000000000000200 R08: 0000000000000010 R09: ffff97a9116f0860 +R10: 0000000000000002 R11: 0000000000000228 R12: ffff980897cd0160 +R13: 0000000000000000 R14: ffff97a920fec0c0 R15: ffffabd5cebd72d0 +FS: 0000000000000000(0000) GS:ffff97c7ff9c0000(0000) knlGS:0000000000000000 + ? rescuer_thread+0x350/0x350 + kthread+0x11b/0x140 + ? __kthread_bind_mask+0x60/0x60 + ret_from_fork+0x22/0x30 +mlx5_core 0000:a1:00.0: mlx5_irq_alloc:321:(pid 6781): Failed to request irq. err = -22 +mlx5_core 0000:a1:00.0: mlx5_irq_alloc:321:(pid 6781): Failed to request irq. err = -22 +mlx5_core.sf mlx5_core.sf.6: MLX5E: StrdRq(1) RqSz(8) StrdSz(2048) RxCqeCmprss(0 enhanced) +mlx5_core.sf mlx5_core.sf.7: firmware version: 32.43.356 +mlx5_core.sf mlx5_core.sf.6 enpa1s0f0s4: renamed from eth0 +mlx5_core.sf mlx5_core.sf.7: Rate limit: 127 rates are supported, range: 0Mbps to 195312Mbps +mlx5_core 0000:a1:00.0: mlx5_irq_alloc:321:(pid 6781): Failed to request irq. err = -22 +mlx5_core 0000:a1:00.0: mlx5_irq_alloc:321:(pid 6781): Failed to request irq. err = -22 +mlx5_core 0000:a1:00.0: mlx5_irq_alloc:321:(pid 6781): Failed to request irq. 
err = -22 + +Fixes: 3354822cde5a ("net/mlx5: Use dynamic msix vectors allocation") +Signed-off-by: Parav Pandit +Signed-off-by: Amir Tzin +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20241107183527.676877-3-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 32 ++++++++++++++++--- + 1 file changed, 27 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +index 81a9232a03e1b..7db9cab9bedf6 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +@@ -593,9 +593,11 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) + kvfree(pool); + } + +-static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec) ++static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec, ++ bool dynamic_vec) + { + struct mlx5_irq_table *table = dev->priv.irq_table; ++ int sf_vec_available = sf_vec; + int num_sf_ctrl; + int err; + +@@ -616,6 +618,13 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec) + num_sf_ctrl = DIV_ROUND_UP(mlx5_sf_max_functions(dev), + MLX5_SFS_PER_CTRL_IRQ); + num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl); ++ if (!dynamic_vec && (num_sf_ctrl + 1) > sf_vec_available) { ++ mlx5_core_dbg(dev, ++ "Not enough IRQs for SFs control and completion pool, required=%d avail=%d\n", ++ num_sf_ctrl + 1, sf_vec_available); ++ return 0; ++ } ++ + table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl, + "mlx5_sf_ctrl", + MLX5_EQ_SHARE_IRQ_MIN_CTRL, +@@ -624,9 +633,11 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec) + err = PTR_ERR(table->sf_ctrl_pool); + goto err_pf; + } +- /* init sf_comp_pool */ ++ sf_vec_available -= num_sf_ctrl; ++ ++ /* init sf_comp_pool, remaining vectors are for the SF completions */ + 
table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl, +- sf_vec - num_sf_ctrl, "mlx5_sf_comp", ++ sf_vec_available, "mlx5_sf_comp", + MLX5_EQ_SHARE_IRQ_MIN_COMP, + MLX5_EQ_SHARE_IRQ_MAX_COMP); + if (IS_ERR(table->sf_comp_pool)) { +@@ -715,6 +726,7 @@ int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table) + int mlx5_irq_table_create(struct mlx5_core_dev *dev) + { + int num_eqs = mlx5_max_eq_cap_get(dev); ++ bool dynamic_vec; + int total_vec; + int pcif_vec; + int req_vec; +@@ -724,21 +736,31 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) + if (mlx5_core_is_sf(dev)) + return 0; + ++ /* PCI PF vectors usage is limited by online cpus, device EQs and ++ * PCI MSI-X capability. ++ */ + pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1; + pcif_vec = min_t(int, pcif_vec, num_eqs); ++ pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev)); + + total_vec = pcif_vec; + if (mlx5_sf_max_functions(dev)) + total_vec += MLX5_MAX_MSIX_PER_SF * mlx5_sf_max_functions(dev); + total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev)); +- pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev)); + + req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 
1 : total_vec; + n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX); + if (n < 0) + return n; + +- err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec); ++ /* Further limit vectors of the pools based on platform for non dynamic case */ ++ dynamic_vec = pci_msix_can_alloc_dyn(dev->pdev); ++ if (!dynamic_vec) { ++ pcif_vec = min_t(int, n, pcif_vec); ++ total_vec = min_t(int, n, total_vec); ++ } ++ ++ err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec, dynamic_vec); + if (err) + pci_free_irq_vectors(dev->pdev); + +-- +2.43.0 + diff --git a/queue-6.11/net-mlx5-fs-lock-fte-when-checking-if-active.patch b/queue-6.11/net-mlx5-fs-lock-fte-when-checking-if-active.patch new file mode 100644 index 00000000000..5a931b99043 --- /dev/null +++ b/queue-6.11/net-mlx5-fs-lock-fte-when-checking-if-active.patch @@ -0,0 +1,130 @@ +From 27d420d756bfdad44343360275a050e9a10d1d08 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 20:35:23 +0200 +Subject: net/mlx5: fs, lock FTE when checking if active + +From: Mark Bloch + +[ Upstream commit 9ca314419930f9135727e39d77e66262d5f7bef6 ] + +The referenced commits introduced a two-step process for deleting FTEs: + +- Lock the FTE, delete it from hardware, set the hardware deletion function + to NULL and unlock the FTE. +- Lock the parent flow group, delete the software copy of the FTE, and + remove it from the xarray. + +However, this approach encounters a race condition if a rule with the same +match value is added simultaneously. In this scenario, fs_core may set the +hardware deletion function to NULL prematurely, causing a panic during +subsequent rule deletions. + +To prevent this, ensure the active flag of the FTE is checked under a lock, +which will prevent the fs_core layer from attaching a new steering rule to +an FTE that is in the process of deletion. 
+ +[ 438.967589] MOSHE: 2496 mlx5_del_flow_rules del_hw_func +[ 438.968205] ------------[ cut here ]------------ +[ 438.968654] refcount_t: decrement hit 0; leaking memory. +[ 438.969249] WARNING: CPU: 0 PID: 8957 at lib/refcount.c:31 refcount_warn_saturate+0xfb/0x110 +[ 438.970054] Modules linked in: act_mirred cls_flower act_gact sch_ingress openvswitch nsh mlx5_vdpa vringh vhost_iotlb vdpa mlx5_ib mlx5_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core zram zsmalloc fuse [last unloaded: cls_flower] +[ 438.973288] CPU: 0 UID: 0 PID: 8957 Comm: tc Not tainted 6.12.0-rc1+ #8 +[ 438.973888] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +[ 438.974874] RIP: 0010:refcount_warn_saturate+0xfb/0x110 +[ 438.975363] Code: 40 66 3b 82 c6 05 16 e9 4d 01 01 e8 1f 7c a0 ff 0f 0b c3 cc cc cc cc 48 c7 c7 10 66 3b 82 c6 05 fd e8 4d 01 01 e8 05 7c a0 ff <0f> 0b c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 90 +[ 438.976947] RSP: 0018:ffff888124a53610 EFLAGS: 00010286 +[ 438.977446] RAX: 0000000000000000 RBX: ffff888119d56de0 RCX: 0000000000000000 +[ 438.978090] RDX: ffff88852c828700 RSI: ffff88852c81b3c0 RDI: ffff88852c81b3c0 +[ 438.978721] RBP: ffff888120fa0e88 R08: 0000000000000000 R09: ffff888124a534b0 +[ 438.979353] R10: 0000000000000001 R11: 0000000000000001 R12: ffff888119d56de0 +[ 438.979979] R13: ffff888120fa0ec0 R14: ffff888120fa0ee8 R15: ffff888119d56de0 +[ 438.980607] FS: 00007fe6dcc0f800(0000) GS:ffff88852c800000(0000) knlGS:0000000000000000 +[ 438.983984] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 438.984544] CR2: 00000000004275e0 CR3: 0000000186982001 CR4: 0000000000372eb0 +[ 438.985205] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 438.985842] DR3: 
0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 438.986507] Call Trace: +[ 438.986799] +[ 438.987070] ? __warn+0x7d/0x110 +[ 438.987426] ? refcount_warn_saturate+0xfb/0x110 +[ 438.987877] ? report_bug+0x17d/0x190 +[ 438.988261] ? prb_read_valid+0x17/0x20 +[ 438.988659] ? handle_bug+0x53/0x90 +[ 438.989054] ? exc_invalid_op+0x14/0x70 +[ 438.989458] ? asm_exc_invalid_op+0x16/0x20 +[ 438.989883] ? refcount_warn_saturate+0xfb/0x110 +[ 438.990348] mlx5_del_flow_rules+0x2f7/0x340 [mlx5_core] +[ 438.990932] __mlx5_eswitch_del_rule+0x49/0x170 [mlx5_core] +[ 438.991519] ? mlx5_lag_is_sriov+0x3c/0x50 [mlx5_core] +[ 438.992054] ? xas_load+0x9/0xb0 +[ 438.992407] mlx5e_tc_rule_unoffload+0x45/0xe0 [mlx5_core] +[ 438.993037] mlx5e_tc_del_fdb_flow+0x2a6/0x2e0 [mlx5_core] +[ 438.993623] mlx5e_flow_put+0x29/0x60 [mlx5_core] +[ 438.994161] mlx5e_delete_flower+0x261/0x390 [mlx5_core] +[ 438.994728] tc_setup_cb_destroy+0xb9/0x190 +[ 438.995150] fl_hw_destroy_filter+0x94/0xc0 [cls_flower] +[ 438.995650] fl_change+0x11a4/0x13c0 [cls_flower] +[ 438.996105] tc_new_tfilter+0x347/0xbc0 +[ 438.996503] ? ___slab_alloc+0x70/0x8c0 +[ 438.996929] rtnetlink_rcv_msg+0xf9/0x3e0 +[ 438.997339] ? __netlink_sendskb+0x4c/0x70 +[ 438.997751] ? netlink_unicast+0x286/0x2d0 +[ 438.998171] ? __pfx_rtnetlink_rcv_msg+0x10/0x10 +[ 438.998625] netlink_rcv_skb+0x54/0x100 +[ 438.999020] netlink_unicast+0x203/0x2d0 +[ 438.999421] netlink_sendmsg+0x1e4/0x420 +[ 438.999820] __sock_sendmsg+0xa1/0xb0 +[ 439.000203] ____sys_sendmsg+0x207/0x2a0 +[ 439.000600] ? copy_msghdr_from_user+0x6d/0xa0 +[ 439.001072] ___sys_sendmsg+0x80/0xc0 +[ 439.001459] ? ___sys_recvmsg+0x8b/0xc0 +[ 439.001848] ? 
generic_update_time+0x4d/0x60 +[ 439.002282] __sys_sendmsg+0x51/0x90 +[ 439.002658] do_syscall_64+0x50/0x110 +[ 439.003040] entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Fixes: 718ce4d601db ("net/mlx5: Consolidate update FTE for all removal changes") +Fixes: cefc23554fc2 ("net/mlx5: Fix FTE cleanup") +Signed-off-by: Mark Bloch +Reviewed-by: Maor Gottlieb +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20241107183527.676877-4-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +index a47d6419160d7..fb01acbadf732 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +@@ -1946,13 +1946,22 @@ lookup_fte_locked(struct mlx5_flow_group *g, + fte_tmp = NULL; + goto out; + } ++ ++ nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); ++ + if (!fte_tmp->node.active) { ++ up_write_ref_node(&fte_tmp->node, false); ++ ++ if (take_write) ++ up_write_ref_node(&g->node, false); ++ else ++ up_read_ref_node(&g->node); ++ + tree_put_node(&fte_tmp->node, false); +- fte_tmp = NULL; +- goto out; ++ ++ return NULL; + } + +- nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); + out: + if (take_write) + up_write_ref_node(&g->node, false); +-- +2.43.0 + diff --git a/queue-6.11/net-mlx5e-clear-xdp-features-on-non-uplink-represent.patch b/queue-6.11/net-mlx5e-clear-xdp-features-on-non-uplink-represent.patch new file mode 100644 index 00000000000..238d46767c9 --- /dev/null +++ b/queue-6.11/net-mlx5e-clear-xdp-features-on-non-uplink-represent.patch @@ -0,0 +1,59 @@ +From 7947b20fb5a0fad2b1a0404ec47fbea34f53e389 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 20:35:25 +0200 +Subject: net/mlx5e: clear xdp features on 
non-uplink representors + +From: William Tu + +[ Upstream commit c079389878debf767dc4e52fe877b9117258dfe2 ] + +Non-uplink representor port does not support XDP. The patch clears +the xdp feature by checking whether net_device_ops.ndo_bpf is set. + +Verify using the netlink tool: +$ tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml --dump dev-get + +Representor netdev before the patch: +{'ifindex': 8, + 'xdp-features': {'basic', + 'ndo-xmit', + 'ndo-xmit-sg', + 'redirect', + 'rx-sg', + 'xsk-zerocopy'}, + 'xdp-rx-metadata-features': set(), + 'xdp-zc-max-segs': 1, + 'xsk-features': set()}, +With the patch: + {'ifindex': 8, + 'xdp-features': set(), + 'xdp-rx-metadata-features': set(), + 'xsk-features': set()}, + +Fixes: 4d5ab0ad964d ("net/mlx5e: take into account device reconfiguration for xdp_features flag") +Signed-off-by: William Tu +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20241107183527.676877-6-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 3e11c1c6d4f69..99d0b977ed3d2 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -4266,7 +4266,8 @@ void mlx5e_set_xdp_feature(struct net_device *netdev) + struct mlx5e_params *params = &priv->channels.params; + xdp_features_t val; + +- if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { ++ if (!netdev->netdev_ops->ndo_bpf || ++ params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + xdp_clear_features_flag(netdev); + return; + } +-- +2.43.0 + diff --git a/queue-6.11/net-mlx5e-ct-fix-null-ptr-deref-in-add-rule-err-flow.patch b/queue-6.11/net-mlx5e-ct-fix-null-ptr-deref-in-add-rule-err-flow.patch new file mode 100644 index 
00000000000..22ea9c4043b --- /dev/null +++ b/queue-6.11/net-mlx5e-ct-fix-null-ptr-deref-in-add-rule-err-flow.patch @@ -0,0 +1,71 @@ +From f8d3a5df283312a456d5a3998523026991257cff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 20:35:26 +0200 +Subject: net/mlx5e: CT: Fix null-ptr-deref in add rule err flow +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Moshe Shemesh + +[ Upstream commit e99c6873229fe0482e7ceb7d5600e32d623ed9d9 ] + +In error flow of mlx5_tc_ct_entry_add_rule(), in case ct_rule_add() +callback returns error, zone_rule->attr is used uninitialized. Fix it to +use attr which has the needed pointer value. + +Kernel log: + BUG: kernel NULL pointer dereference, address: 0000000000000110 + RIP: 0010:mlx5_tc_ct_entry_add_rule+0x2b1/0x2f0 [mlx5_core] +… + Call Trace: + + ? __die+0x20/0x70 + ? page_fault_oops+0x150/0x3e0 + ? exc_page_fault+0x74/0x140 + ? asm_exc_page_fault+0x22/0x30 + ? mlx5_tc_ct_entry_add_rule+0x2b1/0x2f0 [mlx5_core] + ? mlx5_tc_ct_entry_add_rule+0x1d5/0x2f0 [mlx5_core] + mlx5_tc_ct_block_flow_offload+0xc6a/0xf90 [mlx5_core] + ? nf_flow_offload_tuple+0xd8/0x190 [nf_flow_table] + nf_flow_offload_tuple+0xd8/0x190 [nf_flow_table] + flow_offload_work_handler+0x142/0x320 [nf_flow_table] + ? finish_task_switch.isra.0+0x15b/0x2b0 + process_one_work+0x16c/0x320 + worker_thread+0x28c/0x3a0 + ? __pfx_worker_thread+0x10/0x10 + kthread+0xb8/0xf0 + ? __pfx_kthread+0x10/0x10 + ret_from_fork+0x2d/0x50 + ? 
__pfx_kthread+0x10/0x10 + ret_from_fork_asm+0x1a/0x30 + + +Fixes: 7fac5c2eced3 ("net/mlx5: CT: Avoid reusing modify header context for natted entries") +Signed-off-by: Moshe Shemesh +Reviewed-by: Cosmin Ratiu +Reviewed-by: Yevgeny Kliteynik +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20241107183527.676877-7-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +index 71a168746ebe2..deabed1d46a14 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +@@ -866,7 +866,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, + return 0; + + err_rule: +- mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); ++ mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr, zone_rule->mh); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); + err_mod_hdr: + kfree(attr); +-- +2.43.0 + diff --git a/queue-6.11/net-mlx5e-disable-loopback-self-test-on-multi-pf-net.patch b/queue-6.11/net-mlx5e-disable-loopback-self-test-on-multi-pf-net.patch new file mode 100644 index 00000000000..de4572acb50 --- /dev/null +++ b/queue-6.11/net-mlx5e-disable-loopback-self-test-on-multi-pf-net.patch @@ -0,0 +1,52 @@ +From 124039ceeb29c8b1afb7c999196096cf8024abeb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 20:35:27 +0200 +Subject: net/mlx5e: Disable loopback self-test on multi-PF netdev + +From: Carolina Jubran + +[ Upstream commit d1ac33934a66e8d58a52668999bf9e8f59e56c81 ] + +In Multi-PF (Socket Direct) configurations, when a loopback packet is +sent through one of the secondary devices, it will always be received +on the primary device. 
This causes the loopback layer to fail in +identifying the loopback packet as the devices are different. + +To avoid false test failures, disable the loopback self-test in +Multi-PF configurations. + +Fixes: ed29705e4ed1 ("net/mlx5: Enable SD feature") +Signed-off-by: Carolina Jubran +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20241107183527.676877-8-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +index 5bf8318cc48b8..1d60465cc2ca4 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +@@ -36,6 +36,7 @@ + #include "en.h" + #include "en/port.h" + #include "eswitch.h" ++#include "lib/mlx5.h" + + static int mlx5e_test_health_info(struct mlx5e_priv *priv) + { +@@ -247,6 +248,9 @@ static int mlx5e_cond_loopback(struct mlx5e_priv *priv) + if (is_mdev_switchdev_mode(priv->mdev)) + return -EOPNOTSUPP; + ++ if (mlx5_get_sd(priv->mdev)) ++ return -EOPNOTSUPP; ++ + return 0; + } + +-- +2.43.0 + diff --git a/queue-6.11/net-mlx5e-ktls-fix-incorrect-page-refcounting.patch b/queue-6.11/net-mlx5e-ktls-fix-incorrect-page-refcounting.patch new file mode 100644 index 00000000000..045c80e647a --- /dev/null +++ b/queue-6.11/net-mlx5e-ktls-fix-incorrect-page-refcounting.patch @@ -0,0 +1,72 @@ +From 3b96f523f0f9c8865006466bc256e82311bd0f63 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 20:35:24 +0200 +Subject: net/mlx5e: kTLS, Fix incorrect page refcounting + +From: Dragos Tatulea + +[ Upstream commit dd6e972cc5890d91d6749bb48e3912721c4e4b25 ] + +The kTLS tx handling code is using a mix of get_page() and +page_ref_inc() APIs to increment the page reference. 
But on the release +path (mlx5e_ktls_tx_handle_resync_dump_comp()), only put_page() is used. + +This is an issue when using pages from large folios: the get_page() +references are stored on the folio page while the page_ref_inc() +references are stored directly in the given page. On release the folio +page will be dereferenced too many times. + +This was found while doing kTLS testing with sendfile() + ZC when the +served file was read from NFS on a kernel with NFS large folios support +(commit 49b29a573da8 ("nfs: add support for large folios")). + +Fixes: 84d1bb2b139e ("net/mlx5e: kTLS, Limit DUMP wqe size") +Signed-off-by: Dragos Tatulea +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20241107183527.676877-5-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +index d61be26a4df1a..3db31cc107192 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +@@ -660,7 +660,7 @@ tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, + while (remaining > 0) { + skb_frag_t *frag = &record->frags[i]; + +- get_page(skb_frag_page(frag)); ++ page_ref_inc(skb_frag_page(frag)); + remaining -= skb_frag_size(frag); + info->frags[i++] = *frag; + } +@@ -763,7 +763,7 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + stats = sq->stats; + + mlx5e_tx_dma_unmap(sq->pdev, dma); +- put_page(wi->resync_dump_frag_page); ++ page_ref_dec(wi->resync_dump_frag_page); + stats->tls_dump_packets++; + stats->tls_dump_bytes += wi->num_bytes; + } +@@ -816,12 +816,12 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, + + err_out: + for (; i < info.nr_frags; i++) +- /* The 
put_page() here undoes the page ref obtained in tx_sync_info_get(). ++ /* The page_ref_dec() here undoes the page ref obtained in tx_sync_info_get(). + * Page refs obtained for the DUMP WQEs above (by page_ref_add) will be + * released only upon their completions (or in mlx5e_free_txqsq_descs, + * if channel closes). + */ +- put_page(skb_frag_page(&info.frags[i])); ++ page_ref_dec(skb_frag_page(&info.frags[i])); + + return MLX5E_KTLS_SYNC_FAIL; + } +-- +2.43.0 + diff --git a/queue-6.11/net-phylink-ensure-phy-momentary-link-fails-are-hand.patch b/queue-6.11/net-phylink-ensure-phy-momentary-link-fails-are-hand.patch new file mode 100644 index 00000000000..1f9b27accfe --- /dev/null +++ b/queue-6.11/net-phylink-ensure-phy-momentary-link-fails-are-hand.patch @@ -0,0 +1,97 @@ +From 10df8dc7814e42be2d6f27c98f15d9c50c080ffd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Nov 2024 16:20:00 +0000 +Subject: net: phylink: ensure PHY momentary link-fails are handled + +From: Russell King (Oracle) + +[ Upstream commit 671154f174e0e7f242507cd074497661deb41bfd ] + +Normally, phylib won't notify changes in quick succession. However, as +a result of commit 3e43b903da04 ("net: phy: Immediately call +adjust_link if only tx_lpi_enabled changes") this is no longer true - +it is now possible that phy_link_down() and phy_link_up() will both +complete before phylink's resolver has run, which means it'll miss that +pl->phy_state.link momentarily became false. + +Rename "mac_link_dropped" to be more generic "link_failed" since it will +cover more than the MAC/PCS end of the link failing, and arrange to set +this in phylink_phy_change() if we notice that the PHY reports that the +link is down. + +This will ensure that we capture an EEE reconfiguration event. 
+ +Fixes: 3e43b903da04 ("net: phy: Immediately call adjust_link if only tx_lpi_enabled changes") +Signed-off-by: Russell King (Oracle) +Reviewed-by: Oleksij Rempel +Link: https://patch.msgid.link/E1tAtcW-002RBS-LB@rmk-PC.armlinux.org.uk +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/phylink.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c +index 51c526d227fab..ed60836180b78 100644 +--- a/drivers/net/phy/phylink.c ++++ b/drivers/net/phy/phylink.c +@@ -78,7 +78,7 @@ struct phylink { + unsigned int pcs_neg_mode; + unsigned int pcs_state; + +- bool mac_link_dropped; ++ bool link_failed; + bool using_mac_select_pcs; + + struct sfp_bus *sfp_bus; +@@ -1475,9 +1475,9 @@ static void phylink_resolve(struct work_struct *w) + cur_link_state = pl->old_link_state; + + if (pl->phylink_disable_state) { +- pl->mac_link_dropped = false; ++ pl->link_failed = false; + link_state.link = false; +- } else if (pl->mac_link_dropped) { ++ } else if (pl->link_failed) { + link_state.link = false; + retrigger = true; + } else { +@@ -1572,7 +1572,7 @@ static void phylink_resolve(struct work_struct *w) + phylink_link_up(pl, link_state); + } + if (!link_state.link && retrigger) { +- pl->mac_link_dropped = false; ++ pl->link_failed = false; + queue_work(system_power_efficient_wq, &pl->resolve); + } + mutex_unlock(&pl->state_mutex); +@@ -1793,6 +1793,8 @@ static void phylink_phy_change(struct phy_device *phydev, bool up) + pl->phy_state.pause |= MLO_PAUSE_RX; + pl->phy_state.interface = phydev->interface; + pl->phy_state.link = up; ++ if (!up) ++ pl->link_failed = true; + mutex_unlock(&pl->state_mutex); + + phylink_run_resolve(pl); +@@ -2116,7 +2118,7 @@ EXPORT_SYMBOL_GPL(phylink_disconnect_phy); + static void phylink_link_changed(struct phylink *pl, bool up, const char *what) + { + if (!up) +- pl->mac_link_dropped = true; ++ pl->link_failed = true; + 
phylink_run_resolve(pl); + phylink_dbg(pl, "%s link %s\n", what, up ? "up" : "down"); + } +@@ -2750,7 +2752,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, + * link will cycle. + */ + if (manual_changed) { +- pl->mac_link_dropped = true; ++ pl->link_failed = true; + phylink_run_resolve(pl); + } + +-- +2.43.0 + diff --git a/queue-6.11/net-sched-cls_u32-fix-u32-s-systematic-failure-to-fr.patch b/queue-6.11/net-sched-cls_u32-fix-u32-s-systematic-failure-to-fr.patch new file mode 100644 index 00000000000..6dda3125af0 --- /dev/null +++ b/queue-6.11/net-sched-cls_u32-fix-u32-s-systematic-failure-to-fr.patch @@ -0,0 +1,101 @@ +From 8c8336a941cf43856f6bc3035225691eb753b054 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 10 Nov 2024 18:28:36 +0100 +Subject: net: sched: cls_u32: Fix u32's systematic failure to free IDR entries + for hnodes. + +From: Alexandre Ferrieux + +[ Upstream commit 73af53d82076bbe184d9ece9e14b0dc8599e6055 ] + +To generate hnode handles (in gen_new_htid()), u32 uses IDR and +encodes the returned small integer into a structured 32-bit +word. Unfortunately, at disposal time, the needed decoding +is not done. As a result, idr_remove() fails, and the IDR +fills up. Since its size is 2048, the following script ends up +with "Filter already exists": + + tc filter add dev myve $FILTER1 + tc filter add dev myve $FILTER2 + for i in {1..2048} + do + echo $i + tc filter del dev myve $FILTER2 + tc filter add dev myve $FILTER2 + done + +This patch adds the missing decoding logic for handles that +deserve it. 
+ +Fixes: e7614370d6f0 ("net_sched: use idr to allocate u32 filter handles") +Reviewed-by: Eric Dumazet +Acked-by: Jamal Hadi Salim +Signed-off-by: Alexandre Ferrieux +Tested-by: Victor Nogueira +Link: https://patch.msgid.link/20241110172836.331319-1-alexandre.ferrieux@orange.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/cls_u32.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c +index 9412d88a99bc1..d3a03c57545bc 100644 +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -92,6 +92,16 @@ struct tc_u_common { + long knodes; + }; + ++static u32 handle2id(u32 h) ++{ ++ return ((h & 0x80000000) ? ((h >> 20) & 0x7FF) : h); ++} ++ ++static u32 id2handle(u32 id) ++{ ++ return (id | 0x800U) << 20; ++} ++ + static inline unsigned int u32_hash_fold(__be32 key, + const struct tc_u32_sel *sel, + u8 fshift) +@@ -310,7 +320,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr) + int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL); + if (id < 0) + return 0; +- return (id | 0x800U) << 20; ++ return id2handle(id); + } + + static struct hlist_head *tc_u_common_hash; +@@ -360,7 +370,7 @@ static int u32_init(struct tcf_proto *tp) + return -ENOBUFS; + + refcount_set(&root_ht->refcnt, 1); +- root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000; ++ root_ht->handle = tp_c ? 
gen_new_htid(tp_c, root_ht) : id2handle(0); + root_ht->prio = tp->prio; + root_ht->is_root = true; + idr_init(&root_ht->handle_idr); +@@ -612,7 +622,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, + if (phn == ht) { + u32_clear_hw_hnode(tp, ht, extack); + idr_destroy(&ht->handle_idr); +- idr_remove(&tp_c->handle_idr, ht->handle); ++ idr_remove(&tp_c->handle_idr, handle2id(ht->handle)); + RCU_INIT_POINTER(*hn, ht->next); + kfree_rcu(ht, rcu); + return 0; +@@ -989,7 +999,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, + + err = u32_replace_hw_hnode(tp, ht, userflags, extack); + if (err) { +- idr_remove(&tp_c->handle_idr, handle); ++ idr_remove(&tp_c->handle_idr, handle2id(handle)); + kfree(ht); + return err; + } +-- +2.43.0 + diff --git a/queue-6.11/net-stmmac-dwmac-mediatek-fix-inverted-handling-of-m.patch b/queue-6.11/net-stmmac-dwmac-mediatek-fix-inverted-handling-of-m.patch new file mode 100644 index 00000000000..8d2209ee227 --- /dev/null +++ b/queue-6.11/net-stmmac-dwmac-mediatek-fix-inverted-handling-of-m.patch @@ -0,0 +1,46 @@ +From c56ec727b9eee6a62ba405b177e45baed92f6d4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 9 Nov 2024 10:16:32 -0500 +Subject: net: stmmac: dwmac-mediatek: Fix inverted handling of + mediatek,mac-wol +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Nícolas F. R. A. Prado + +[ Upstream commit a03b18a71c128846360cc81ac6fdb0e7d41597b4 ] + +The mediatek,mac-wol property is being handled backwards to what is +described in the binding: it currently enables PHY WOL when the property +is present and vice versa. Invert the driver logic so it matches the +binding description. + +Fixes: fd1d62d80ebc ("net: stmmac: replace the use_phy_wol field with a flag") +Signed-off-by: Nícolas F. R. A. 
Prado +Link: https://patch.msgid.link/20241109-mediatek-mac-wol-noninverted-v2-1-0e264e213878@collabora.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +index 2a9132d6d743c..001857c294fba 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +@@ -589,9 +589,9 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, + + plat->mac_interface = priv_plat->phy_mode; + if (priv_plat->mac_wol) +- plat->flags |= STMMAC_FLAG_USE_PHY_WOL; +- else + plat->flags &= ~STMMAC_FLAG_USE_PHY_WOL; ++ else ++ plat->flags |= STMMAC_FLAG_USE_PHY_WOL; + plat->riwt_off = 1; + plat->maxmtu = ETH_DATA_LEN; + plat->host_dma_width = priv_plat->variant->dma_bit_mask; +-- +2.43.0 + diff --git a/queue-6.11/net-ti-icssg-prueth-fix-1-pps-sync.patch b/queue-6.11/net-ti-icssg-prueth-fix-1-pps-sync.patch new file mode 100644 index 00000000000..fea0ab35390 --- /dev/null +++ b/queue-6.11/net-ti-icssg-prueth-fix-1-pps-sync.patch @@ -0,0 +1,93 @@ +From e1ce5fb584c1f50fcb2f2d9e3d94aaa41d3eec24 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Nov 2024 15:28:42 +0530 +Subject: net: ti: icssg-prueth: Fix 1 PPS sync + +From: Meghana Malladi + +[ Upstream commit dc065076ee7768377d7c16af7d1b0767782d8c98 ] + +The first PPS latch time needs to be calculated by the driver +(in rounded off seconds) and configured as the start time +offset for the cycle. After synchronizing two PTP clocks +running as master/slave, missing this would cause master +and slave to start immediately with some milliseconds +drift which causes the PPS signal to never synchronize with +the PTP master. 
+ +Fixes: 186734c15886 ("net: ti: icssg-prueth: add packet timestamping and ptp support") +Signed-off-by: Meghana Malladi +Reviewed-by: Vadim Fedorenko +Reviewed-by: MD Danish Anwar +Link: https://patch.msgid.link/20241111095842.478833-1-m-malladi@ti.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/ti/icssg/icssg_prueth.c | 13 +++++++++++-- + drivers/net/ethernet/ti/icssg/icssg_prueth.h | 12 ++++++++++++ + 2 files changed, 23 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c +index 33cb3590a5cde..55d12679b24b7 100644 +--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c ++++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -389,6 +390,8 @@ static int prueth_perout_enable(void *clockops_data, + struct prueth_emac *emac = clockops_data; + u32 reduction_factor = 0, offset = 0; + struct timespec64 ts; ++ u64 current_cycle; ++ u64 start_offset; + u64 ns_period; + + if (!on) +@@ -427,8 +430,14 @@ static int prueth_perout_enable(void *clockops_data, + writel(reduction_factor, emac->prueth->shram.va + + TIMESYNC_FW_WC_SYNCOUT_REDUCTION_FACTOR_OFFSET); + +- writel(0, emac->prueth->shram.va + +- TIMESYNC_FW_WC_SYNCOUT_START_TIME_CYCLECOUNT_OFFSET); ++ current_cycle = icssg_read_time(emac->prueth->shram.va + ++ TIMESYNC_FW_WC_CYCLECOUNT_OFFSET); ++ ++ /* Rounding of current_cycle count to next second */ ++ start_offset = roundup(current_cycle, MSEC_PER_SEC); ++ ++ hi_lo_writeq(start_offset, emac->prueth->shram.va + ++ TIMESYNC_FW_WC_SYNCOUT_START_TIME_CYCLECOUNT_OFFSET); + + return 0; + } +diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h +index 4d1c895dacdb6..169949acf2539 100644 +--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h ++++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h +@@ -316,6 +316,18 
@@ static inline int prueth_emac_slice(struct prueth_emac *emac) + extern const struct ethtool_ops icssg_ethtool_ops; + extern const struct dev_pm_ops prueth_dev_pm_ops; + ++static inline u64 icssg_read_time(const void __iomem *addr) ++{ ++ u32 low, high; ++ ++ do { ++ high = readl(addr + 4); ++ low = readl(addr); ++ } while (high != readl(addr + 4)); ++ ++ return low + ((u64)high << 32); ++} ++ + /* Classifier helpers */ + void icssg_class_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac); + void icssg_class_set_host_mac_addr(struct regmap *miig_rt, const u8 *mac); +-- +2.43.0 + diff --git a/queue-6.11/net-vertexcom-mse102x-fix-tx_bytes-calculation.patch b/queue-6.11/net-vertexcom-mse102x-fix-tx_bytes-calculation.patch new file mode 100644 index 00000000000..d197276f6b0 --- /dev/null +++ b/queue-6.11/net-vertexcom-mse102x-fix-tx_bytes-calculation.patch @@ -0,0 +1,46 @@ +From 12331dff89e4af3630eb80b45306023aad103187 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Nov 2024 12:43:43 +0100 +Subject: net: vertexcom: mse102x: Fix tx_bytes calculation + +From: Stefan Wahren + +[ Upstream commit e68da664d379f352d41d7955712c44e0a738e4ab ] + +The tx_bytes should consider the actual size of the Ethernet frames +without the SPI encapsulation. But we still need to take care of +Ethernet padding. 
+ +Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") +Signed-off-by: Stefan Wahren +Link: https://patch.msgid.link/20241108114343.6174-3-wahrenst@gmx.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/vertexcom/mse102x.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c +index 33ef3a49de8ee..bccf0ac66b1a8 100644 +--- a/drivers/net/ethernet/vertexcom/mse102x.c ++++ b/drivers/net/ethernet/vertexcom/mse102x.c +@@ -437,13 +437,15 @@ static void mse102x_tx_work(struct work_struct *work) + mse = &mses->mse102x; + + while ((txb = skb_dequeue(&mse->txq))) { ++ unsigned int len = max_t(unsigned int, txb->len, ETH_ZLEN); ++ + mutex_lock(&mses->lock); + ret = mse102x_tx_pkt_spi(mse, txb, work_timeout); + mutex_unlock(&mses->lock); + if (ret) { + mse->ndev->stats.tx_dropped++; + } else { +- mse->ndev->stats.tx_bytes += txb->len; ++ mse->ndev->stats.tx_bytes += len; + mse->ndev->stats.tx_packets++; + } + +-- +2.43.0 + diff --git a/queue-6.11/netlink-terminate-outstanding-dump-on-socket-close.patch b/queue-6.11/netlink-terminate-outstanding-dump-on-socket-close.patch new file mode 100644 index 00000000000..66c69a21968 --- /dev/null +++ b/queue-6.11/netlink-terminate-outstanding-dump-on-socket-close.patch @@ -0,0 +1,142 @@ +From f2c29f8abb62d225d8cc43c684be96b7147d7ebf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 5 Nov 2024 17:52:34 -0800 +Subject: netlink: terminate outstanding dump on socket close + +From: Jakub Kicinski + +[ Upstream commit 1904fb9ebf911441f90a68e96b22aa73e4410505 ] + +Netlink supports iterative dumping of data. 
It provides the families +the following ops: + - start - (optional) kicks off the dumping process + - dump - actual dump helper, keeps getting called until it returns 0 + - done - (optional) pairs with .start, can be used for cleanup +The whole process is asynchronous and the repeated calls to .dump +don't actually happen in a tight loop, but rather are triggered +in response to recvmsg() on the socket. + +This gives the user full control over the dump, but also means that +the user can close the socket without getting to the end of the dump. +To make sure .start is always paired with .done we check if there +is an ongoing dump before freeing the socket, and if so call .done. + +The complication is that sockets can get freed from BH and .done +is allowed to sleep. So we use a workqueue to defer the call, when +needed. + +Unfortunately this does not work correctly. What we defer is not +the cleanup but rather releasing a reference on the socket. +We have no guarantee that we own the last reference, if someone +else holds the socket they may release it in BH and we're back +to square one. + +The whole dance, however, appears to be unnecessary. Only the user +can interact with dumps, so we can clean up when socket is closed. +And close always happens in process context. Some async code may +still access the socket after close, queue notification skbs to it etc. +but no dumps can start, end or otherwise make progress. + +Delete the workqueue and flush the dump state directly from the release +handler. Note that further cleanup is possible in -next, for instance +we now always call .done before releasing the main module reference, +so dump doesn't have to take a reference of its own. 
+ +Reported-by: syzkaller +Fixes: ed5d7788a934 ("netlink: Do not schedule work from sk_destruct") +Reviewed-by: Kuniyuki Iwashima +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20241106015235.2458807-1-kuba@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/netlink/af_netlink.c | 31 ++++++++----------------------- + net/netlink/af_netlink.h | 2 -- + 2 files changed, 8 insertions(+), 25 deletions(-) + +diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c +index 0a9287fadb47a..f84aad420d446 100644 +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -393,15 +393,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) + + static void netlink_sock_destruct(struct sock *sk) + { +- struct netlink_sock *nlk = nlk_sk(sk); +- +- if (nlk->cb_running) { +- if (nlk->cb.done) +- nlk->cb.done(&nlk->cb); +- module_put(nlk->cb.module); +- kfree_skb(nlk->cb.skb); +- } +- + skb_queue_purge(&sk->sk_receive_queue); + + if (!sock_flag(sk, SOCK_DEAD)) { +@@ -414,14 +405,6 @@ static void netlink_sock_destruct(struct sock *sk) + WARN_ON(nlk_sk(sk)->groups); + } + +-static void netlink_sock_destruct_work(struct work_struct *work) +-{ +- struct netlink_sock *nlk = container_of(work, struct netlink_sock, +- work); +- +- sk_free(&nlk->sk); +-} +- + /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on + * SMP. Look, when several writers sleep and reader wakes them up, all but one + * immediately hit write lock and grab all the cpus. 
Exclusive sleep solves +@@ -731,12 +714,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head) + if (!refcount_dec_and_test(&sk->sk_refcnt)) + return; + +- if (nlk->cb_running && nlk->cb.done) { +- INIT_WORK(&nlk->work, netlink_sock_destruct_work); +- schedule_work(&nlk->work); +- return; +- } +- + sk_free(sk); + } + +@@ -788,6 +765,14 @@ static int netlink_release(struct socket *sock) + NETLINK_URELEASE, &n); + } + ++ /* Terminate any outstanding dump */ ++ if (nlk->cb_running) { ++ if (nlk->cb.done) ++ nlk->cb.done(&nlk->cb); ++ module_put(nlk->cb.module); ++ kfree_skb(nlk->cb.skb); ++ } ++ + module_put(nlk->module); + + if (netlink_is_kernel(sk)) { +diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h +index 9751e29d4bbb9..b1a17c0d97a10 100644 +--- a/net/netlink/af_netlink.h ++++ b/net/netlink/af_netlink.h +@@ -4,7 +4,6 @@ + + #include + #include +-#include + #include + + /* flags */ +@@ -51,7 +50,6 @@ struct netlink_sock { + + struct rhash_head node; + struct rcu_head rcu; +- struct work_struct work; + }; + + static inline struct netlink_sock *nlk_sk(struct sock *sk) +-- +2.43.0 + diff --git a/queue-6.11/revert-drm-amd-pm-correct-the-workload-setting.patch b/queue-6.11/revert-drm-amd-pm-correct-the-workload-setting.patch new file mode 100644 index 00000000000..cb9ee80f062 --- /dev/null +++ b/queue-6.11/revert-drm-amd-pm-correct-the-workload-setting.patch @@ -0,0 +1,388 @@ +From 3dd802692b11c8ca9e1c007da33f754d7bf3b11a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 Nov 2024 09:22:14 -0500 +Subject: Revert "drm/amd/pm: correct the workload setting" + +From: Alex Deucher + +[ Upstream commit 44f392fbf628a7ff2d8bb8e83ca1851261f81a6f ] + +This reverts commit 74e1006430a5377228e49310f6d915628609929e. + +This causes a regression in the workload selection. +A more extensive fix is being worked on. +For now, revert. 
+ +Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3618 +Fixes: 74e1006430a5 ("drm/amd/pm: correct the workload setting") +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 49 ++++++------------- + drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 +- + .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 5 +- + .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 5 +- + .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 5 +- + .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +- + .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 4 +- + .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 20 ++------ + .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 5 +- + .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 9 ++-- + drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 8 --- + drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 2 - + 12 files changed, 36 insertions(+), 84 deletions(-) + +diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +index ee1bcfaae3e3d..80e60ea2d11e3 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +@@ -1259,33 +1259,26 @@ static int smu_sw_init(void *handle) + smu->watermarks_bitmap = 0; + smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; +- smu->user_dpm_profile.user_workload_mask = 0; + + atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); + atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); + atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); + atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); + +- smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; +- smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; +- smu->workload_priority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; +- smu->workload_priority[PP_SMC_POWER_PROFILE_VIDEO] = 3; +- smu->workload_priority[PP_SMC_POWER_PROFILE_VR] = 4; +- 
smu->workload_priority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; +- smu->workload_priority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; ++ smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; ++ smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; ++ smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; ++ smu->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3; ++ smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4; ++ smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; ++ smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; + + if (smu->is_apu || +- !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) { +- smu->driver_workload_mask = +- 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; +- } else { +- smu->driver_workload_mask = +- 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; +- smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; +- } ++ !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) ++ smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; ++ else ++ smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; + +- smu->workload_mask = smu->driver_workload_mask | +- smu->user_dpm_profile.user_workload_mask; + smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; +@@ -2355,20 +2348,17 @@ static int smu_switch_power_profile(void *handle, + return -EINVAL; + + if (!en) { +- smu->driver_workload_mask &= ~(1 << smu->workload_priority[type]); ++ smu->workload_mask &= ~(1 << smu->workload_prority[type]); + index = fls(smu->workload_mask); + index = index > 0 && index <= WORKLOAD_POLICY_MAX ? 
index - 1 : 0; + workload[0] = smu->workload_setting[index]; + } else { +- smu->driver_workload_mask |= (1 << smu->workload_priority[type]); ++ smu->workload_mask |= (1 << smu->workload_prority[type]); + index = fls(smu->workload_mask); + index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; + workload[0] = smu->workload_setting[index]; + } + +- smu->workload_mask = smu->driver_workload_mask | +- smu->user_dpm_profile.user_workload_mask; +- + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && + smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) + smu_bump_power_profile_mode(smu, workload, 0); +@@ -3059,23 +3049,12 @@ static int smu_set_power_profile_mode(void *handle, + uint32_t param_size) + { + struct smu_context *smu = handle; +- int ret; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || + !smu->ppt_funcs->set_power_profile_mode) + return -EOPNOTSUPP; + +- if (smu->user_dpm_profile.user_workload_mask & +- (1 << smu->workload_priority[param[param_size]])) +- return 0; +- +- smu->user_dpm_profile.user_workload_mask = +- (1 << smu->workload_priority[param[param_size]]); +- smu->workload_mask = smu->user_dpm_profile.user_workload_mask | +- smu->driver_workload_mask; +- ret = smu_bump_power_profile_mode(smu, param, param_size); +- +- return ret; ++ return smu_bump_power_profile_mode(smu, param, param_size); + } + + static int smu_get_fan_control_mode(void *handle, u32 *fan_mode) +diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +index d60d9a12a47ef..b44a185d07e84 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h ++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +@@ -240,7 +240,6 @@ struct smu_user_dpm_profile { + /* user clock state information */ + uint32_t clk_mask[SMU_CLK_COUNT]; + uint32_t clk_dependency; +- uint32_t user_workload_mask; + }; + + #define SMU_TABLE_INIT(tables, table_id, s, a, d) \ +@@ -558,8 +557,7 @@ struct smu_context { + bool 
disable_uclk_switch; + + uint32_t workload_mask; +- uint32_t driver_workload_mask; +- uint32_t workload_priority[WORKLOAD_POLICY_MAX]; ++ uint32_t workload_prority[WORKLOAD_POLICY_MAX]; + uint32_t workload_setting[WORKLOAD_POLICY_MAX]; + uint32_t power_profile_mode; + uint32_t default_power_profile_mode; +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +index 31fe512028f46..c0f6b59369b7c 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +@@ -1455,6 +1455,7 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, + return -EINVAL; + } + ++ + if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) && + (smu->smc_fw_version >= 0x360d00)) { + if (size != 10) +@@ -1522,14 +1523,14 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetWorkloadMask, +- smu->workload_mask, ++ 1 << workload_type, + NULL); + if (ret) { + dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type); + return ret; + } + +- smu_cmn_assign_power_profile(smu); ++ smu->power_profile_mode = profile_mode; + + return 0; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +index bb4ae529ae20e..076620fa3ef5a 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +@@ -2081,13 +2081,10 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u + smu->power_profile_mode); + if (workload_type < 0) + return -EINVAL; +- + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, +- smu->workload_mask, NULL); ++ 1 << workload_type, NULL); + if (ret) + dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); +- else +- smu_cmn_assign_power_profile(smu); + + return ret; + } +diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +index ca94c52663c07..0d3e1a121b670 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +@@ -1786,13 +1786,10 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * + smu->power_profile_mode); + if (workload_type < 0) + return -EINVAL; +- + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, +- smu->workload_mask, NULL); ++ 1 << workload_type, NULL); + if (ret) + dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); +- else +- smu_cmn_assign_power_profile(smu); + + return ret; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +index 952ee22cbc90e..1fe020f1f4dbe 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +@@ -1079,7 +1079,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, + } + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, +- smu->workload_mask, ++ 1 << workload_type, + NULL); + if (ret) { + dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", +@@ -1087,7 +1087,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, + return ret; + } + +- smu_cmn_assign_power_profile(smu); ++ smu->power_profile_mode = profile_mode; + + return 0; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +index 62316a6707ef2..cc0504b063fa3 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +@@ -890,14 +890,14 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u + } + + ret = smu_cmn_send_smc_msg_with_param(smu, 
SMU_MSG_ActiveProcessNotify, +- smu->workload_mask, ++ 1 << workload_type, + NULL); + if (ret) { + dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type); + return ret; + } + +- smu_cmn_assign_power_profile(smu); ++ smu->power_profile_mode = profile_mode; + + return 0; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +index 5dd7ceca64fee..d53e162dcd8de 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +@@ -2485,7 +2485,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, + DpmActivityMonitorCoeffInt_t *activity_monitor = + &(activity_monitor_external.DpmActivityMonitorCoeffInt); + int workload_type, ret = 0; +- u32 workload_mask; ++ u32 workload_mask, selected_workload_mask; + + smu->power_profile_mode = input[size]; + +@@ -2552,7 +2552,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, + if (workload_type < 0) + return -EINVAL; + +- workload_mask = 1 << workload_type; ++ selected_workload_mask = workload_mask = 1 << workload_type; + + /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ + if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && +@@ -2567,22 +2567,12 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, + workload_mask |= 1 << workload_type; + } + +- smu->workload_mask |= workload_mask; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetWorkloadMask, +- smu->workload_mask, ++ workload_mask, + NULL); +- if (!ret) { +- smu_cmn_assign_power_profile(smu); +- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) { +- workload_type = smu_cmn_to_asic_specific_index(smu, +- CMN2ASIC_MAPPING_WORKLOAD, +- PP_SMC_POWER_PROFILE_FULLSCREEN3D); +- smu->power_profile_mode = smu->workload_mask & (1 << workload_type) +- ? 
PP_SMC_POWER_PROFILE_FULLSCREEN3D +- : PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; +- } +- } ++ if (!ret) ++ smu->workload_mask = selected_workload_mask; + + return ret; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +index 9d0b19419de0f..b891a5e0a3969 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +@@ -2499,14 +2499,13 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp + smu->power_profile_mode); + if (workload_type < 0) + return -EINVAL; +- + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, +- smu->workload_mask, NULL); ++ 1 << workload_type, NULL); + + if (ret) + dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + else +- smu_cmn_assign_power_profile(smu); ++ smu->workload_mask = (1 << workload_type); + + return ret; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +index d9f0e7f81ed78..eaf80c5b3e4d0 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +@@ -1508,11 +1508,12 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, + if (workload_type < 0) + return -EINVAL; + +- ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, +- smu->workload_mask, NULL); +- ++ ret = smu_cmn_send_smc_msg_with_param(smu, ++ SMU_MSG_SetWorkloadMask, ++ 1 << workload_type, ++ NULL); + if (!ret) +- smu_cmn_assign_power_profile(smu); ++ smu->workload_mask = 1 << workload_type; + + return ret; + } +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +index bdfc5e617333d..91ad434bcdaeb 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +@@ -1138,14 +1138,6 @@ int smu_cmn_set_mp1_state(struct 
smu_context *smu, + return ret; + } + +-void smu_cmn_assign_power_profile(struct smu_context *smu) +-{ +- uint32_t index; +- index = fls(smu->workload_mask); +- index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; +- smu->power_profile_mode = smu->workload_setting[index]; +-} +- + bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev) + { + struct pci_dev *p = NULL; +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +index 8a801e389659d..1de685defe85b 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +@@ -130,8 +130,6 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev); + int smu_cmn_set_mp1_state(struct smu_context *smu, + enum pp_mp1_state mp1_state); + +-void smu_cmn_assign_power_profile(struct smu_context *smu); +- + /* + * Helper function to make sysfs_emit_at() happy. Align buf to + * the current page boundary and record the offset. +-- +2.43.0 + diff --git a/queue-6.11/revert-rdma-core-fix-enodev-error-for-iwarp-test-ove.patch b/queue-6.11/revert-rdma-core-fix-enodev-error-for-iwarp-test-ove.patch new file mode 100644 index 00000000000..48438f7b857 --- /dev/null +++ b/queue-6.11/revert-rdma-core-fix-enodev-error-for-iwarp-test-ove.patch @@ -0,0 +1,54 @@ +From 0ba631752006c428aaf72fa6e6d3bf94af127013 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Nov 2024 10:56:26 +0200 +Subject: Revert "RDMA/core: Fix ENODEV error for iWARP test over vlan" + +From: Leon Romanovsky + +[ Upstream commit 6abe2a90808192a5a8b2825293e5f10e80fdea56 ] + +The cited commit in the Fixes line caused a regression for the udaddy [1] +application. It doesn't work over VLANs anymore. 
+ +Client: + ifconfig eth2 1.1.1.1 + ip link add link eth2 name p0.3597 type vlan protocol 802.1Q id 3597 + ip link set dev p0.3597 up + ip addr add 2.2.2.2/16 dev p0.3597 + udaddy -S 847 -C 220 -c 2 -t 0 -s 2.2.2.3 -b 2.2.2.2 + +Server: + ifconfig eth2 1.1.1.3 + ip link add link eth2 name p0.3597 type vlan protocol 802.1Q id 3597 + ip link set dev p0.3597 up + ip addr add 2.2.2.3/16 dev p0.3597 + udaddy -S 847 -C 220 -c 2 -t 0 -b 2.2.2.3 + +[1] https://github.com/linux-rdma/rdma-core/blob/master/librdmacm/examples/udaddy.c + +Fixes: 5069d7e202f6 ("RDMA/core: Fix ENODEV error for iWARP test over vlan") +Reported-by: Leon Romanovsky +Closes: https://lore.kernel.org/all/20241110130746.GA48891@unreal +Link: https://patch.msgid.link/bb9d403419b2b9566da5b8bf0761fa8377927e49.1731401658.git.leon@kernel.org +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/core/addr.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c +index c4cf26f1d1496..be0743dac3fff 100644 +--- a/drivers/infiniband/core/addr.c ++++ b/drivers/infiniband/core/addr.c +@@ -269,8 +269,6 @@ rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in) + break; + #endif + } +- if (!ret && dev && is_vlan_dev(dev)) +- dev = vlan_dev_real_dev(dev); + return ret ? 
ERR_PTR(ret) : dev; + } + +-- +2.43.0 + diff --git a/queue-6.11/samples-pktgen-correct-dev-to-dev.patch b/queue-6.11/samples-pktgen-correct-dev-to-dev.patch new file mode 100644 index 00000000000..d9980f21e2b --- /dev/null +++ b/queue-6.11/samples-pktgen-correct-dev-to-dev.patch @@ -0,0 +1,40 @@ +From eda88e12a2fff9712111b87d1e5fb35a019d5fe5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Nov 2024 11:03:47 +0800 +Subject: samples: pktgen: correct dev to DEV + +From: Wei Fang + +[ Upstream commit 3342dc8b4623d835e7dd76a15cec2e5a94fe2f93 ] + +In the pktgen_sample01_simple.sh script, the device variable is uppercase +'DEV' instead of lowercase 'dev'. Because of this typo, the script cannot +enable UDP tx checksum. + +Fixes: 460a9aa23de6 ("samples: pktgen: add UDP tx checksum support") +Signed-off-by: Wei Fang +Reviewed-by: Simon Horman +Acked-by: Jesper Dangaard Brouer +Link: https://patch.msgid.link/20241112030347.1849335-1-wei.fang@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + samples/pktgen/pktgen_sample01_simple.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh +index cdb9f497f87da..66cb707479e6c 100755 +--- a/samples/pktgen/pktgen_sample01_simple.sh ++++ b/samples/pktgen/pktgen_sample01_simple.sh +@@ -76,7 +76,7 @@ if [ -n "$DST_PORT" ]; then + pg_set $DEV "udp_dst_max $UDP_DST_MAX" + fi + +-[ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM" ++[ ! 
-z "$UDP_CSUM" ] && pg_set $DEV "flag UDPCSUM" + + # Setup random UDP port src range + pg_set $DEV "flag UDPSRC_RND" +-- +2.43.0 + diff --git a/queue-6.11/sctp-fix-possible-uaf-in-sctp_v6_available.patch b/queue-6.11/sctp-fix-possible-uaf-in-sctp_v6_available.patch new file mode 100644 index 00000000000..294b9628097 --- /dev/null +++ b/queue-6.11/sctp-fix-possible-uaf-in-sctp_v6_available.patch @@ -0,0 +1,140 @@ +From b9a15e07fbb68d4568aaa86020deead9afee6641 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 19:20:21 +0000 +Subject: sctp: fix possible UAF in sctp_v6_available() + +From: Eric Dumazet + +[ Upstream commit eb72e7fcc83987d5d5595b43222f23b295d5de7f ] + +A lockdep report [1] with CONFIG_PROVE_RCU_LIST=y hints +that sctp_v6_available() is calling dev_get_by_index_rcu() +and ipv6_chk_addr() without holding rcu. + +[1] + ============================= + WARNING: suspicious RCU usage + 6.12.0-rc5-virtme #1216 Tainted: G W + ----------------------------- + net/core/dev.c:876 RCU-list traversed in non-reader section!! + +other info that might help us debug this: + +rcu_scheduler_active = 2, debug_locks = 1 + 1 lock held by sctp_hello/31495: + #0: ffff9f1ebbdb7418 (sk_lock-AF_INET6){+.+.}-{0:0}, at: sctp_bind (./arch/x86/include/asm/jump_label.h:27 net/sctp/socket.c:315) sctp + +stack backtrace: + CPU: 7 UID: 0 PID: 31495 Comm: sctp_hello Tainted: G W 6.12.0-rc5-virtme #1216 + Tainted: [W]=WARN + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 + Call Trace: + + dump_stack_lvl (lib/dump_stack.c:123) + lockdep_rcu_suspicious (kernel/locking/lockdep.c:6822) + dev_get_by_index_rcu (net/core/dev.c:876 (discriminator 7)) + sctp_v6_available (net/sctp/ipv6.c:701) sctp + sctp_do_bind (net/sctp/socket.c:400 (discriminator 1)) sctp + sctp_bind (net/sctp/socket.c:320) sctp + inet6_bind_sk (net/ipv6/af_inet6.c:465) + ? 
security_socket_bind (security/security.c:4581 (discriminator 1)) + __sys_bind (net/socket.c:1848 net/socket.c:1869) + ? do_user_addr_fault (./include/linux/rcupdate.h:347 ./include/linux/rcupdate.h:880 ./include/linux/mm.h:729 arch/x86/mm/fault.c:1340) + ? do_user_addr_fault (./arch/x86/include/asm/preempt.h:84 (discriminator 13) ./include/linux/rcupdate.h:98 (discriminator 13) ./include/linux/rcupdate.h:882 (discriminator 13) ./include/linux/mm.h:729 (discriminator 13) arch/x86/mm/fault.c:1340 (discriminator 13)) + __x64_sys_bind (net/socket.c:1877 (discriminator 1) net/socket.c:1875 (discriminator 1) net/socket.c:1875 (discriminator 1)) + do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) + entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) + RIP: 0033:0x7f59b934a1e7 + Code: 44 00 00 48 8b 15 39 8c 0c 00 f7 d8 64 89 02 b8 ff ff ff ff eb bd 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 b8 31 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 09 8c 0c 00 f7 d8 64 89 01 48 +All code +======== + 0: 44 00 00 add %r8b,(%rax) + 3: 48 8b 15 39 8c 0c 00 mov 0xc8c39(%rip),%rdx # 0xc8c43 + a: f7 d8 neg %eax + c: 64 89 02 mov %eax,%fs:(%rdx) + f: b8 ff ff ff ff mov $0xffffffff,%eax + 14: eb bd jmp 0xffffffffffffffd3 + 16: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1) + 1d: 00 00 00 + 20: 0f 1f 00 nopl (%rax) + 23: b8 31 00 00 00 mov $0x31,%eax + 28: 0f 05 syscall + 2a:* 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax <-- trapping instruction + 30: 73 01 jae 0x33 + 32: c3 ret + 33: 48 8b 0d 09 8c 0c 00 mov 0xc8c09(%rip),%rcx # 0xc8c43 + 3a: f7 d8 neg %eax + 3c: 64 89 01 mov %eax,%fs:(%rcx) + 3f: 48 rex.W + +Code starting with the faulting instruction +=========================================== + 0: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax + 6: 73 01 jae 0x9 + 8: c3 ret + 9: 48 8b 0d 09 8c 0c 00 mov 0xc8c09(%rip),%rcx # 0xc8c19 + 10: f7 d8 neg %eax + 12: 64 89 01 mov %eax,%fs:(%rcx) + 15: 48 rex.W + RSP: 
002b:00007ffe2d0ad398 EFLAGS: 00000202 ORIG_RAX: 0000000000000031 + RAX: ffffffffffffffda RBX: 00007ffe2d0ad3d0 RCX: 00007f59b934a1e7 + RDX: 000000000000001c RSI: 00007ffe2d0ad3d0 RDI: 0000000000000005 + RBP: 0000000000000005 R08: 1999999999999999 R09: 0000000000000000 + R10: 00007f59b9253298 R11: 0000000000000202 R12: 00007ffe2d0ada61 + R13: 0000000000000000 R14: 0000562926516dd8 R15: 00007f59b9479000 + + +Fixes: 6fe1e52490a9 ("sctp: check ipv6 addr with sk_bound_dev if set") +Signed-off-by: Eric Dumazet +Cc: Marcelo Ricardo Leitner +Acked-by: Xin Long +Link: https://patch.msgid.link/20241107192021.2579789-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sctp/ipv6.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c +index f7b809c0d142c..38e2fbdcbeac4 100644 +--- a/net/sctp/ipv6.c ++++ b/net/sctp/ipv6.c +@@ -683,7 +683,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) + struct sock *sk = &sp->inet.sk; + struct net *net = sock_net(sk); + struct net_device *dev = NULL; +- int type; ++ int type, res, bound_dev_if; + + type = ipv6_addr_type(in6); + if (IPV6_ADDR_ANY == type) +@@ -697,14 +697,21 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) + if (!(type & IPV6_ADDR_UNICAST)) + return 0; + +- if (sk->sk_bound_dev_if) { +- dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); ++ rcu_read_lock(); ++ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); ++ if (bound_dev_if) { ++ res = 0; ++ dev = dev_get_by_index_rcu(net, bound_dev_if); + if (!dev) +- return 0; ++ goto out; + } + +- return ipv6_can_nonlocal_bind(net, &sp->inet) || +- ipv6_chk_addr(net, in6, dev, 0); ++ res = ipv6_can_nonlocal_bind(net, &sp->inet) || ++ ipv6_chk_addr(net, in6, dev, 0); ++ ++out: ++ rcu_read_unlock(); ++ return res; + } + + /* This function checks if the address is a valid address to be used for +-- +2.43.0 + diff --git 
a/queue-6.11/series b/queue-6.11/series new file mode 100644 index 00000000000..3b1d5af6997 --- /dev/null +++ b/queue-6.11/series @@ -0,0 +1,35 @@ +netlink-terminate-outstanding-dump-on-socket-close.patch +sctp-fix-possible-uaf-in-sctp_v6_available.patch +net-vertexcom-mse102x-fix-tx_bytes-calculation.patch +drm-rockchip-vop-fix-a-dereferenced-before-check-war.patch +net-fix-data-races-around-sk-sk_forward_alloc.patch +mptcp-error-out-earlier-on-disconnect.patch +mptcp-cope-racing-subflow-creation-in-mptcp_rcv_spac.patch +net-mlx5-fix-msix-vectors-to-respect-platform-limit.patch +net-mlx5-fs-lock-fte-when-checking-if-active.patch +net-mlx5e-ktls-fix-incorrect-page-refcounting.patch +net-mlx5e-clear-xdp-features-on-non-uplink-represent.patch +net-mlx5e-ct-fix-null-ptr-deref-in-add-rule-err-flow.patch +net-mlx5e-disable-loopback-self-test-on-multi-pf-net.patch +drm-i915-gsc-arl-h-and-arl-u-need-a-newer-gsc-fw.patch +virtio-vsock-fix-accept_queue-memory-leak.patch +vsock-fix-sk_error_queue-memory-leak.patch +virtio-vsock-improve-msg_zerocopy-error-handling.patch +revert-rdma-core-fix-enodev-error-for-iwarp-test-ove.patch +drivers-perf-fix-wrong-put_cpu-placement.patch +bluetooth-hci_core-fix-calling-mgmt_device_connected.patch +bluetooth-btintel-direct-exception-event-to-bluetoot.patch +drm-panthor-fix-handling-of-partial-gpu-mapping-of-b.patch +net-sched-cls_u32-fix-u32-s-systematic-failure-to-fr.patch +net-phylink-ensure-phy-momentary-link-fails-are-hand.patch +samples-pktgen-correct-dev-to-dev.patch +net-stmmac-dwmac-mediatek-fix-inverted-handling-of-m.patch +net-make-copy_safe_from_sockptr-match-documentation.patch +stmmac-dwmac-intel-plat-fix-call-balance-of-tx_clk-h.patch +drm-vmwgfx-avoid-null_ptr_deref-in-vmw_framebuffer_s.patch +net-ti-icssg-prueth-fix-1-pps-sync.patch +bonding-add-ns-target-multicast-address-to-slave-dev.patch +arm-9419-1-mm-fix-kernel-memory-mapping-for-xip-kern.patch +arm-fix-cacheflush-with-pan.patch +tools-mm-fix-compile-error.patch 
+revert-drm-amd-pm-correct-the-workload-setting.patch diff --git a/queue-6.11/stmmac-dwmac-intel-plat-fix-call-balance-of-tx_clk-h.patch b/queue-6.11/stmmac-dwmac-intel-plat-fix-call-balance-of-tx_clk-h.patch new file mode 100644 index 00000000000..8454424f6e6 --- /dev/null +++ b/queue-6.11/stmmac-dwmac-intel-plat-fix-call-balance-of-tx_clk-h.patch @@ -0,0 +1,96 @@ +From 947d32e2dcf3a0dd9739c7307f12731dd08a9eb3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Nov 2024 20:33:34 +0300 +Subject: stmmac: dwmac-intel-plat: fix call balance of tx_clk handling + routines + +From: Vitalii Mordan + +[ Upstream commit 5b366eae71937ae7412365340b431064625f9617 ] + +If the clock dwmac->tx_clk was not enabled in intel_eth_plat_probe, +it should not be disabled in any path. + +Conversely, if it was enabled in intel_eth_plat_probe, it must be disabled +in all error paths to ensure proper cleanup. + +Found by Linux Verification Center (linuxtesting.org) with Klever. + +Fixes: 9efc9b2b04c7 ("net: stmmac: Add dwmac-intel-plat for GBE driver") +Signed-off-by: Vitalii Mordan +Link: https://patch.msgid.link/20241108173334.2973603-1-mordan@ispras.ru +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../stmicro/stmmac/dwmac-intel-plat.c | 25 +++++++++++++------ + 1 file changed, 17 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +index d68f0c4e78350..9739bc9867c51 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +@@ -108,7 +108,12 @@ static int intel_eth_plat_probe(struct platform_device *pdev) + if (IS_ERR(dwmac->tx_clk)) + return PTR_ERR(dwmac->tx_clk); + +- clk_prepare_enable(dwmac->tx_clk); ++ ret = clk_prepare_enable(dwmac->tx_clk); ++ if (ret) { ++ dev_err(&pdev->dev, ++ "Failed to enable tx_clk\n"); ++ return ret; ++ } + + /* Check and configure TX clock rate */ + rate 
= clk_get_rate(dwmac->tx_clk); +@@ -119,7 +124,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) + if (ret) { + dev_err(&pdev->dev, + "Failed to set tx_clk\n"); +- return ret; ++ goto err_tx_clk_disable; + } + } + } +@@ -133,7 +138,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) + if (ret) { + dev_err(&pdev->dev, + "Failed to set clk_ptp_ref\n"); +- return ret; ++ goto err_tx_clk_disable; + } + } + } +@@ -149,12 +154,15 @@ static int intel_eth_plat_probe(struct platform_device *pdev) + } + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); +- if (ret) { +- clk_disable_unprepare(dwmac->tx_clk); +- return ret; +- } ++ if (ret) ++ goto err_tx_clk_disable; + + return 0; ++ ++err_tx_clk_disable: ++ if (dwmac->data->tx_clk_en) ++ clk_disable_unprepare(dwmac->tx_clk); ++ return ret; + } + + static void intel_eth_plat_remove(struct platform_device *pdev) +@@ -162,7 +170,8 @@ static void intel_eth_plat_remove(struct platform_device *pdev) + struct intel_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); + + stmmac_pltfr_remove(pdev); +- clk_disable_unprepare(dwmac->tx_clk); ++ if (dwmac->data->tx_clk_en) ++ clk_disable_unprepare(dwmac->tx_clk); + } + + static struct platform_driver intel_eth_plat_driver = { +-- +2.43.0 + diff --git a/queue-6.11/tools-mm-fix-compile-error.patch b/queue-6.11/tools-mm-fix-compile-error.patch new file mode 100644 index 00000000000..3d352ef6fd8 --- /dev/null +++ b/queue-6.11/tools-mm-fix-compile-error.patch @@ -0,0 +1,44 @@ +From 6beb08b0d62cd0325a33a0cbf335d1b6cfbc89d1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Nov 2024 19:16:55 +0200 +Subject: tools/mm: fix compile error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Motiejus Jakštys + +[ Upstream commit a39326767c55c00c7c313333404cbcb502cce8fe ] + +Add a missing semicolon. 
+ +Link: https://lkml.kernel.org/r/20241112171655.1662670-1-motiejus@jakstys.lt +Fixes: ece5897e5a10 ("tools/mm: -Werror fixes in page-types/slabinfo") +Signed-off-by: Motiejus Jakštys +Closes: https://github.com/NixOS/nixpkgs/issues/355369 +Reviewed-by: SeongJae Park +Reviewed-by: Vishal Moola (Oracle) +Acked-by: Oleksandr Natalenko +Cc: Wladislav Wiebe +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + tools/mm/page-types.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c +index 2a4ca4dd2da80..69f00eab1b8c7 100644 +--- a/tools/mm/page-types.c ++++ b/tools/mm/page-types.c +@@ -421,7 +421,7 @@ static void show_page(unsigned long voffset, unsigned long offset, + if (opt_file) + printf("%lx\t", voffset); + if (opt_list_cgroup) +- printf("@%" PRIu64 "\t", cgroup) ++ printf("@%" PRIu64 "\t", cgroup); + if (opt_list_mapcnt) + printf("%" PRIu64 "\t", mapcnt); + +-- +2.43.0 + diff --git a/queue-6.11/virtio-vsock-fix-accept_queue-memory-leak.patch b/queue-6.11/virtio-vsock-fix-accept_queue-memory-leak.patch new file mode 100644 index 00000000000..95b9c50e9a5 --- /dev/null +++ b/queue-6.11/virtio-vsock-fix-accept_queue-memory-leak.patch @@ -0,0 +1,93 @@ +From 097c7df1d9706180da4c105c991a2c6a88bf2448 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 21:46:12 +0100 +Subject: virtio/vsock: Fix accept_queue memory leak + +From: Michal Luczaj + +[ Upstream commit d7b0ff5a866724c3ad21f2628c22a63336deec3f ] + +As the final stages of socket destruction may be delayed, it is possible +that virtio_transport_recv_listen() will be called after the accept_queue +has been flushed, but before the SOCK_DONE flag has been set. As a result, +sockets enqueued after the flush would remain unremoved, leading to a +memory leak. + +vsock_release + __vsock_release + lock + virtio_transport_release + virtio_transport_close + schedule_delayed_work(close_work) + sk_shutdown = SHUTDOWN_MASK +(!) 
flush accept_queue + release + virtio_transport_recv_pkt + vsock_find_bound_socket + lock + if flag(SOCK_DONE) return + virtio_transport_recv_listen + child = vsock_create_connected + (!) vsock_enqueue_accept(child) + release +close_work + lock + virtio_transport_do_close + set_flag(SOCK_DONE) + virtio_transport_remove_sock + vsock_remove_sock + vsock_remove_bound + release + +Introduce a sk_shutdown check to disallow vsock_enqueue_accept() during +socket destruction. + +unreferenced object 0xffff888109e3f800 (size 2040): + comm "kworker/5:2", pid 371, jiffies 4294940105 + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 28 00 0b 40 00 00 00 00 00 00 00 00 00 00 00 00 (..@............ + backtrace (crc 9e5f4e84): + [] kmem_cache_alloc_noprof+0x2c1/0x360 + [] sk_prot_alloc+0x30/0x120 + [] sk_alloc+0x2c/0x4b0 + [] __vsock_create.constprop.0+0x2a/0x310 + [] virtio_transport_recv_pkt+0x4dc/0x9a0 + [] vsock_loopback_work+0xfd/0x140 + [] process_one_work+0x20c/0x570 + [] worker_thread+0x1bf/0x3a0 + [] kthread+0xdd/0x110 + [] ret_from_fork+0x2d/0x50 + [] ret_from_fork_asm+0x1a/0x30 + +Fixes: 3fe356d58efa ("vsock/virtio: discard packets only when socket is really closed") +Reviewed-by: Stefano Garzarella +Signed-off-by: Michal Luczaj +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/vmw_vsock/virtio_transport_common.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c +index 01b6b1ed5acfb..5437819872dd7 100644 +--- a/net/vmw_vsock/virtio_transport_common.c ++++ b/net/vmw_vsock/virtio_transport_common.c +@@ -1478,6 +1478,14 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, + return -ENOMEM; + } + ++ /* __vsock_release() might have already flushed accept_queue. ++ * Subsequent enqueues would lead to a memory leak. 
++ */ ++ if (sk->sk_shutdown == SHUTDOWN_MASK) { ++ virtio_transport_reset_no_sock(t, skb); ++ return -ESHUTDOWN; ++ } ++ + child = vsock_create_connected(sk); + if (!child) { + virtio_transport_reset_no_sock(t, skb); +-- +2.43.0 + diff --git a/queue-6.11/virtio-vsock-improve-msg_zerocopy-error-handling.patch b/queue-6.11/virtio-vsock-improve-msg_zerocopy-error-handling.patch new file mode 100644 index 00000000000..694b3289ff6 --- /dev/null +++ b/queue-6.11/virtio-vsock-improve-msg_zerocopy-error-handling.patch @@ -0,0 +1,36 @@ +From 6f0c42aac71cde60e8f064106cd93a78fa548635 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 2024 21:46:14 +0100 +Subject: virtio/vsock: Improve MSG_ZEROCOPY error handling + +From: Michal Luczaj + +[ Upstream commit 60cf6206a1f513512f5d73fa4d3dbbcad2e7dcd6 ] + +Add a missing kfree_skb() to prevent memory leaks. + +Fixes: 581512a6dc93 ("vsock/virtio: MSG_ZEROCOPY flag support") +Reviewed-by: Stefano Garzarella +Signed-off-by: Michal Luczaj +Acked-by: Arseniy Krasnov +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/vmw_vsock/virtio_transport_common.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c +index 5437819872dd7..0211964e45459 100644 +--- a/net/vmw_vsock/virtio_transport_common.c ++++ b/net/vmw_vsock/virtio_transport_common.c +@@ -400,6 +400,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, + if (virtio_transport_init_zcopy_skb(vsk, skb, + info->msg, + can_zcopy)) { ++ kfree_skb(skb); + ret = -ENOMEM; + break; + } +-- +2.43.0 + diff --git a/queue-6.11/vsock-fix-sk_error_queue-memory-leak.patch b/queue-6.11/vsock-fix-sk_error_queue-memory-leak.patch new file mode 100644 index 00000000000..41d8f36d746 --- /dev/null +++ b/queue-6.11/vsock-fix-sk_error_queue-memory-leak.patch @@ -0,0 +1,59 @@ +From d64311e83b8bdf46160ccca4668d75dbbc995a29 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Nov 
2024 21:46:13 +0100 +Subject: vsock: Fix sk_error_queue memory leak + +From: Michal Luczaj + +[ Upstream commit fbf7085b3ad1c7cc0677834c90f985f1b4f77a33 ] + +The kernel queues MSG_ZEROCOPY completion notifications on the error queue, +where they remain until explicitly recv()ed. To prevent memory leaks, +clean up the queue when the socket is destroyed. + +unreferenced object 0xffff8881028beb00 (size 224): + comm "vsock_test", pid 1218, jiffies 4294694897 + hex dump (first 32 bytes): + 90 b0 21 17 81 88 ff ff 90 b0 21 17 81 88 ff ff ..!.......!..... + 00 00 00 00 00 00 00 00 00 b0 21 17 81 88 ff ff ..........!..... + backtrace (crc 6c7031ca): + [] kmem_cache_alloc_node_noprof+0x2f7/0x370 + [] __alloc_skb+0x132/0x180 + [] sock_omalloc+0x4b/0x80 + [] msg_zerocopy_realloc+0x9e/0x240 + [] virtio_transport_send_pkt_info+0x412/0x4c0 + [] virtio_transport_stream_enqueue+0x43/0x50 + [] vsock_connectible_sendmsg+0x373/0x450 + [] ____sys_sendmsg+0x365/0x3a0 + [] ___sys_sendmsg+0x84/0xd0 + [] __sys_sendmsg+0x47/0x80 + [] do_syscall_64+0x93/0x180 + [] entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Fixes: 581512a6dc93 ("vsock/virtio: MSG_ZEROCOPY flag support") +Signed-off-by: Michal Luczaj +Reviewed-by: Stefano Garzarella +Acked-by: Arseniy Krasnov +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/vmw_vsock/af_vsock.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c +index 0ff9b2dd86bac..a0202d9b47921 100644 +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -835,6 +835,9 @@ static void vsock_sk_destruct(struct sock *sk) + { + struct vsock_sock *vsk = vsock_sk(sk); + ++ /* Flush MSG_ZEROCOPY leftovers. */ ++ __skb_queue_purge(&sk->sk_error_queue); ++ + vsock_deassign_transport(vsk); + + /* When clearing these addresses, there's no need to set the family and +-- +2.43.0 +