git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author Sasha Levin <sashal@kernel.org>
Sun, 17 Nov 2024 14:35:19 +0000 (09:35 -0500)
committer Sasha Levin <sashal@kernel.org>
Sun, 17 Nov 2024 14:35:19 +0000 (09:35 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
16 files changed:
queue-6.1/arm-9419-1-mm-fix-kernel-memory-mapping-for-xip-kern.patch [new file with mode: 0644]
queue-6.1/bluetooth-hci_core-fix-calling-mgmt_device_connected.patch [new file with mode: 0644]
queue-6.1/bluetooth-hci_event-remove-code-to-removed-config_bt.patch [new file with mode: 0644]
queue-6.1/bonding-add-ns-target-multicast-address-to-slave-dev.patch [new file with mode: 0644]
queue-6.1/drm-rockchip-vop-fix-a-dereferenced-before-check-war.patch [new file with mode: 0644]
queue-6.1/mptcp-error-out-earlier-on-disconnect.patch [new file with mode: 0644]
queue-6.1/net-mlx5-fs-lock-fte-when-checking-if-active.patch [new file with mode: 0644]
queue-6.1/net-mlx5e-ct-fix-null-ptr-deref-in-add-rule-err-flow.patch [new file with mode: 0644]
queue-6.1/net-mlx5e-ktls-fix-incorrect-page-refcounting.patch [new file with mode: 0644]
queue-6.1/net-sched-cls_u32-fix-u32-s-systematic-failure-to-fr.patch [new file with mode: 0644]
queue-6.1/net-sched-cls_u32-replace-int-refcounts-with-proper-.patch [new file with mode: 0644]
queue-6.1/net-vertexcom-mse102x-fix-tx_bytes-calculation.patch [new file with mode: 0644]
queue-6.1/netlink-terminate-outstanding-dump-on-socket-close.patch [new file with mode: 0644]
queue-6.1/samples-pktgen-correct-dev-to-dev.patch [new file with mode: 0644]
queue-6.1/series [new file with mode: 0644]
queue-6.1/virtio-vsock-fix-accept_queue-memory-leak.patch [new file with mode: 0644]

diff --git a/queue-6.1/arm-9419-1-mm-fix-kernel-memory-mapping-for-xip-kern.patch b/queue-6.1/arm-9419-1-mm-fix-kernel-memory-mapping-for-xip-kern.patch
new file mode 100644 (file)
index 0000000..d31edce
--- /dev/null
@@ -0,0 +1,144 @@
+From 12ca009cc5ac6163370a3cce23f3201b7d1c0975 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Sep 2024 06:57:11 +0100
+Subject: ARM: 9419/1: mm: Fix kernel memory mapping for xip kernels
+
+From: Harith G <harith.g@alifsemi.com>
+
+[ Upstream commit ed6cbe6e5563452f305e89c15846820f2874e431 ]
+
+The patchset introducing the kernel_sec_start/end variables to separate
+the kernel/lowmem memory mappings broke the mapping of the kernel memory
+for XIP kernels.
+
+For XIP kernels, the kernel_sec_start/end variables are in the RO area
+before the MMU is switched on, so they cannot be set early in boot in
+head.S. Fix this by setting them after the MMU is switched on.
+XIP kernels need two different mappings, one for the kernel text
+(starting at CONFIG_XIP_PHYS_ADDR) and one for the data (starting at
+CONFIG_PHYS_OFFSET).
+Also, move the kernel code mapping from devicemaps_init() to map_kernel().
+
+Fixes: a91da5457085 ("ARM: 9089/1: Define kernel physical section start and end")
+Signed-off-by: Harith George <harith.g@alifsemi.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/kernel/head.S |  8 ++++++--
+ arch/arm/mm/mmu.c      | 34 +++++++++++++++++++++-------------
+ 2 files changed, 27 insertions(+), 15 deletions(-)
+
+diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
+index 29e2900178a1f..b97da9e069a06 100644
+--- a/arch/arm/kernel/head.S
++++ b/arch/arm/kernel/head.S
+@@ -252,11 +252,15 @@ __create_page_tables:
+        */
+       add     r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ENTRY_ORDER)
+       ldr     r6, =(_end - 1)
++
++      /* For XIP, kernel_sec_start/kernel_sec_end are currently in RO memory */
++#ifndef CONFIG_XIP_KERNEL
+       adr_l   r5, kernel_sec_start            @ _pa(kernel_sec_start)
+ #if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32
+       str     r8, [r5, #4]                    @ Save physical start of kernel (BE)
+ #else
+       str     r8, [r5]                        @ Save physical start of kernel (LE)
++#endif
+ #endif
+       orr     r3, r8, r7                      @ Add the MMU flags
+       add     r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ENTRY_ORDER)
+@@ -264,6 +268,7 @@ __create_page_tables:
+       add     r3, r3, #1 << SECTION_SHIFT
+       cmp     r0, r6
+       bls     1b
++#ifndef CONFIG_XIP_KERNEL
+       eor     r3, r3, r7                      @ Remove the MMU flags
+       adr_l   r5, kernel_sec_end              @ _pa(kernel_sec_end)
+ #if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32
+@@ -271,8 +276,7 @@ __create_page_tables:
+ #else
+       str     r3, [r5]                        @ Save physical end of kernel (LE)
+ #endif
+-
+-#ifdef CONFIG_XIP_KERNEL
++#else
+       /*
+        * Map the kernel image separately as it is not located in RAM.
+        */
+diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
+index 463fc2a8448f0..a39a7043f1896 100644
+--- a/arch/arm/mm/mmu.c
++++ b/arch/arm/mm/mmu.c
+@@ -1401,18 +1401,6 @@ static void __init devicemaps_init(const struct machine_desc *mdesc)
+               create_mapping(&map);
+       }
+-      /*
+-       * Map the kernel if it is XIP.
+-       * It is always first in the modulearea.
+-       */
+-#ifdef CONFIG_XIP_KERNEL
+-      map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
+-      map.virtual = MODULES_VADDR;
+-      map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK;
+-      map.type = MT_ROM;
+-      create_mapping(&map);
+-#endif
+-
+       /*
+        * Map the cache flushing regions.
+        */
+@@ -1602,12 +1590,27 @@ static void __init map_kernel(void)
+        * This will only persist until we turn on proper memory management later on
+        * and we remap the whole kernel with page granularity.
+        */
++#ifdef CONFIG_XIP_KERNEL
++      phys_addr_t kernel_nx_start = kernel_sec_start;
++#else
+       phys_addr_t kernel_x_start = kernel_sec_start;
+       phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+       phys_addr_t kernel_nx_start = kernel_x_end;
++#endif
+       phys_addr_t kernel_nx_end = kernel_sec_end;
+       struct map_desc map;
++      /*
++       * Map the kernel if it is XIP.
++       * It is always first in the modulearea.
++       */
++#ifdef CONFIG_XIP_KERNEL
++      map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
++      map.virtual = MODULES_VADDR;
++      map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK;
++      map.type = MT_ROM;
++      create_mapping(&map);
++#else
+       map.pfn = __phys_to_pfn(kernel_x_start);
+       map.virtual = __phys_to_virt(kernel_x_start);
+       map.length = kernel_x_end - kernel_x_start;
+@@ -1617,7 +1620,7 @@ static void __init map_kernel(void)
+       /* If the nx part is small it may end up covered by the tail of the RWX section */
+       if (kernel_x_end == kernel_nx_end)
+               return;
+-
++#endif
+       map.pfn = __phys_to_pfn(kernel_nx_start);
+       map.virtual = __phys_to_virt(kernel_nx_start);
+       map.length = kernel_nx_end - kernel_nx_start;
+@@ -1762,6 +1765,11 @@ void __init paging_init(const struct machine_desc *mdesc)
+ {
+       void *zero_page;
++#ifdef CONFIG_XIP_KERNEL
++      /* Store the kernel RW RAM region start/end in these variables */
++      kernel_sec_start = CONFIG_PHYS_OFFSET & SECTION_MASK;
++      kernel_sec_end = round_up(__pa(_end), SECTION_SIZE);
++#endif
+       pr_debug("physical kernel sections: 0x%08llx-0x%08llx\n",
+                kernel_sec_start, kernel_sec_end);
+-- 
+2.43.0
+
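A stand-alone sketch of the section-rounding arithmetic the patch above adds to
paging_init() for XIP kernels, where kernel_sec_start/end describe only the RW
RAM region; the addresses below are illustrative placeholders, not values taken
from the patch:

/* Sketch only: user-space model of the XIP RW-RAM section bounds. */
#include <stdint.h>
#include <stdio.h>

#define SECTION_SIZE  (1UL << 20)             /* 1 MiB ARM section */
#define SECTION_MASK  (~(SECTION_SIZE - 1))

static uint32_t round_up_section(uint32_t x)
{
    return (x + SECTION_SIZE - 1) & SECTION_MASK;
}

int main(void)
{
    uint32_t phys_offset = 0x60000000;        /* hypothetical CONFIG_PHYS_OFFSET */
    uint32_t pa_end      = 0x6043a128;        /* hypothetical __pa(_end) */

    uint32_t kernel_sec_start = phys_offset & SECTION_MASK;
    uint32_t kernel_sec_end   = round_up_section(pa_end);

    /* prints: physical kernel sections: 0x60000000-0x60500000 */
    printf("physical kernel sections: 0x%08x-0x%08x\n",
           kernel_sec_start, kernel_sec_end);
    return 0;
}
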
diff --git a/queue-6.1/bluetooth-hci_core-fix-calling-mgmt_device_connected.patch b/queue-6.1/bluetooth-hci_core-fix-calling-mgmt_device_connected.patch
new file mode 100644 (file)
index 0000000..d2d3676
--- /dev/null
@@ -0,0 +1,39 @@
+From be82b07c0320a2ef291bb9cde6975924454ecdf7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Nov 2024 11:19:54 -0500
+Subject: Bluetooth: hci_core: Fix calling mgmt_device_connected
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 7967dc8f797f454d4f4acec15c7df0cdf4801617 ]
+
+Since 61a939c68ee0 ("Bluetooth: Queue incoming ACL data until
+BT_CONNECTED state is reached") there is no longer a need to call
+mgmt_device_connected, as ACL data will be queued until the BT_CONNECTED
+state is reached.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=219458
+Link: https://github.com/bluez/bluez/issues/1014
+Fixes: 333b4fd11e89 ("Bluetooth: L2CAP: Fix uaf in l2cap_connect")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_core.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index f93f3e7a3d905..789f7f4a09089 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -3846,8 +3846,6 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
+       hci_dev_lock(hdev);
+       conn = hci_conn_hash_lookup_handle(hdev, handle);
+-      if (conn && hci_dev_test_flag(hdev, HCI_MGMT))
+-              mgmt_device_connected(hdev, conn, NULL, 0);
+       hci_dev_unlock(hdev);
+       if (conn) {
+-- 
+2.43.0
+
diff --git a/queue-6.1/bluetooth-hci_event-remove-code-to-removed-config_bt.patch b/queue-6.1/bluetooth-hci_event-remove-code-to-removed-config_bt.patch
new file mode 100644 (file)
index 0000000..3863734
--- /dev/null
@@ -0,0 +1,206 @@
+From b17dc856b99c631d5e06af11c12aed434bd8660b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 Feb 2024 14:42:11 +0100
+Subject: Bluetooth: hci_event: Remove code to removed CONFIG_BT_HS
+
+From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+
+[ Upstream commit f4b0c2b4cd78b75acde56c2ee5aa732b6fb2a6a9 ]
+
+Commit cec9f3c5561d ("Bluetooth: Remove BT_HS") removes config BT_HS, but
+misses two "ifdef BT_HS" blocks in hci_event.c.
+
+Remove the dead code left over from this removed config option.
+
+Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Stable-dep-of: 7967dc8f797f ("Bluetooth: hci_core: Fix calling mgmt_device_connected")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_event.c | 163 --------------------------------------
+ 1 file changed, 163 deletions(-)
+
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index 7c1df481ebe9d..b6fe5e15981f8 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -5648,150 +5648,6 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, void *edata,
+       hci_dev_unlock(hdev);
+ }
+-#if IS_ENABLED(CONFIG_BT_HS)
+-static void hci_chan_selected_evt(struct hci_dev *hdev, void *data,
+-                                struct sk_buff *skb)
+-{
+-      struct hci_ev_channel_selected *ev = data;
+-      struct hci_conn *hcon;
+-
+-      bt_dev_dbg(hdev, "handle 0x%2.2x", ev->phy_handle);
+-
+-      hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
+-      if (!hcon)
+-              return;
+-
+-      amp_read_loc_assoc_final_data(hdev, hcon);
+-}
+-
+-static void hci_phy_link_complete_evt(struct hci_dev *hdev, void *data,
+-                                    struct sk_buff *skb)
+-{
+-      struct hci_ev_phy_link_complete *ev = data;
+-      struct hci_conn *hcon, *bredr_hcon;
+-
+-      bt_dev_dbg(hdev, "handle 0x%2.2x status 0x%2.2x", ev->phy_handle,
+-                 ev->status);
+-
+-      hci_dev_lock(hdev);
+-
+-      hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
+-      if (!hcon)
+-              goto unlock;
+-
+-      if (!hcon->amp_mgr)
+-              goto unlock;
+-
+-      if (ev->status) {
+-              hci_conn_del(hcon);
+-              goto unlock;
+-      }
+-
+-      bredr_hcon = hcon->amp_mgr->l2cap_conn->hcon;
+-
+-      hcon->state = BT_CONNECTED;
+-      bacpy(&hcon->dst, &bredr_hcon->dst);
+-
+-      hci_conn_hold(hcon);
+-      hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
+-      hci_conn_drop(hcon);
+-
+-      hci_debugfs_create_conn(hcon);
+-      hci_conn_add_sysfs(hcon);
+-
+-      amp_physical_cfm(bredr_hcon, hcon);
+-
+-unlock:
+-      hci_dev_unlock(hdev);
+-}
+-
+-static void hci_loglink_complete_evt(struct hci_dev *hdev, void *data,
+-                                   struct sk_buff *skb)
+-{
+-      struct hci_ev_logical_link_complete *ev = data;
+-      struct hci_conn *hcon;
+-      struct hci_chan *hchan;
+-      struct amp_mgr *mgr;
+-
+-      bt_dev_dbg(hdev, "log_handle 0x%4.4x phy_handle 0x%2.2x status 0x%2.2x",
+-                 le16_to_cpu(ev->handle), ev->phy_handle, ev->status);
+-
+-      hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
+-      if (!hcon)
+-              return;
+-
+-      /* Create AMP hchan */
+-      hchan = hci_chan_create(hcon);
+-      if (!hchan)
+-              return;
+-
+-      hchan->handle = le16_to_cpu(ev->handle);
+-      hchan->amp = true;
+-
+-      BT_DBG("hcon %p mgr %p hchan %p", hcon, hcon->amp_mgr, hchan);
+-
+-      mgr = hcon->amp_mgr;
+-      if (mgr && mgr->bredr_chan) {
+-              struct l2cap_chan *bredr_chan = mgr->bredr_chan;
+-
+-              l2cap_chan_lock(bredr_chan);
+-
+-              bredr_chan->conn->mtu = hdev->block_mtu;
+-              l2cap_logical_cfm(bredr_chan, hchan, 0);
+-              hci_conn_hold(hcon);
+-
+-              l2cap_chan_unlock(bredr_chan);
+-      }
+-}
+-
+-static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev, void *data,
+-                                           struct sk_buff *skb)
+-{
+-      struct hci_ev_disconn_logical_link_complete *ev = data;
+-      struct hci_chan *hchan;
+-
+-      bt_dev_dbg(hdev, "handle 0x%4.4x status 0x%2.2x",
+-                 le16_to_cpu(ev->handle), ev->status);
+-
+-      if (ev->status)
+-              return;
+-
+-      hci_dev_lock(hdev);
+-
+-      hchan = hci_chan_lookup_handle(hdev, le16_to_cpu(ev->handle));
+-      if (!hchan || !hchan->amp)
+-              goto unlock;
+-
+-      amp_destroy_logical_link(hchan, ev->reason);
+-
+-unlock:
+-      hci_dev_unlock(hdev);
+-}
+-
+-static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, void *data,
+-                                           struct sk_buff *skb)
+-{
+-      struct hci_ev_disconn_phy_link_complete *ev = data;
+-      struct hci_conn *hcon;
+-
+-      bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+-
+-      if (ev->status)
+-              return;
+-
+-      hci_dev_lock(hdev);
+-
+-      hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
+-      if (hcon && hcon->type == AMP_LINK) {
+-              hcon->state = BT_CLOSED;
+-              hci_disconn_cfm(hcon, ev->reason);
+-              hci_conn_del(hcon);
+-      }
+-
+-      hci_dev_unlock(hdev);
+-}
+-#endif
+-
+ static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr,
+                               u8 bdaddr_type, bdaddr_t *local_rpa)
+ {
+@@ -7473,25 +7329,6 @@ static const struct hci_ev {
+       /* [0x3e = HCI_EV_LE_META] */
+       HCI_EV_REQ_VL(HCI_EV_LE_META, hci_le_meta_evt,
+                     sizeof(struct hci_ev_le_meta), HCI_MAX_EVENT_SIZE),
+-#if IS_ENABLED(CONFIG_BT_HS)
+-      /* [0x40 = HCI_EV_PHY_LINK_COMPLETE] */
+-      HCI_EV(HCI_EV_PHY_LINK_COMPLETE, hci_phy_link_complete_evt,
+-             sizeof(struct hci_ev_phy_link_complete)),
+-      /* [0x41 = HCI_EV_CHANNEL_SELECTED] */
+-      HCI_EV(HCI_EV_CHANNEL_SELECTED, hci_chan_selected_evt,
+-             sizeof(struct hci_ev_channel_selected)),
+-      /* [0x42 = HCI_EV_DISCONN_PHY_LINK_COMPLETE] */
+-      HCI_EV(HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE,
+-             hci_disconn_loglink_complete_evt,
+-             sizeof(struct hci_ev_disconn_logical_link_complete)),
+-      /* [0x45 = HCI_EV_LOGICAL_LINK_COMPLETE] */
+-      HCI_EV(HCI_EV_LOGICAL_LINK_COMPLETE, hci_loglink_complete_evt,
+-             sizeof(struct hci_ev_logical_link_complete)),
+-      /* [0x46 = HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE] */
+-      HCI_EV(HCI_EV_DISCONN_PHY_LINK_COMPLETE,
+-             hci_disconn_phylink_complete_evt,
+-             sizeof(struct hci_ev_disconn_phy_link_complete)),
+-#endif
+       /* [0x48 = HCI_EV_NUM_COMP_BLOCKS] */
+       HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt,
+              sizeof(struct hci_ev_num_comp_blocks)),
+-- 
+2.43.0
+
diff --git a/queue-6.1/bonding-add-ns-target-multicast-address-to-slave-dev.patch b/queue-6.1/bonding-add-ns-target-multicast-address-to-slave-dev.patch
new file mode 100644 (file)
index 0000000..2ce7562
--- /dev/null
@@ -0,0 +1,237 @@
+From 3fa74edcbfe01432dd5bebaa61e8cc076bd52ca8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Nov 2024 10:16:49 +0000
+Subject: bonding: add ns target multicast address to slave device
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 8eb36164d1a6769a20ed43033510067ff3dab9ee ]
+
+Commit 4598380f9c54 ("bonding: fix ns validation on backup slaves")
+tried to resolve the issue where backup slaves couldn't be brought up when
+receiving IPv6 Neighbor Solicitation (NS) messages. However, this fix only
+worked for drivers that receive all multicast messages, such as the veth
+interface.
+
+For standard drivers, the NS multicast message is silently dropped because
+the slave device is not a member of the NS target multicast group.
+
+To address this, we need to make the slave device join the NS target
+multicast group, ensuring it can receive these IPv6 NS messages to validate
+the slave’s status properly.
+
+There are three policies before joining the multicast group:
+1. All settings must be under active-backup mode (alb and tlb do not support
+   arp_validate), with backup slaves and slaves supporting multicast.
+2. We can add or remove multicast groups when arp_validate changes.
+3. Other operations, such as enslaving, releasing, or setting NS targets,
+   need to be guarded by arp_validate.
+
+Fixes: 4e24be018eb9 ("bonding: add new parameter ns_targets")
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/bonding/bond_main.c    | 16 +++++-
+ drivers/net/bonding/bond_options.c | 82 +++++++++++++++++++++++++++++-
+ include/net/bond_options.h         |  2 +
+ 3 files changed, 98 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
+index 51d6cf0a3fb4e..26a9f99882e61 100644
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -919,6 +919,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
+               if (bond->dev->flags & IFF_UP)
+                       bond_hw_addr_flush(bond->dev, old_active->dev);
++
++              bond_slave_ns_maddrs_add(bond, old_active);
+       }
+       if (new_active) {
+@@ -935,6 +937,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
+                       dev_mc_sync(new_active->dev, bond->dev);
+                       netif_addr_unlock_bh(bond->dev);
+               }
++
++              bond_slave_ns_maddrs_del(bond, new_active);
+       }
+ }
+@@ -2231,6 +2235,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+       bond_compute_features(bond);
+       bond_set_carrier(bond);
++      /* Needs to be called before bond_select_active_slave(), which will
++       * remove the maddrs if the slave is selected as active slave.
++       */
++      bond_slave_ns_maddrs_add(bond, new_slave);
++
+       if (bond_uses_primary(bond)) {
+               block_netpoll_tx();
+               bond_select_active_slave(bond);
+@@ -2240,7 +2249,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+       if (bond_mode_can_use_xmit_hash(bond))
+               bond_update_slave_arr(bond, NULL);
+-
+       if (!slave_dev->netdev_ops->ndo_bpf ||
+           !slave_dev->netdev_ops->ndo_xdp_xmit) {
+               if (bond->xdp_prog) {
+@@ -2436,6 +2444,12 @@ static int __bond_release_one(struct net_device *bond_dev,
+       if (oldcurrent == slave)
+               bond_change_active_slave(bond, NULL);
++      /* Must be called after bond_change_active_slave () as the slave
++       * might change from an active slave to a backup slave. Then it is
++       * necessary to clear the maddrs on the backup slave.
++       */
++      bond_slave_ns_maddrs_del(bond, slave);
++
+       if (bond_is_lb(bond)) {
+               /* Must be called only after the slave has been
+                * detached from the list and the curr_active_slave
+diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
+index 06c4cd0f00024..c8536dc7d860d 100644
+--- a/drivers/net/bonding/bond_options.c
++++ b/drivers/net/bonding/bond_options.c
+@@ -15,6 +15,7 @@
+ #include <linux/sched/signal.h>
+ #include <net/bonding.h>
++#include <net/ndisc.h>
+ static int bond_option_active_slave_set(struct bonding *bond,
+                                       const struct bond_opt_value *newval);
+@@ -1230,6 +1231,68 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
+ }
+ #if IS_ENABLED(CONFIG_IPV6)
++static bool slave_can_set_ns_maddr(const struct bonding *bond, struct slave *slave)
++{
++      return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
++             !bond_is_active_slave(slave) &&
++             slave->dev->flags & IFF_MULTICAST;
++}
++
++static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)
++{
++      struct in6_addr *targets = bond->params.ns_targets;
++      char slot_maddr[MAX_ADDR_LEN];
++      int i;
++
++      if (!slave_can_set_ns_maddr(bond, slave))
++              return;
++
++      for (i = 0; i < BOND_MAX_NS_TARGETS; i++) {
++              if (ipv6_addr_any(&targets[i]))
++                      break;
++
++              if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) {
++                      if (add)
++                              dev_mc_add(slave->dev, slot_maddr);
++                      else
++                              dev_mc_del(slave->dev, slot_maddr);
++              }
++      }
++}
++
++void bond_slave_ns_maddrs_add(struct bonding *bond, struct slave *slave)
++{
++      if (!bond->params.arp_validate)
++              return;
++      slave_set_ns_maddrs(bond, slave, true);
++}
++
++void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave)
++{
++      if (!bond->params.arp_validate)
++              return;
++      slave_set_ns_maddrs(bond, slave, false);
++}
++
++static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave,
++                             struct in6_addr *target, struct in6_addr *slot)
++{
++      char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN];
++
++      if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave))
++              return;
++
++      /* remove the previous maddr from slave */
++      if (!ipv6_addr_any(slot) &&
++          !ndisc_mc_map(slot, slot_maddr, slave->dev, 0))
++              dev_mc_del(slave->dev, slot_maddr);
++
++      /* add new maddr on slave if target is set */
++      if (!ipv6_addr_any(target) &&
++          !ndisc_mc_map(target, target_maddr, slave->dev, 0))
++              dev_mc_add(slave->dev, target_maddr);
++}
++
+ static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot,
+                                           struct in6_addr *target,
+                                           unsigned long last_rx)
+@@ -1239,8 +1302,10 @@ static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot,
+       struct slave *slave;
+       if (slot >= 0 && slot < BOND_MAX_NS_TARGETS) {
+-              bond_for_each_slave(bond, slave, iter)
++              bond_for_each_slave(bond, slave, iter) {
+                       slave->target_last_arp_rx[slot] = last_rx;
++                      slave_set_ns_maddr(bond, slave, target, &targets[slot]);
++              }
+               targets[slot] = *target;
+       }
+ }
+@@ -1292,15 +1357,30 @@ static int bond_option_ns_ip6_targets_set(struct bonding *bond,
+ {
+       return -EPERM;
+ }
++
++static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add) {}
++
++void bond_slave_ns_maddrs_add(struct bonding *bond, struct slave *slave) {}
++
++void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave) {}
+ #endif
+ static int bond_option_arp_validate_set(struct bonding *bond,
+                                       const struct bond_opt_value *newval)
+ {
++      bool changed = !!bond->params.arp_validate != !!newval->value;
++      struct list_head *iter;
++      struct slave *slave;
++
+       netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n",
+                  newval->string, newval->value);
+       bond->params.arp_validate = newval->value;
++      if (changed) {
++              bond_for_each_slave(bond, slave, iter)
++                      slave_set_ns_maddrs(bond, slave, !!bond->params.arp_validate);
++      }
++
+       return 0;
+ }
+diff --git a/include/net/bond_options.h b/include/net/bond_options.h
+index 69292ecc03257..f631d9f099410 100644
+--- a/include/net/bond_options.h
++++ b/include/net/bond_options.h
+@@ -160,5 +160,7 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond);
+ #if IS_ENABLED(CONFIG_IPV6)
+ void bond_option_ns_ip6_targets_clear(struct bonding *bond);
+ #endif
++void bond_slave_ns_maddrs_add(struct bonding *bond, struct slave *slave);
++void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave);
+ #endif /* _NET_BOND_OPTIONS_H */
+-- 
+2.43.0
+
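For illustration, the "NS target multicast group" that the patch above makes
the slave join is the target's solicited-node group, whose Ethernet address
ndisc_mc_map() derives per RFC 4861/RFC 2464. A stand-alone sketch of that
standard mapping (not the kernel helper itself; the target address is a
hypothetical example):

#include <arpa/inet.h>
#include <stdio.h>

int main(void)
{
    struct in6_addr target;
    unsigned char maddr[6] = { 0x33, 0x33, 0xff, 0, 0, 0 };

    /* hypothetical NS target configured on the bond */
    inet_pton(AF_INET6, "2001:db8::1:2", &target);

    /* solicited-node group ff02::1:ffXX:XXXX keeps the target's low 24 bits */
    maddr[3] = target.s6_addr[13];
    maddr[4] = target.s6_addr[14];
    maddr[5] = target.s6_addr[15];

    /* slave must join 33:33:ff:01:00:02 or the NIC filters the NS out */
    printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
           maddr[0], maddr[1], maddr[2], maddr[3], maddr[4], maddr[5]);
    return 0;
}
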
diff --git a/queue-6.1/drm-rockchip-vop-fix-a-dereferenced-before-check-war.patch b/queue-6.1/drm-rockchip-vop-fix-a-dereferenced-before-check-war.patch
new file mode 100644 (file)
index 0000000..41800c2
--- /dev/null
@@ -0,0 +1,47 @@
+From c51d6f7b4da8519b0826470c96b313ed18938bc1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Oct 2024 15:28:06 +0800
+Subject: drm/rockchip: vop: Fix a dereferenced before check warning
+
+From: Andy Yan <andy.yan@rock-chips.com>
+
+[ Upstream commit ab1c793f457f740ab7108cc0b1340a402dbf484d ]
+
+The 'state' can't be NULL; we should check crtc_state instead.
+
+Fix warning:
+drivers/gpu/drm/rockchip/rockchip_drm_vop.c:1096
+vop_plane_atomic_async_check() warn: variable dereferenced before check
+'state' (see line 1077)
+
+Fixes: 5ddb0bd4ddc3 ("drm/atomic: Pass the full state to planes async atomic check and update")
+Signed-off-by: Andy Yan <andy.yan@rock-chips.com>
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241021072818.61621-1-andyshrk@163.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+index b2289a523c408..e5b2112af1381 100644
+--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+@@ -1080,10 +1080,10 @@ static int vop_plane_atomic_async_check(struct drm_plane *plane,
+       if (!plane->state->fb)
+               return -EINVAL;
+-      if (state)
+-              crtc_state = drm_atomic_get_existing_crtc_state(state,
+-                                                              new_plane_state->crtc);
+-      else /* Special case for asynchronous cursor updates. */
++      crtc_state = drm_atomic_get_existing_crtc_state(state, new_plane_state->crtc);
++
++      /* Special case for asynchronous cursor updates. */
++      if (!crtc_state)
+               crtc_state = plane->crtc->state;
+       return drm_atomic_helper_check_plane_state(plane->state, crtc_state,
+-- 
+2.43.0
+
diff --git a/queue-6.1/mptcp-error-out-earlier-on-disconnect.patch b/queue-6.1/mptcp-error-out-earlier-on-disconnect.patch
new file mode 100644 (file)
index 0000000..89465cc
--- /dev/null
@@ -0,0 +1,120 @@
+From e15a0ac941898394f13eb736cb6496824613b06b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Nov 2024 11:58:16 +0100
+Subject: mptcp: error out earlier on disconnect
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 581302298524e9d77c4c44ff5156a6cd112227ae ]
+
+Eric reported a division by zero splat in the MPTCP protocol:
+
+Oops: divide error: 0000 [#1] PREEMPT SMP KASAN PTI
+CPU: 1 UID: 0 PID: 6094 Comm: syz-executor317 Not tainted
+6.12.0-rc5-syzkaller-00291-g05b92660cdfe #0
+Hardware name: Google Google Compute Engine/Google Compute Engine,
+BIOS Google 09/13/2024
+RIP: 0010:__tcp_select_window+0x5b4/0x1310 net/ipv4/tcp_output.c:3163
+Code: f6 44 01 e3 89 df e8 9b 75 09 f8 44 39 f3 0f 8d 11 ff ff ff e8
+0d 74 09 f8 45 89 f4 e9 04 ff ff ff e8 00 74 09 f8 44 89 f0 99 <f7> 7c
+24 14 41 29 d6 45 89 f4 e9 ec fe ff ff e8 e8 73 09 f8 48 89
+RSP: 0018:ffffc900041f7930 EFLAGS: 00010293
+RAX: 0000000000017e67 RBX: 0000000000017e67 RCX: ffffffff8983314b
+RDX: 0000000000000000 RSI: ffffffff898331b0 RDI: 0000000000000004
+RBP: 00000000005d6000 R08: 0000000000000004 R09: 0000000000017e67
+R10: 0000000000003e80 R11: 0000000000000000 R12: 0000000000003e80
+R13: ffff888031d9b440 R14: 0000000000017e67 R15: 00000000002eb000
+FS: 00007feb5d7f16c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007feb5d8adbb8 CR3: 0000000074e4c000 CR4: 00000000003526f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+<TASK>
+__tcp_cleanup_rbuf+0x3e7/0x4b0 net/ipv4/tcp.c:1493
+mptcp_rcv_space_adjust net/mptcp/protocol.c:2085 [inline]
+mptcp_recvmsg+0x2156/0x2600 net/mptcp/protocol.c:2289
+inet_recvmsg+0x469/0x6a0 net/ipv4/af_inet.c:885
+sock_recvmsg_nosec net/socket.c:1051 [inline]
+sock_recvmsg+0x1b2/0x250 net/socket.c:1073
+__sys_recvfrom+0x1a5/0x2e0 net/socket.c:2265
+__do_sys_recvfrom net/socket.c:2283 [inline]
+__se_sys_recvfrom net/socket.c:2279 [inline]
+__x64_sys_recvfrom+0xe0/0x1c0 net/socket.c:2279
+do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83
+entry_SYSCALL_64_after_hwframe+0x77/0x7f
+RIP: 0033:0x7feb5d857559
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48
+89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d
+01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007feb5d7f1208 EFLAGS: 00000246 ORIG_RAX: 000000000000002d
+RAX: ffffffffffffffda RBX: 00007feb5d8e1318 RCX: 00007feb5d857559
+RDX: 000000800000000e RSI: 0000000000000000 RDI: 0000000000000003
+RBP: 00007feb5d8e1310 R08: 0000000000000000 R09: ffffffff81000000
+R10: 0000000000000100 R11: 0000000000000246 R12: 00007feb5d8e131c
+R13: 00007feb5d8ae074 R14: 000000800000000e R15: 00000000fffffdef
+
+and provided a nice reproducer.
+
+The root cause is the current bad handling of racing disconnect.
+After the blamed commit below, sk_wait_data() can return (with
+error) with the underlying socket disconnected and a zero rcv_mss.
+
+Catch the error and return without performing any additional
+operations on the current socket.
+
+Reported-by: Eric Dumazet <edumazet@google.com>
+Fixes: 419ce133ab92 ("tcp: allow again tcp_disconnect() when threads are waiting")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/8c82ecf71662ecbc47bf390f9905de70884c9f2d.1731060874.git.pabeni@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index d68e93dab88c3..78ac5c538e139 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2180,7 +2180,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+               cmsg_flags = MPTCP_CMSG_INQ;
+       while (copied < len) {
+-              int bytes_read;
++              int err, bytes_read;
+               bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
+               if (unlikely(bytes_read < 0)) {
+@@ -2245,9 +2245,16 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+               }
+               pr_debug("block timeout %ld\n", timeo);
+-              sk_wait_data(sk, &timeo, NULL);
++              mptcp_rcv_space_adjust(msk, copied);
++              err = sk_wait_data(sk, &timeo, NULL);
++              if (err < 0) {
++                      err = copied ? : err;
++                      goto out_err;
++              }
+       }
++      mptcp_rcv_space_adjust(msk, copied);
++
+ out_err:
+       if (cmsg_flags && copied >= 0) {
+               if (cmsg_flags & MPTCP_CMSG_TS)
+@@ -2263,8 +2270,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+       pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n",
+                msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
+                skb_queue_empty(&msk->receive_queue), copied);
+-      if (!(flags & MSG_PEEK))
+-              mptcp_rcv_space_adjust(msk, copied);
+       release_sock(sk);
+       return copied;
+-- 
+2.43.0
+
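The "err = copied ? : err;" line in the hunk above relies on the GNU ?:
shorthand; a tiny stand-alone illustration of the intended return policy (not
the MPTCP code):

#include <stdio.h>

static int finish_recv(int copied, int wait_err)
{
    /* mirrors 'err = copied ? : err;': keep the partial count when non-zero */
    return copied ? copied : wait_err;
}

int main(void)
{
    printf("%d\n", finish_recv(128, -104)); /* 128: partial data is reported */
    printf("%d\n", finish_recv(0, -104));   /* -104 (-ECONNRESET): nothing copied */
    return 0;
}
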
diff --git a/queue-6.1/net-mlx5-fs-lock-fte-when-checking-if-active.patch b/queue-6.1/net-mlx5-fs-lock-fte-when-checking-if-active.patch
new file mode 100644 (file)
index 0000000..ac8f011
--- /dev/null
@@ -0,0 +1,130 @@
+From 4b79a5732cea9509c9e0c315f19f5bbefee3e00e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Nov 2024 20:35:23 +0200
+Subject: net/mlx5: fs, lock FTE when checking if active
+
+From: Mark Bloch <mbloch@nvidia.com>
+
+[ Upstream commit 9ca314419930f9135727e39d77e66262d5f7bef6 ]
+
+The referenced commits introduced a two-step process for deleting FTEs:
+
+- Lock the FTE, delete it from hardware, set the hardware deletion function
+  to NULL and unlock the FTE.
+- Lock the parent flow group, delete the software copy of the FTE, and
+  remove it from the xarray.
+
+However, this approach encounters a race condition if a rule with the same
+match value is added simultaneously. In this scenario, fs_core may set the
+hardware deletion function to NULL prematurely, causing a panic during
+subsequent rule deletions.
+
+To prevent this, ensure the active flag of the FTE is checked under a lock,
+which will prevent the fs_core layer from attaching a new steering rule to
+an FTE that is in the process of deletion.
+
+[  438.967589] MOSHE: 2496 mlx5_del_flow_rules del_hw_func
+[  438.968205] ------------[ cut here ]------------
+[  438.968654] refcount_t: decrement hit 0; leaking memory.
+[  438.969249] WARNING: CPU: 0 PID: 8957 at lib/refcount.c:31 refcount_warn_saturate+0xfb/0x110
+[  438.970054] Modules linked in: act_mirred cls_flower act_gact sch_ingress openvswitch nsh mlx5_vdpa vringh vhost_iotlb vdpa mlx5_ib mlx5_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core zram zsmalloc fuse [last unloaded: cls_flower]
+[  438.973288] CPU: 0 UID: 0 PID: 8957 Comm: tc Not tainted 6.12.0-rc1+ #8
+[  438.973888] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[  438.974874] RIP: 0010:refcount_warn_saturate+0xfb/0x110
+[  438.975363] Code: 40 66 3b 82 c6 05 16 e9 4d 01 01 e8 1f 7c a0 ff 0f 0b c3 cc cc cc cc 48 c7 c7 10 66 3b 82 c6 05 fd e8 4d 01 01 e8 05 7c a0 ff <0f> 0b c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 90
+[  438.976947] RSP: 0018:ffff888124a53610 EFLAGS: 00010286
+[  438.977446] RAX: 0000000000000000 RBX: ffff888119d56de0 RCX: 0000000000000000
+[  438.978090] RDX: ffff88852c828700 RSI: ffff88852c81b3c0 RDI: ffff88852c81b3c0
+[  438.978721] RBP: ffff888120fa0e88 R08: 0000000000000000 R09: ffff888124a534b0
+[  438.979353] R10: 0000000000000001 R11: 0000000000000001 R12: ffff888119d56de0
+[  438.979979] R13: ffff888120fa0ec0 R14: ffff888120fa0ee8 R15: ffff888119d56de0
+[  438.980607] FS:  00007fe6dcc0f800(0000) GS:ffff88852c800000(0000) knlGS:0000000000000000
+[  438.983984] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  438.984544] CR2: 00000000004275e0 CR3: 0000000186982001 CR4: 0000000000372eb0
+[  438.985205] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[  438.985842] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[  438.986507] Call Trace:
+[  438.986799]  <TASK>
+[  438.987070]  ? __warn+0x7d/0x110
+[  438.987426]  ? refcount_warn_saturate+0xfb/0x110
+[  438.987877]  ? report_bug+0x17d/0x190
+[  438.988261]  ? prb_read_valid+0x17/0x20
+[  438.988659]  ? handle_bug+0x53/0x90
+[  438.989054]  ? exc_invalid_op+0x14/0x70
+[  438.989458]  ? asm_exc_invalid_op+0x16/0x20
+[  438.989883]  ? refcount_warn_saturate+0xfb/0x110
+[  438.990348]  mlx5_del_flow_rules+0x2f7/0x340 [mlx5_core]
+[  438.990932]  __mlx5_eswitch_del_rule+0x49/0x170 [mlx5_core]
+[  438.991519]  ? mlx5_lag_is_sriov+0x3c/0x50 [mlx5_core]
+[  438.992054]  ? xas_load+0x9/0xb0
+[  438.992407]  mlx5e_tc_rule_unoffload+0x45/0xe0 [mlx5_core]
+[  438.993037]  mlx5e_tc_del_fdb_flow+0x2a6/0x2e0 [mlx5_core]
+[  438.993623]  mlx5e_flow_put+0x29/0x60 [mlx5_core]
+[  438.994161]  mlx5e_delete_flower+0x261/0x390 [mlx5_core]
+[  438.994728]  tc_setup_cb_destroy+0xb9/0x190
+[  438.995150]  fl_hw_destroy_filter+0x94/0xc0 [cls_flower]
+[  438.995650]  fl_change+0x11a4/0x13c0 [cls_flower]
+[  438.996105]  tc_new_tfilter+0x347/0xbc0
+[  438.996503]  ? ___slab_alloc+0x70/0x8c0
+[  438.996929]  rtnetlink_rcv_msg+0xf9/0x3e0
+[  438.997339]  ? __netlink_sendskb+0x4c/0x70
+[  438.997751]  ? netlink_unicast+0x286/0x2d0
+[  438.998171]  ? __pfx_rtnetlink_rcv_msg+0x10/0x10
+[  438.998625]  netlink_rcv_skb+0x54/0x100
+[  438.999020]  netlink_unicast+0x203/0x2d0
+[  438.999421]  netlink_sendmsg+0x1e4/0x420
+[  438.999820]  __sock_sendmsg+0xa1/0xb0
+[  439.000203]  ____sys_sendmsg+0x207/0x2a0
+[  439.000600]  ? copy_msghdr_from_user+0x6d/0xa0
+[  439.001072]  ___sys_sendmsg+0x80/0xc0
+[  439.001459]  ? ___sys_recvmsg+0x8b/0xc0
+[  439.001848]  ? generic_update_time+0x4d/0x60
+[  439.002282]  __sys_sendmsg+0x51/0x90
+[  439.002658]  do_syscall_64+0x50/0x110
+[  439.003040]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+Fixes: 718ce4d601db ("net/mlx5: Consolidate update FTE for all removal changes")
+Fixes: cefc23554fc2 ("net/mlx5: Fix FTE cleanup")
+Signed-off-by: Mark Bloch <mbloch@nvidia.com>
+Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://patch.msgid.link/20241107183527.676877-4-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+index 164e10b5f9b7f..50fdc3cbb778e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+@@ -1880,13 +1880,22 @@ lookup_fte_locked(struct mlx5_flow_group *g,
+               fte_tmp = NULL;
+               goto out;
+       }
++
++      nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
++
+       if (!fte_tmp->node.active) {
++              up_write_ref_node(&fte_tmp->node, false);
++
++              if (take_write)
++                      up_write_ref_node(&g->node, false);
++              else
++                      up_read_ref_node(&g->node);
++
+               tree_put_node(&fte_tmp->node, false);
+-              fte_tmp = NULL;
+-              goto out;
++
++              return NULL;
+       }
+-      nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+ out:
+       if (take_write)
+               up_write_ref_node(&g->node, false);
+-- 
+2.43.0
+
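A generic stand-alone sketch of the locking rule the patch above enforces,
reduced from the commit message rather than taken from the mlx5 fs_core code:
the active flag is only tested while the entry's lock is held, so a concurrent
deleter cannot clear it between the check and the attach.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct fte {
    pthread_mutex_t lock;
    bool active;
    int refs;
};

/* Take a reference only if the entry is still active, under fte->lock. */
static bool fte_try_attach(struct fte *fte)
{
    bool ok;

    pthread_mutex_lock(&fte->lock);
    ok = fte->active;            /* checked under the lock ...            */
    if (ok)
        fte->refs++;             /* ... so deletion cannot race the check */
    pthread_mutex_unlock(&fte->lock);
    return ok;
}

int main(void)
{
    struct fte fte = { PTHREAD_MUTEX_INITIALIZER, true, 1 };

    printf("attach: %d\n", fte_try_attach(&fte));  /* 1: still active */

    pthread_mutex_lock(&fte.lock);
    fte.active = false;                            /* deleter marks it dead */
    pthread_mutex_unlock(&fte.lock);

    printf("attach: %d\n", fte_try_attach(&fte));  /* 0: caller must allocate anew */
    return 0;
}
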
diff --git a/queue-6.1/net-mlx5e-ct-fix-null-ptr-deref-in-add-rule-err-flow.patch b/queue-6.1/net-mlx5e-ct-fix-null-ptr-deref-in-add-rule-err-flow.patch
new file mode 100644 (file)
index 0000000..bc20fd9
--- /dev/null
@@ -0,0 +1,71 @@
+From 19a574dc355ce7130b5e72b8ac48a782f232ab16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Nov 2024 20:35:26 +0200
+Subject: net/mlx5e: CT: Fix null-ptr-deref in add rule err flow
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Moshe Shemesh <moshe@nvidia.com>
+
+[ Upstream commit e99c6873229fe0482e7ceb7d5600e32d623ed9d9 ]
+
+In the error flow of mlx5_tc_ct_entry_add_rule(), in case the ct_rule_add()
+callback returns an error, zone_rule->attr is used uninitialized. Fix it to
+use attr, which holds the needed pointer value.
+
+Kernel log:
+ BUG: kernel NULL pointer dereference, address: 0000000000000110
+ RIP: 0010:mlx5_tc_ct_entry_add_rule+0x2b1/0x2f0 [mlx5_core]
+…
+ Call Trace:
+  <TASK>
+  ? __die+0x20/0x70
+  ? page_fault_oops+0x150/0x3e0
+  ? exc_page_fault+0x74/0x140
+  ? asm_exc_page_fault+0x22/0x30
+  ? mlx5_tc_ct_entry_add_rule+0x2b1/0x2f0 [mlx5_core]
+  ? mlx5_tc_ct_entry_add_rule+0x1d5/0x2f0 [mlx5_core]
+  mlx5_tc_ct_block_flow_offload+0xc6a/0xf90 [mlx5_core]
+  ? nf_flow_offload_tuple+0xd8/0x190 [nf_flow_table]
+  nf_flow_offload_tuple+0xd8/0x190 [nf_flow_table]
+  flow_offload_work_handler+0x142/0x320 [nf_flow_table]
+  ? finish_task_switch.isra.0+0x15b/0x2b0
+  process_one_work+0x16c/0x320
+  worker_thread+0x28c/0x3a0
+  ? __pfx_worker_thread+0x10/0x10
+  kthread+0xb8/0xf0
+  ? __pfx_kthread+0x10/0x10
+  ret_from_fork+0x2d/0x50
+  ? __pfx_kthread+0x10/0x10
+  ret_from_fork_asm+0x1a/0x30
+  </TASK>
+
+Fixes: 7fac5c2eced3 ("net/mlx5: CT: Avoid reusing modify header context for natted entries")
+Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://patch.msgid.link/20241107183527.676877-7-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+index f01f7dfdbcf88..b011e0d2b620e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+@@ -862,7 +862,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
+       return 0;
+ err_rule:
+-      mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
++      mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr, zone_rule->mh);
+       mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
+ err_mod_hdr:
+       kfree(attr);
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-mlx5e-ktls-fix-incorrect-page-refcounting.patch b/queue-6.1/net-mlx5e-ktls-fix-incorrect-page-refcounting.patch
new file mode 100644 (file)
index 0000000..aaa0ef9
--- /dev/null
@@ -0,0 +1,72 @@
+From 5e823cb7e0f3d53303ed4150512f0fe7b2e8cf79 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Nov 2024 20:35:24 +0200
+Subject: net/mlx5e: kTLS, Fix incorrect page refcounting
+
+From: Dragos Tatulea <dtatulea@nvidia.com>
+
+[ Upstream commit dd6e972cc5890d91d6749bb48e3912721c4e4b25 ]
+
+The kTLS tx handling code is using a mix of get_page() and
+page_ref_inc() APIs to increment the page reference. But on the release
+path (mlx5e_ktls_tx_handle_resync_dump_comp()), only put_page() is used.
+
+This is an issue when using pages from large folios: the get_page()
+references are stored on the folio page while the page_ref_inc()
+references are stored directly in the given page. On release the folio
+page will be dereferenced too many times.
+
+This was found while doing kTLS testing with sendfile() + ZC when the
+served file was read from NFS on a kernel with NFS large folios support
+(commit 49b29a573da8 ("nfs: add support for large folios")).
+
+Fixes: 84d1bb2b139e ("net/mlx5e: kTLS, Limit DUMP wqe size")
+Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://patch.msgid.link/20241107183527.676877-5-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c    | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+index 2e0335246967b..6d56d4a9977b0 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+@@ -665,7 +665,7 @@ tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
+       while (remaining > 0) {
+               skb_frag_t *frag = &record->frags[i];
+-              get_page(skb_frag_page(frag));
++              page_ref_inc(skb_frag_page(frag));
+               remaining -= skb_frag_size(frag);
+               info->frags[i++] = *frag;
+       }
+@@ -768,7 +768,7 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+       stats = sq->stats;
+       mlx5e_tx_dma_unmap(sq->pdev, dma);
+-      put_page(wi->resync_dump_frag_page);
++      page_ref_dec(wi->resync_dump_frag_page);
+       stats->tls_dump_packets++;
+       stats->tls_dump_bytes += wi->num_bytes;
+ }
+@@ -821,12 +821,12 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ err_out:
+       for (; i < info.nr_frags; i++)
+-              /* The put_page() here undoes the page ref obtained in tx_sync_info_get().
++              /* The page_ref_dec() here undoes the page ref obtained in tx_sync_info_get().
+                * Page refs obtained for the DUMP WQEs above (by page_ref_add) will be
+                * released only upon their completions (or in mlx5e_free_txqsq_descs,
+                * if channel closes).
+                */
+-              put_page(skb_frag_page(&info.frags[i]));
++              page_ref_dec(skb_frag_page(&info.frags[i]));
+       return MLX5E_KTLS_SYNC_FAIL;
+ }
+-- 
+2.43.0
+
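A toy user-space model of the imbalance described in the commit message above;
this is a deliberately simplified assumption about folio vs. raw page reference
counting, not the mm implementation:

#include <stdio.h>

struct page { int refcount; struct page *head; };

static void get_page(struct page *p)     { p->head->refcount++; } /* folio ref */
static void put_page(struct page *p)     { p->head->refcount--; } /* folio ref */
static void page_ref_inc(struct page *p) { p->refcount++; }       /* raw page ref */
static void page_ref_dec(struct page *p) { p->refcount--; }       /* raw page ref */

int main(void)
{
    struct page head = { .refcount = 1 }, tail = { .refcount = 0 };
    head.head = &head;
    tail.head = &head;

    get_page(&tail);      /* +1 on the folio head            */
    page_ref_inc(&tail);  /* +1 on the tail page itself      */

    put_page(&tail);      /* -1 on the head                  */
    put_page(&tail);      /* -1 on the head again: it hits 0 while a raw
                           * reference on the tail page is still outstanding */

    printf("head=%d tail=%d\n", head.refcount, tail.refcount); /* head=0 tail=1 */
    return 0;
}
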
diff --git a/queue-6.1/net-sched-cls_u32-fix-u32-s-systematic-failure-to-fr.patch b/queue-6.1/net-sched-cls_u32-fix-u32-s-systematic-failure-to-fr.patch
new file mode 100644 (file)
index 0000000..5492ed2
--- /dev/null
@@ -0,0 +1,101 @@
+From 1718701dfdc57c89227a7a785697427b09cc29a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Nov 2024 18:28:36 +0100
+Subject: net: sched: cls_u32: Fix u32's systematic failure to free IDR entries
+ for hnodes.
+
+From: Alexandre Ferrieux <alexandre.ferrieux@gmail.com>
+
+[ Upstream commit 73af53d82076bbe184d9ece9e14b0dc8599e6055 ]
+
+To generate hnode handles (in gen_new_htid()), u32 uses IDR and
+encodes the returned small integer into a structured 32-bit
+word. Unfortunately, at disposal time, the needed decoding
+is not done. As a result, idr_remove() fails, and the IDR
+fills up. Since its size is 2048, the following script ends up
+with "Filter already exists":
+
+  tc filter add dev myve $FILTER1
+  tc filter add dev myve $FILTER2
+  for i in {1..2048}
+  do
+    echo $i
+    tc filter del dev myve $FILTER2
+    tc filter add dev myve $FILTER2
+  done
+
+This patch adds the missing decoding logic for handles that
+deserve it.
+
+Fixes: e7614370d6f0 ("net_sched: use idr to allocate u32 filter handles")
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Alexandre Ferrieux <alexandre.ferrieux@orange.com>
+Tested-by: Victor Nogueira <victor@mojatatu.com>
+Link: https://patch.msgid.link/20241110172836.331319-1-alexandre.ferrieux@orange.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index adcc8de1d01be..e87d79d043d54 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -91,6 +91,16 @@ struct tc_u_common {
+       long                    knodes;
+ };
++static u32 handle2id(u32 h)
++{
++      return ((h & 0x80000000) ? ((h >> 20) & 0x7FF) : h);
++}
++
++static u32 id2handle(u32 id)
++{
++      return (id | 0x800U) << 20;
++}
++
+ static inline unsigned int u32_hash_fold(__be32 key,
+                                        const struct tc_u32_sel *sel,
+                                        u8 fshift)
+@@ -308,7 +318,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
+       int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL);
+       if (id < 0)
+               return 0;
+-      return (id | 0x800U) << 20;
++      return id2handle(id);
+ }
+ static struct hlist_head *tc_u_common_hash;
+@@ -358,7 +368,7 @@ static int u32_init(struct tcf_proto *tp)
+               return -ENOBUFS;
+       refcount_set(&root_ht->refcnt, 1);
+-      root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
++      root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : id2handle(0);
+       root_ht->prio = tp->prio;
+       root_ht->is_root = true;
+       idr_init(&root_ht->handle_idr);
+@@ -610,7 +620,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+               if (phn == ht) {
+                       u32_clear_hw_hnode(tp, ht, extack);
+                       idr_destroy(&ht->handle_idr);
+-                      idr_remove(&tp_c->handle_idr, ht->handle);
++                      idr_remove(&tp_c->handle_idr, handle2id(ht->handle));
+                       RCU_INIT_POINTER(*hn, ht->next);
+                       kfree_rcu(ht, rcu);
+                       return 0;
+@@ -987,7 +997,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+               err = u32_replace_hw_hnode(tp, ht, userflags, extack);
+               if (err) {
+-                      idr_remove(&tp_c->handle_idr, handle);
++                      idr_remove(&tp_c->handle_idr, handle2id(handle));
+                       kfree(ht);
+                       return err;
+               }
+-- 
+2.43.0
+
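A stand-alone, user-space sketch of the handle layout described above,
mirroring the handle2id()/id2handle() helpers the patch adds: before the fix,
the undecoded handle was passed to idr_remove(), which never matched the id
returned by idr_alloc_cyclic(), so entries leaked until the IDR filled up.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t id2handle(uint32_t id) { return (id | 0x800U) << 20; }
static uint32_t handle2id(uint32_t h)  { return (h & 0x80000000) ? ((h >> 20) & 0x7FF) : h; }

int main(void)
{
    /* the IDR hands out small ids in [1, 0x7FF) */
    for (uint32_t id = 1; id < 0x7FF; id++) {
        uint32_t handle = id2handle(id);

        /* before the fix, idr_remove() was given 'handle', which never
         * equals the allocated 'id'; decoding restores the right key */
        assert(handle2id(handle) == id);
    }
    puts("all handles decode back to their ids");
    return 0;
}
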
diff --git a/queue-6.1/net-sched-cls_u32-replace-int-refcounts-with-proper-.patch b/queue-6.1/net-sched-cls_u32-replace-int-refcounts-with-proper-.patch
new file mode 100644 (file)
index 0000000..cb37cc8
--- /dev/null
@@ -0,0 +1,200 @@
+From 3b30617cda0beb6630c3289c0e76a73883bfcff3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Nov 2023 11:18:55 -0300
+Subject: net/sched: cls_u32: replace int refcounts with proper refcounts
+
+From: Pedro Tammela <pctammela@mojatatu.com>
+
+[ Upstream commit 6b78debe1c07e6aa3c91ca0b1384bf3cb8217c50 ]
+
+Proper refcounts will always emit a warning splat when something goes
+wrong, be it underflow, saturation or object resurrection. As these are
+always a source of bugs, use them in cls_u32 as a safeguard to
+prevent/catch issues. Another benefit is that the refcount API
+self-documents the code, making clear when transitions to dead are expected.
+
+For such an update we had to make minor adaptations in u32 to fit the
+refcount API. The refcount is set explicitly to '1' when an object is
+created; the object then stays alive until a 1 -> 0 transition releases it.
+
+The above exposed some redundant operations in the u32 code around the
+root_ht handling, which were removed. The root_ht is created with a
+refcnt of 1, and associating it with the tcf_proto raises the refcnt to 2.
+Throughout the rest of the code the root_ht is an exceptional case that is
+never referenced, so its refcnt is never incremented or decremented. Its
+lifetime is always bound to the tcf_proto: deleting the tcf_proto deletes
+the root_ht as well. The old code compensated for the refcnt of 2 by doing
+a double decrement to free it, which does not fit the refcount API.
+
+Even though refcount_t is implemented using atomics, we should observe
+a negligible control plane impact.
+
+Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Link: https://lore.kernel.org/r/20231114141856.974326-2-pctammela@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 73af53d82076 ("net: sched: cls_u32: Fix u32's systematic failure to free IDR entries for hnodes.")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 36 ++++++++++++++++++------------------
+ 1 file changed, 18 insertions(+), 18 deletions(-)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index 04448bfb4d3db..adcc8de1d01be 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -70,7 +70,7 @@ struct tc_u_hnode {
+       struct tc_u_hnode __rcu *next;
+       u32                     handle;
+       u32                     prio;
+-      int                     refcnt;
++      refcount_t              refcnt;
+       unsigned int            divisor;
+       struct idr              handle_idr;
+       bool                    is_root;
+@@ -85,7 +85,7 @@ struct tc_u_hnode {
+ struct tc_u_common {
+       struct tc_u_hnode __rcu *hlist;
+       void                    *ptr;
+-      int                     refcnt;
++      refcount_t              refcnt;
+       struct idr              handle_idr;
+       struct hlist_node       hnode;
+       long                    knodes;
+@@ -357,7 +357,7 @@ static int u32_init(struct tcf_proto *tp)
+       if (root_ht == NULL)
+               return -ENOBUFS;
+-      root_ht->refcnt++;
++      refcount_set(&root_ht->refcnt, 1);
+       root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
+       root_ht->prio = tp->prio;
+       root_ht->is_root = true;
+@@ -369,18 +369,20 @@ static int u32_init(struct tcf_proto *tp)
+                       kfree(root_ht);
+                       return -ENOBUFS;
+               }
++              refcount_set(&tp_c->refcnt, 1);
+               tp_c->ptr = key;
+               INIT_HLIST_NODE(&tp_c->hnode);
+               idr_init(&tp_c->handle_idr);
+               hlist_add_head(&tp_c->hnode, tc_u_hash(key));
++      } else {
++              refcount_inc(&tp_c->refcnt);
+       }
+-      tp_c->refcnt++;
+       RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
+       rcu_assign_pointer(tp_c->hlist, root_ht);
+-      root_ht->refcnt++;
++      /* root_ht must be destroyed when tcf_proto is destroyed */
+       rcu_assign_pointer(tp->root, root_ht);
+       tp->data = tp_c;
+       return 0;
+@@ -391,7 +393,7 @@ static void __u32_destroy_key(struct tc_u_knode *n)
+       struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
+       tcf_exts_destroy(&n->exts);
+-      if (ht && --ht->refcnt == 0)
++      if (ht && refcount_dec_and_test(&ht->refcnt))
+               kfree(ht);
+       kfree(n);
+ }
+@@ -599,8 +601,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+       struct tc_u_hnode __rcu **hn;
+       struct tc_u_hnode *phn;
+-      WARN_ON(--ht->refcnt);
+-
+       u32_clear_hnode(tp, ht, extack);
+       hn = &tp_c->hlist;
+@@ -628,10 +628,10 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
+       WARN_ON(root_ht == NULL);
+-      if (root_ht && --root_ht->refcnt == 1)
++      if (root_ht && refcount_dec_and_test(&root_ht->refcnt))
+               u32_destroy_hnode(tp, root_ht, extack);
+-      if (--tp_c->refcnt == 0) {
++      if (refcount_dec_and_test(&tp_c->refcnt)) {
+               struct tc_u_hnode *ht;
+               hlist_del(&tp_c->hnode);
+@@ -643,7 +643,7 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
+                       /* u32_destroy_key() will later free ht for us, if it's
+                        * still referenced by some knode
+                        */
+-                      if (--ht->refcnt == 0)
++                      if (refcount_dec_and_test(&ht->refcnt))
+                               kfree_rcu(ht, rcu);
+               }
+@@ -672,7 +672,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
+               return -EINVAL;
+       }
+-      if (ht->refcnt == 1) {
++      if (refcount_dec_if_one(&ht->refcnt)) {
+               u32_destroy_hnode(tp, ht, extack);
+       } else {
+               NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
+@@ -680,7 +680,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
+       }
+ out:
+-      *last = tp_c->refcnt == 1 && tp_c->knodes == 0;
++      *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0;
+       return ret;
+ }
+@@ -764,14 +764,14 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
+                               NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
+                               return -EINVAL;
+                       }
+-                      ht_down->refcnt++;
++                      refcount_inc(&ht_down->refcnt);
+               }
+               ht_old = rtnl_dereference(n->ht_down);
+               rcu_assign_pointer(n->ht_down, ht_down);
+               if (ht_old)
+-                      ht_old->refcnt--;
++                      refcount_dec(&ht_old->refcnt);
+       }
+       if (ifindex >= 0)
+@@ -850,7 +850,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
+       /* bump reference count as long as we hold pointer to structure */
+       if (ht)
+-              ht->refcnt++;
++              refcount_inc(&ht->refcnt);
+       return new;
+ }
+@@ -930,7 +930,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+                               ht_old = rtnl_dereference(n->ht_down);
+                               if (ht_old)
+-                                      ht_old->refcnt++;
++                                      refcount_inc(&ht_old->refcnt);
+                       }
+                       __u32_destroy_key(new);
+                       return err;
+@@ -978,7 +978,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+                               return err;
+                       }
+               }
+-              ht->refcnt = 1;
++              refcount_set(&ht->refcnt, 1);
+               ht->divisor = divisor;
+               ht->handle = handle;
+               ht->prio = tp->prio;
+-- 
+2.43.0
+
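The conversion above replaces open-coded integer reference counts with the
kernel's checked refcount API. As an illustration only, the userspace sketch
below (toy_* names are mine, and the real refcount_t additionally saturates
and warns on over/underflow) models the three operations the diff depends on:
refcount_set()/refcount_inc(), refcount_dec_and_test() on the release paths,
and refcount_dec_if_one() for the "am I the only user?" check in u32_delete().

/* Userspace model of the checked refcount semantics adopted above;
 * a sketch, not the kernel's refcount_t implementation.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_refcount { atomic_int refs; };

static void toy_refcount_set(struct toy_refcount *r, int n)
{
	atomic_store(&r->refs, n);
}

static void toy_refcount_inc(struct toy_refcount *r)
{
	atomic_fetch_add(&r->refs, 1);
}

/* True when the count hit zero, i.e. the caller must free the object. */
static bool toy_refcount_dec_and_test(struct toy_refcount *r)
{
	return atomic_fetch_sub(&r->refs, 1) == 1;
}

/* Drops the count only if it is exactly 1: the "no other users" test
 * that u32_delete() needs before tearing down an hnode.
 */
static bool toy_refcount_dec_if_one(struct toy_refcount *r)
{
	int one = 1;

	return atomic_compare_exchange_strong(&r->refs, &one, 0);
}

int main(void)
{
	struct toy_refcount ht = { 0 };

	toy_refcount_set(&ht, 1);
	toy_refcount_inc(&ht);	/* a second user links to the hnode */

	printf("dec_if_one:   %d (still in use)\n", toy_refcount_dec_if_one(&ht));
	printf("dec_and_test: %d\n", toy_refcount_dec_and_test(&ht));
	printf("dec_and_test: %d (last ref, free it)\n",
	       toy_refcount_dec_and_test(&ht));
	return 0;
}

The dec_if_one() form matters because u32_delete() must only destroy an hnode
that has no remaining users, without racing against a concurrent increment.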
diff --git a/queue-6.1/net-vertexcom-mse102x-fix-tx_bytes-calculation.patch b/queue-6.1/net-vertexcom-mse102x-fix-tx_bytes-calculation.patch
new file mode 100644
index 0000000..0581c01
--- /dev/null
@@ -0,0 +1,46 @@
+From 4ada504ddf959de38ac5953e35ceb940cc0923c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Nov 2024 12:43:43 +0100
+Subject: net: vertexcom: mse102x: Fix tx_bytes calculation
+
+From: Stefan Wahren <wahrenst@gmx.net>
+
+[ Upstream commit e68da664d379f352d41d7955712c44e0a738e4ab ]
+
+The tx_bytes counter should reflect the actual size of the Ethernet
+frames without the SPI encapsulation, but it still needs to account
+for Ethernet padding.
+
+Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support")
+Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
+Link: https://patch.msgid.link/20241108114343.6174-3-wahrenst@gmx.net
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/vertexcom/mse102x.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c
+index dd766e175f7db..8f67c39f479ee 100644
+--- a/drivers/net/ethernet/vertexcom/mse102x.c
++++ b/drivers/net/ethernet/vertexcom/mse102x.c
+@@ -437,13 +437,15 @@ static void mse102x_tx_work(struct work_struct *work)
+       mse = &mses->mse102x;
+       while ((txb = skb_dequeue(&mse->txq))) {
++              unsigned int len = max_t(unsigned int, txb->len, ETH_ZLEN);
++
+               mutex_lock(&mses->lock);
+               ret = mse102x_tx_pkt_spi(mse, txb, work_timeout);
+               mutex_unlock(&mses->lock);
+               if (ret) {
+                       mse->ndev->stats.tx_dropped++;
+               } else {
+-                      mse->ndev->stats.tx_bytes += txb->len;
++                      mse->ndev->stats.tx_bytes += len;
+                       mse->ndev->stats.tx_packets++;
+               }
+-- 
+2.43.0
+
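The accounting rule the fix applies is max(skb->len, ETH_ZLEN): frames shorter
than the Ethernet minimum are padded on the wire and should be counted at that
size, while the SPI encapsulation overhead is never counted. A minimal sketch,
assuming ETH_ZLEN is the 60-byte minimum frame length without FCS and using a
hypothetical helper name (this is not driver code):

/* Sketch of the tx_bytes accounting rule used above. */
#include <stdio.h>

#define ETH_ZLEN 60	/* minimum Ethernet frame length without FCS */

static unsigned int tx_bytes_accounted(unsigned int skb_len)
{
	return skb_len > ETH_ZLEN ? skb_len : ETH_ZLEN;
}

int main(void)
{
	printf("%u\n", tx_bytes_accounted(42));		/* runt, padded -> 60 */
	printf("%u\n", tx_bytes_accounted(1514));	/* full frame -> 1514 */
	return 0;
}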
diff --git a/queue-6.1/netlink-terminate-outstanding-dump-on-socket-close.patch b/queue-6.1/netlink-terminate-outstanding-dump-on-socket-close.patch
new file mode 100644
index 0000000..d18ed0b
--- /dev/null
@@ -0,0 +1,142 @@
+From 8b4d7a14abcfd8288c01d7da64867892bfd55df6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Nov 2024 17:52:34 -0800
+Subject: netlink: terminate outstanding dump on socket close
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 1904fb9ebf911441f90a68e96b22aa73e4410505 ]
+
+Netlink supports iterative dumping of data. It provides the families
+with the following ops:
+ - start - (optional) kicks off the dumping process
+ - dump  - actual dump helper, keeps getting called until it returns 0
+ - done  - (optional) pairs with .start, can be used for cleanup
+The whole process is asynchronous and the repeated calls to .dump
+don't actually happen in a tight loop, but rather are triggered
+in response to recvmsg() on the socket.
+
+This gives the user full control over the dump, but also means that
+the user can close the socket without getting to the end of the dump.
+To make sure .start is always paired with .done, we check whether there
+is an ongoing dump before freeing the socket and, if so, call .done.
+
+The complication is that sockets can get freed from BH and .done
+is allowed to sleep. So we use a workqueue to defer the call, when
+needed.
+
+Unfortunately this does not work correctly. What we defer is not
+the cleanup but rather releasing a reference on the socket.
+We have no guarantee that we own the last reference, if someone
+else holds the socket they may release it in BH and we're back
+to square one.
+
+The whole dance, however, appears to be unnecessary. Only the user
+can interact with dumps, so we can clean up when the socket is closed.
+And close always happens in process context. Some async code may
+still access the socket after close, queue notification skbs to it, etc.,
+but no dumps can start, end or otherwise make progress.
+
+Delete the workqueue and flush the dump state directly from the release
+handler. Note that further cleanup is possible in -next, for instance
+we now always call .done before releasing the main module reference,
+so dump doesn't have to take a reference of its own.
+
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Fixes: ed5d7788a934 ("netlink: Do not schedule work from sk_destruct")
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241106015235.2458807-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netlink/af_netlink.c | 31 ++++++++-----------------------
+ net/netlink/af_netlink.h |  2 --
+ 2 files changed, 8 insertions(+), 25 deletions(-)
+
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+index 9eb87f35bc65e..8a74847dacaf1 100644
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -393,15 +393,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+ static void netlink_sock_destruct(struct sock *sk)
+ {
+-      struct netlink_sock *nlk = nlk_sk(sk);
+-
+-      if (nlk->cb_running) {
+-              if (nlk->cb.done)
+-                      nlk->cb.done(&nlk->cb);
+-              module_put(nlk->cb.module);
+-              kfree_skb(nlk->cb.skb);
+-      }
+-
+       skb_queue_purge(&sk->sk_receive_queue);
+       if (!sock_flag(sk, SOCK_DEAD)) {
+@@ -414,14 +405,6 @@ static void netlink_sock_destruct(struct sock *sk)
+       WARN_ON(nlk_sk(sk)->groups);
+ }
+-static void netlink_sock_destruct_work(struct work_struct *work)
+-{
+-      struct netlink_sock *nlk = container_of(work, struct netlink_sock,
+-                                              work);
+-
+-      sk_free(&nlk->sk);
+-}
+-
+ /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
+  * SMP. Look, when several writers sleep and reader wakes them up, all but one
+  * immediately hit write lock and grab all the cpus. Exclusive sleep solves
+@@ -736,12 +719,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
+       if (!refcount_dec_and_test(&sk->sk_refcnt))
+               return;
+-      if (nlk->cb_running && nlk->cb.done) {
+-              INIT_WORK(&nlk->work, netlink_sock_destruct_work);
+-              schedule_work(&nlk->work);
+-              return;
+-      }
+-
+       sk_free(sk);
+ }
+@@ -791,6 +768,14 @@ static int netlink_release(struct socket *sock)
+                               NETLINK_URELEASE, &n);
+       }
++      /* Terminate any outstanding dump */
++      if (nlk->cb_running) {
++              if (nlk->cb.done)
++                      nlk->cb.done(&nlk->cb);
++              module_put(nlk->cb.module);
++              kfree_skb(nlk->cb.skb);
++      }
++
+       module_put(nlk->module);
+       if (netlink_is_kernel(sk)) {
+diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
+index b30b8fc760f71..aa430e4d58d80 100644
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -4,7 +4,6 @@
+ #include <linux/rhashtable.h>
+ #include <linux/atomic.h>
+-#include <linux/workqueue.h>
+ #include <net/sock.h>
+ /* flags */
+@@ -48,7 +47,6 @@ struct netlink_sock {
+       struct rhash_head       node;
+       struct rcu_head         rcu;
+-      struct work_struct      work;
+ };
+ static inline struct netlink_sock *nlk_sk(struct sock *sk)
+-- 
+2.43.0
+
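The patch moves dump teardown out of the destructor (which can run from BH via
the RCU callback) and into netlink_release(), which always runs in process
context and may therefore call a sleeping .done safely; the workqueue
indirection is deleted entirely. A rough userspace model of that ownership
split follows (toy_* names and plain free() stand in for the kernel objects;
this is an assumption-laden sketch, not the netlink implementation):

/* Model of "terminate the dump on close, only free memory later". */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_dump_cb {
	void (*done)(struct toy_dump_cb *cb);
	void *skb;			/* partially built dump message */
};

struct toy_nlk_sock {
	bool cb_running;
	struct toy_dump_cb cb;
};

/* close() path: always process context, the safe place to call .done. */
static void toy_netlink_release(struct toy_nlk_sock *nlk)
{
	if (nlk->cb_running) {
		if (nlk->cb.done)
			nlk->cb.done(&nlk->cb);
		free(nlk->cb.skb);
		nlk->cb_running = false;
	}
}

/* Final reference drop: may happen later and elsewhere, only frees memory. */
static void toy_deferred_put(struct toy_nlk_sock *nlk)
{
	free(nlk);
}

static void toy_done(struct toy_dump_cb *cb)
{
	(void)cb;
	printf("dump state released in process context\n");
}

int main(void)
{
	struct toy_nlk_sock *nlk = calloc(1, sizeof(*nlk));

	if (!nlk)
		return 1;
	nlk->cb_running = true;
	nlk->cb.done = toy_done;
	nlk->cb.skb = malloc(16);

	toy_netlink_release(nlk);	/* user closed the socket mid-dump */
	toy_deferred_put(nlk);		/* last reference dropped later */
	return 0;
}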
diff --git a/queue-6.1/samples-pktgen-correct-dev-to-dev.patch b/queue-6.1/samples-pktgen-correct-dev-to-dev.patch
new file mode 100644
index 0000000..e15de7c
--- /dev/null
@@ -0,0 +1,40 @@
+From 77e4b953c8f58ce9e187ef6a047cdf875a7a6b25 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Nov 2024 11:03:47 +0800
+Subject: samples: pktgen: correct dev to DEV
+
+From: Wei Fang <wei.fang@nxp.com>
+
+[ Upstream commit 3342dc8b4623d835e7dd76a15cec2e5a94fe2f93 ]
+
+In the pktgen_sample01_simple.sh script, the device variable is uppercase
+'DEV' instead of lowercase 'dev'. Because of this typo, the script cannot
+enable UDP tx checksum.
+
+Fixes: 460a9aa23de6 ("samples: pktgen: add UDP tx checksum support")
+Signed-off-by: Wei Fang <wei.fang@nxp.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
+Link: https://patch.msgid.link/20241112030347.1849335-1-wei.fang@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ samples/pktgen/pktgen_sample01_simple.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
+index 09a92ea963f98..c8e75888a9c20 100755
+--- a/samples/pktgen/pktgen_sample01_simple.sh
++++ b/samples/pktgen/pktgen_sample01_simple.sh
+@@ -72,7 +72,7 @@ if [ -n "$DST_PORT" ]; then
+     pg_set $DEV "udp_dst_max $UDP_DST_MAX"
+ fi
+-[ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
++[ ! -z "$UDP_CSUM" ] && pg_set $DEV "flag UDPCSUM"
+ # Setup random UDP port src range
+ pg_set $DEV "flag UDPSRC_RND"
+-- 
+2.43.0
+
diff --git a/queue-6.1/series b/queue-6.1/series
new file mode 100644
index 0000000..64b3fcc
--- /dev/null
@@ -0,0 +1,15 @@
+netlink-terminate-outstanding-dump-on-socket-close.patch
+net-vertexcom-mse102x-fix-tx_bytes-calculation.patch
+drm-rockchip-vop-fix-a-dereferenced-before-check-war.patch
+mptcp-error-out-earlier-on-disconnect.patch
+net-mlx5-fs-lock-fte-when-checking-if-active.patch
+net-mlx5e-ktls-fix-incorrect-page-refcounting.patch
+net-mlx5e-ct-fix-null-ptr-deref-in-add-rule-err-flow.patch
+virtio-vsock-fix-accept_queue-memory-leak.patch
+bluetooth-hci_event-remove-code-to-removed-config_bt.patch
+bluetooth-hci_core-fix-calling-mgmt_device_connected.patch
+net-sched-cls_u32-replace-int-refcounts-with-proper-.patch
+net-sched-cls_u32-fix-u32-s-systematic-failure-to-fr.patch
+samples-pktgen-correct-dev-to-dev.patch
+bonding-add-ns-target-multicast-address-to-slave-dev.patch
+arm-9419-1-mm-fix-kernel-memory-mapping-for-xip-kern.patch
diff --git a/queue-6.1/virtio-vsock-fix-accept_queue-memory-leak.patch b/queue-6.1/virtio-vsock-fix-accept_queue-memory-leak.patch
new file mode 100644
index 0000000..ba2d069
--- /dev/null
@@ -0,0 +1,93 @@
+From c8e2ffc96d27901bf54c3ec59b567b78033ce0bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Nov 2024 21:46:12 +0100
+Subject: virtio/vsock: Fix accept_queue memory leak
+
+From: Michal Luczaj <mhal@rbox.co>
+
+[ Upstream commit d7b0ff5a866724c3ad21f2628c22a63336deec3f ]
+
+As the final stages of socket destruction may be delayed, it is possible
+that virtio_transport_recv_listen() will be called after the accept_queue
+has been flushed, but before the SOCK_DONE flag has been set. As a result,
+sockets enqueued after the flush would remain unremoved, leading to a
+memory leak.
+
+vsock_release
+  __vsock_release
+    lock
+    virtio_transport_release
+      virtio_transport_close
+        schedule_delayed_work(close_work)
+    sk_shutdown = SHUTDOWN_MASK
+(!) flush accept_queue
+    release
+                                        virtio_transport_recv_pkt
+                                          vsock_find_bound_socket
+                                          lock
+                                          if flag(SOCK_DONE) return
+                                          virtio_transport_recv_listen
+                                            child = vsock_create_connected
+                                      (!)   vsock_enqueue_accept(child)
+                                          release
+close_work
+  lock
+  virtio_transport_do_close
+    set_flag(SOCK_DONE)
+    virtio_transport_remove_sock
+      vsock_remove_sock
+        vsock_remove_bound
+  release
+
+Introduce a sk_shutdown check to disallow vsock_enqueue_accept() during
+socket destruction.
+
+unreferenced object 0xffff888109e3f800 (size 2040):
+  comm "kworker/5:2", pid 371, jiffies 4294940105
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    28 00 0b 40 00 00 00 00 00 00 00 00 00 00 00 00  (..@............
+  backtrace (crc 9e5f4e84):
+    [<ffffffff81418ff1>] kmem_cache_alloc_noprof+0x2c1/0x360
+    [<ffffffff81d27aa0>] sk_prot_alloc+0x30/0x120
+    [<ffffffff81d2b54c>] sk_alloc+0x2c/0x4b0
+    [<ffffffff81fe049a>] __vsock_create.constprop.0+0x2a/0x310
+    [<ffffffff81fe6d6c>] virtio_transport_recv_pkt+0x4dc/0x9a0
+    [<ffffffff81fe745d>] vsock_loopback_work+0xfd/0x140
+    [<ffffffff810fc6ac>] process_one_work+0x20c/0x570
+    [<ffffffff810fce3f>] worker_thread+0x1bf/0x3a0
+    [<ffffffff811070dd>] kthread+0xdd/0x110
+    [<ffffffff81044fdd>] ret_from_fork+0x2d/0x50
+    [<ffffffff8100785a>] ret_from_fork_asm+0x1a/0x30
+
+Fixes: 3fe356d58efa ("vsock/virtio: discard packets only when socket is really closed")
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/vmw_vsock/virtio_transport_common.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
+index b22dc7bed2182..3bc573cbf8a6e 100644
+--- a/net/vmw_vsock/virtio_transport_common.c
++++ b/net/vmw_vsock/virtio_transport_common.c
+@@ -1220,6 +1220,14 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
+               return -ENOMEM;
+       }
++      /* __vsock_release() might have already flushed accept_queue.
++       * Subsequent enqueues would lead to a memory leak.
++       */
++      if (sk->sk_shutdown == SHUTDOWN_MASK) {
++              virtio_transport_reset_no_sock(t, skb);
++              return -ESHUTDOWN;
++      }
++
+       child = vsock_create_connected(sk);
+       if (!child) {
+               virtio_transport_reset_no_sock(t, skb);
+-- 
+2.43.0
+
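The check depends on ordering in __vsock_release(): sk_shutdown is set to
SHUTDOWN_MASK before accept_queue is flushed, so a listen-path packet that
arrives after the flush can detect the dying socket and reset the peer rather
than enqueue a child nobody will ever accept. A simplified userspace model of
that guard (toy_* names are mine and the constants merely mimic
RCV_SHUTDOWN | SEND_SHUTDOWN; this is not the vsock code itself):

/* Model of the "reject late connection requests once shut down" guard. */
#include <stdio.h>

#define TOY_RCV_SHUTDOWN	1
#define TOY_SEND_SHUTDOWN	2
#define TOY_SHUTDOWN_MASK	(TOY_RCV_SHUTDOWN | TOY_SEND_SHUTDOWN)

struct toy_listener {
	int sk_shutdown;
	int pending_children;	/* stand-in for the accept_queue length */
};

/* Mirrors the ordering in __vsock_release(): shutdown first, then flush. */
static void toy_release(struct toy_listener *l)
{
	l->sk_shutdown = TOY_SHUTDOWN_MASK;
	l->pending_children = 0;		/* flush accept_queue */
}

/* Mirrors virtio_transport_recv_listen() with the added check. */
static int toy_recv_listen(struct toy_listener *l)
{
	if (l->sk_shutdown == TOY_SHUTDOWN_MASK)
		return -1;			/* reset peer, -ESHUTDOWN */
	l->pending_children++;			/* would leak after the flush */
	return 0;
}

int main(void)
{
	struct toy_listener l = { 0, 0 };

	toy_release(&l);
	printf("late request handled: %d, children left pending: %d\n",
	       toy_recv_listen(&l), l.pending_children);
	return 0;
}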