--- /dev/null
+From 14e6fdf3653bd8b53176bdd03e706b0aa9941884 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Feb 2025 13:12:54 -0500
+Subject: Bluetooth: hci_event: Fix enabling passive scanning
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 0bdd88971519cfa8a76d1a4dde182e74cfbd5d5c ]
+
+Passive scanning shall only be enabled when disconnecting LE links,
+otherwise it may result in triggering scanning when e.g. an ISO
+link disconnects:
+
+> HCI Event: LE Meta Event (0x3e) plen 29
+ LE Connected Isochronous Stream Established (0x19)
+ Status: Success (0x00)
+ Connection Handle: 257
+ CIG Synchronization Delay: 0 us (0x000000)
+ CIS Synchronization Delay: 0 us (0x000000)
+ Central to Peripheral Latency: 10000 us (0x002710)
+ Peripheral to Central Latency: 10000 us (0x002710)
+ Central to Peripheral PHY: LE 2M (0x02)
+ Peripheral to Central PHY: LE 2M (0x02)
+ Number of Subevents: 1
+ Central to Peripheral Burst Number: 1
+ Peripheral to Central Burst Number: 1
+ Central to Peripheral Flush Timeout: 2
+ Peripheral to Central Flush Timeout: 2
+ Central to Peripheral MTU: 320
+ Peripheral to Central MTU: 160
+ ISO Interval: 10.00 msec (0x0008)
+...
+> HCI Event: Disconnect Complete (0x05) plen 4
+ Status: Success (0x00)
+ Handle: 257
+ Reason: Remote User Terminated Connection (0x13)
+< HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6
+ Extended scan: Enabled (0x01)
+ Filter duplicates: Enabled (0x01)
+ Duration: 0 msec (0x0000)
+ Period: 0.00 sec (0x0000)
+
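+In outline, the fix guards the auto-connect parameter lookup with a
+link-type check; only the HCI_AUTO_CONN_ALWAYS leg is shown in this
+condensed sketch of the diff below:
+
+	/* hci_disconn_complete_evt(): only LE links re-arm scanning */
+	if (conn->type == LE_LINK) {
+		params = hci_conn_params_lookup(hdev, &conn->dst,
+						conn->dst_type);
+		if (params && params->auto_connect == HCI_AUTO_CONN_ALWAYS) {
+			hci_pend_le_list_del_init(params);
+			hci_pend_le_list_add(params, &hdev->pend_le_conns);
+			hci_update_passive_scan(hdev);
+		}
+	}
+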
+Fixes: 9fcb18ef3acb ("Bluetooth: Introduce LE auto connect options")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_event.c | 37 ++++++++++++++++++++++---------------
+ 1 file changed, 22 insertions(+), 15 deletions(-)
+
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index 388d46c6a043d..d64117be62cc4 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -3393,23 +3393,30 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data,
+ hci_update_scan(hdev);
+ }
+
+- params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
+- if (params) {
+- switch (params->auto_connect) {
+- case HCI_AUTO_CONN_LINK_LOSS:
+- if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT)
++ /* Re-enable passive scanning if disconnected device is marked
++ * as auto-connectable.
++ */
++ if (conn->type == LE_LINK) {
++ params = hci_conn_params_lookup(hdev, &conn->dst,
++ conn->dst_type);
++ if (params) {
++ switch (params->auto_connect) {
++ case HCI_AUTO_CONN_LINK_LOSS:
++ if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT)
++ break;
++ fallthrough;
++
++ case HCI_AUTO_CONN_DIRECT:
++ case HCI_AUTO_CONN_ALWAYS:
++ hci_pend_le_list_del_init(params);
++ hci_pend_le_list_add(params,
++ &hdev->pend_le_conns);
++ hci_update_passive_scan(hdev);
+ break;
+- fallthrough;
+
+- case HCI_AUTO_CONN_DIRECT:
+- case HCI_AUTO_CONN_ALWAYS:
+- hci_pend_le_list_del_init(params);
+- hci_pend_le_list_add(params, &hdev->pend_le_conns);
+- hci_update_passive_scan(hdev);
+- break;
+-
+- default:
+- break;
++ default:
++ break;
++ }
+ }
+ }
+
+--
+2.39.5
+
--- /dev/null
+From cee902bae2f5060c2fe7031f60e94f7a09589cc5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 20:10:22 -0800
+Subject: bnxt_en: handle tpa_info in queue API implementation
+
+From: David Wei <dw@davidwei.uk>
+
+[ Upstream commit bd649c5cc958169b8a8a3e77ea926d92d472b02a ]
+
+Commit 7ed816be35ab ("eth: bnxt: use page pool for head frags") added a
+page pool for header frags, which may be distinct from the existing pool
+for the aggregation ring. Prior to this change, frags used in the TPA
+ring rx_tpa were allocated from system memory, e.g. via
+napi_alloc_frag(), meaning their lifetimes were not associated with a
+page pool. They could be returned at any time, and so the queue API did
+not alloc or free rx_tpa.
+
+But now frags come from a separate head_pool, which may be different
+from page_pool. Without allocating and freeing rx_tpa, frags allocated
+from the old head_pool may be returned to a different new head_pool,
+which causes a mismatch in the page pool hold/release counts.
+
+Fix this problem by properly freeing and allocating rx_tpa in the queue
+API implementation.
+
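+In outline, the queue API now pairs the tpa_info lifetime with the
+queue memory; condensed from the diff below, error handling elided:
+
+	/* bnxt_queue_mem_alloc(): give the clone its own tpa_info */
+	if (bp->flags & BNXT_FLAG_TPA)
+		rc = bnxt_alloc_one_tpa_info(bp, clone);
+
+	/* bnxt_queue_start(): the live ring takes over the clone's state */
+	rxr->rx_tpa = clone->rx_tpa;
+	rxr->rx_tpa_idx_map = clone->rx_tpa_idx_map;
+	rxr->head_pool = clone->head_pool;
+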
+Signed-off-by: David Wei <dw@davidwei.uk>
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://patch.msgid.link/20241204041022.56512-4-dw@davidwei.uk
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 87dd2850835d ("eth: bnxt: fix memory leak in queue reset")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 27 +++++++++++++++++++----
+ 1 file changed, 23 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 800a63daba2b4..ee52ac821ef9a 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -3623,7 +3623,7 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
+ xdp_rxq_info_unreg(&rxr->xdp_rxq);
+
+ page_pool_destroy(rxr->page_pool);
+- if (rxr->page_pool != rxr->head_pool)
++ if (bnxt_separate_head_pool())
+ page_pool_destroy(rxr->head_pool);
+ rxr->page_pool = rxr->head_pool = NULL;
+
+@@ -15199,15 +15199,25 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
+ goto err_free_rx_agg_ring;
+ }
+
++ if (bp->flags & BNXT_FLAG_TPA) {
++ rc = bnxt_alloc_one_tpa_info(bp, clone);
++ if (rc)
++ goto err_free_tpa_info;
++ }
++
+ bnxt_init_one_rx_ring_rxbd(bp, clone);
+ bnxt_init_one_rx_agg_ring_rxbd(bp, clone);
+
+ bnxt_alloc_one_rx_ring_skb(bp, clone, idx);
+ if (bp->flags & BNXT_FLAG_AGG_RINGS)
+ bnxt_alloc_one_rx_ring_page(bp, clone, idx);
++ if (bp->flags & BNXT_FLAG_TPA)
++ bnxt_alloc_one_tpa_info_data(bp, clone);
+
+ return 0;
+
++err_free_tpa_info:
++ bnxt_free_one_tpa_info(bp, clone);
+ err_free_rx_agg_ring:
+ bnxt_free_ring(bp, &clone->rx_agg_ring_struct.ring_mem);
+ err_free_rx_ring:
+@@ -15215,9 +15225,11 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
+ err_rxq_info_unreg:
+ xdp_rxq_info_unreg(&clone->xdp_rxq);
+ err_page_pool_destroy:
+- clone->page_pool->p.napi = NULL;
+ page_pool_destroy(clone->page_pool);
++ if (bnxt_separate_head_pool())
++ page_pool_destroy(clone->head_pool);
+ clone->page_pool = NULL;
++ clone->head_pool = NULL;
+ return rc;
+ }
+
+@@ -15227,13 +15239,15 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem)
+ struct bnxt *bp = netdev_priv(dev);
+ struct bnxt_ring_struct *ring;
+
+- bnxt_free_one_rx_ring(bp, rxr);
+- bnxt_free_one_rx_agg_ring(bp, rxr);
++ bnxt_free_one_rx_ring_skbs(bp, rxr);
+
+ xdp_rxq_info_unreg(&rxr->xdp_rxq);
+
+ page_pool_destroy(rxr->page_pool);
++ if (bnxt_separate_head_pool())
++ page_pool_destroy(rxr->head_pool);
+ rxr->page_pool = NULL;
++ rxr->head_pool = NULL;
+
+ ring = &rxr->rx_ring_struct;
+ bnxt_free_ring(bp, &ring->ring_mem);
+@@ -15315,7 +15329,10 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
+ rxr->rx_agg_prod = clone->rx_agg_prod;
+ rxr->rx_sw_agg_prod = clone->rx_sw_agg_prod;
+ rxr->rx_next_cons = clone->rx_next_cons;
++ rxr->rx_tpa = clone->rx_tpa;
++ rxr->rx_tpa_idx_map = clone->rx_tpa_idx_map;
+ rxr->page_pool = clone->page_pool;
++ rxr->head_pool = clone->head_pool;
+ rxr->xdp_rxq = clone->xdp_rxq;
+
+ bnxt_copy_rx_ring(bp, rxr, clone);
+@@ -15376,6 +15393,8 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
+ bnxt_hwrm_rx_agg_ring_free(bp, rxr, false);
+ rxr->rx_next_cons = 0;
+ page_pool_disable_direct_recycling(rxr->page_pool);
++ if (bnxt_separate_head_pool())
++ page_pool_disable_direct_recycling(rxr->head_pool);
+
+ memcpy(qmem, rxr, sizeof(*rxr));
+ bnxt_init_rx_ring_struct(bp, qmem);
+--
+2.39.5
+
--- /dev/null
+From 243298415ccfac98d5fa4dc18d485768c831d0d7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 20:10:20 -0800
+Subject: bnxt_en: refactor tpa_info alloc/free into helpers
+
+From: David Wei <dw@davidwei.uk>
+
+[ Upstream commit 5883a3e0babf55d85422fddec3422f211c853f6e ]
+
+Refactor bnxt_rx_ring_info->tpa_info operations into helpers that work
+on a single tpa_info, in preparation for the queue API using them.
+
+There are 2 pairs of operations:
+
+* bnxt_alloc_one_tpa_info()
+* bnxt_free_one_tpa_info()
+
+These alloc/free the tpa_info array itself.
+
+* bnxt_alloc_one_tpa_info_data()
+* bnxt_free_one_tpa_info_data()
+
+These alloc/free the frags stored in the tpa_info array.
+
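+For example, the per-ring loop in bnxt_alloc_tpa_info() reduces to one
+call per ring (condensed from the diff below):
+
+	for (i = 0; i < bp->rx_nr_rings; i++) {
+		rc = bnxt_alloc_one_tpa_info(bp, &bp->rx_ring[i]);
+		if (rc)
+			return rc;
+	}
+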
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Signed-off-by: David Wei <dw@davidwei.uk>
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://patch.msgid.link/20241204041022.56512-2-dw@davidwei.uk
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 87dd2850835d ("eth: bnxt: fix memory leak in queue reset")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 142 ++++++++++++++--------
+ 1 file changed, 90 insertions(+), 52 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index b97bced5c002c..800a63daba2b4 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -3363,15 +3363,11 @@ static void bnxt_free_one_rx_agg_ring(struct bnxt *bp, struct bnxt_rx_ring_info
+ }
+ }
+
+-static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
++static void bnxt_free_one_tpa_info_data(struct bnxt *bp,
++ struct bnxt_rx_ring_info *rxr)
+ {
+- struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr];
+- struct bnxt_tpa_idx_map *map;
+ int i;
+
+- if (!rxr->rx_tpa)
+- goto skip_rx_tpa_free;
+-
+ for (i = 0; i < bp->max_tpa; i++) {
+ struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[i];
+ u8 *data = tpa_info->data;
+@@ -3382,6 +3378,17 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
+ tpa_info->data = NULL;
+ page_pool_free_va(rxr->head_pool, data, false);
+ }
++}
++
++static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp,
++ struct bnxt_rx_ring_info *rxr)
++{
++ struct bnxt_tpa_idx_map *map;
++
++ if (!rxr->rx_tpa)
++ goto skip_rx_tpa_free;
++
++ bnxt_free_one_tpa_info_data(bp, rxr);
+
+ skip_rx_tpa_free:
+ if (!rxr->rx_buf_ring)
+@@ -3409,7 +3416,7 @@ static void bnxt_free_rx_skbs(struct bnxt *bp)
+ return;
+
+ for (i = 0; i < bp->rx_nr_rings; i++)
+- bnxt_free_one_rx_ring_skbs(bp, i);
++ bnxt_free_one_rx_ring_skbs(bp, &bp->rx_ring[i]);
+ }
+
+ static void bnxt_free_skbs(struct bnxt *bp)
+@@ -3521,29 +3528,64 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
+ return 0;
+ }
+
++static void bnxt_free_one_tpa_info(struct bnxt *bp,
++ struct bnxt_rx_ring_info *rxr)
++{
++ int i;
++
++ kfree(rxr->rx_tpa_idx_map);
++ rxr->rx_tpa_idx_map = NULL;
++ if (rxr->rx_tpa) {
++ for (i = 0; i < bp->max_tpa; i++) {
++ kfree(rxr->rx_tpa[i].agg_arr);
++ rxr->rx_tpa[i].agg_arr = NULL;
++ }
++ }
++ kfree(rxr->rx_tpa);
++ rxr->rx_tpa = NULL;
++}
++
+ static void bnxt_free_tpa_info(struct bnxt *bp)
+ {
+- int i, j;
++ int i;
+
+ for (i = 0; i < bp->rx_nr_rings; i++) {
+ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+
+- kfree(rxr->rx_tpa_idx_map);
+- rxr->rx_tpa_idx_map = NULL;
+- if (rxr->rx_tpa) {
+- for (j = 0; j < bp->max_tpa; j++) {
+- kfree(rxr->rx_tpa[j].agg_arr);
+- rxr->rx_tpa[j].agg_arr = NULL;
+- }
+- }
+- kfree(rxr->rx_tpa);
+- rxr->rx_tpa = NULL;
++ bnxt_free_one_tpa_info(bp, rxr);
+ }
+ }
+
++static int bnxt_alloc_one_tpa_info(struct bnxt *bp,
++ struct bnxt_rx_ring_info *rxr)
++{
++ struct rx_agg_cmp *agg;
++ int i;
++
++ rxr->rx_tpa = kcalloc(bp->max_tpa, sizeof(struct bnxt_tpa_info),
++ GFP_KERNEL);
++ if (!rxr->rx_tpa)
++ return -ENOMEM;
++
++ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
++ return 0;
++ for (i = 0; i < bp->max_tpa; i++) {
++ agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL);
++ if (!agg)
++ return -ENOMEM;
++ rxr->rx_tpa[i].agg_arr = agg;
++ }
++ rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
++ GFP_KERNEL);
++ if (!rxr->rx_tpa_idx_map)
++ return -ENOMEM;
++
++ return 0;
++}
++
+ static int bnxt_alloc_tpa_info(struct bnxt *bp)
+ {
+- int i, j;
++ int i, rc;
+
+ bp->max_tpa = MAX_TPA;
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+@@ -3554,25 +3596,10 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp)
+
+ for (i = 0; i < bp->rx_nr_rings; i++) {
+ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+- struct rx_agg_cmp *agg;
+-
+- rxr->rx_tpa = kcalloc(bp->max_tpa, sizeof(struct bnxt_tpa_info),
+- GFP_KERNEL);
+- if (!rxr->rx_tpa)
+- return -ENOMEM;
+
+- if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
+- continue;
+- for (j = 0; j < bp->max_tpa; j++) {
+- agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL);
+- if (!agg)
+- return -ENOMEM;
+- rxr->rx_tpa[j].agg_arr = agg;
+- }
+- rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
+- GFP_KERNEL);
+- if (!rxr->rx_tpa_idx_map)
+- return -ENOMEM;
++ rc = bnxt_alloc_one_tpa_info(bp, rxr);
++ if (rc)
++ return rc;
+ }
+ return 0;
+ }
+@@ -4181,10 +4208,31 @@ static void bnxt_alloc_one_rx_ring_page(struct bnxt *bp,
+ rxr->rx_agg_prod = prod;
+ }
+
++static int bnxt_alloc_one_tpa_info_data(struct bnxt *bp,
++ struct bnxt_rx_ring_info *rxr)
++{
++ dma_addr_t mapping;
++ u8 *data;
++ int i;
++
++ for (i = 0; i < bp->max_tpa; i++) {
++ data = __bnxt_alloc_rx_frag(bp, &mapping, rxr,
++ GFP_KERNEL);
++ if (!data)
++ return -ENOMEM;
++
++ rxr->rx_tpa[i].data = data;
++ rxr->rx_tpa[i].data_ptr = data + bp->rx_offset;
++ rxr->rx_tpa[i].mapping = mapping;
++ }
++
++ return 0;
++}
++
+ static int bnxt_alloc_one_rx_ring(struct bnxt *bp, int ring_nr)
+ {
+ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr];
+- int i;
++ int rc;
+
+ bnxt_alloc_one_rx_ring_skb(bp, rxr, ring_nr);
+
+@@ -4194,19 +4242,9 @@ static int bnxt_alloc_one_rx_ring(struct bnxt *bp, int ring_nr)
+ bnxt_alloc_one_rx_ring_page(bp, rxr, ring_nr);
+
+ if (rxr->rx_tpa) {
+- dma_addr_t mapping;
+- u8 *data;
+-
+- for (i = 0; i < bp->max_tpa; i++) {
+- data = __bnxt_alloc_rx_frag(bp, &mapping, rxr,
+- GFP_KERNEL);
+- if (!data)
+- return -ENOMEM;
+-
+- rxr->rx_tpa[i].data = data;
+- rxr->rx_tpa[i].data_ptr = data + bp->rx_offset;
+- rxr->rx_tpa[i].mapping = mapping;
+- }
++ rc = bnxt_alloc_one_tpa_info_data(bp, rxr);
++ if (rc)
++ return rc;
+ }
+ return 0;
+ }
+@@ -13463,7 +13501,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
+ bnxt_reset_task(bp, true);
+ break;
+ }
+- bnxt_free_one_rx_ring_skbs(bp, i);
++ bnxt_free_one_rx_ring_skbs(bp, rxr);
+ rxr->rx_prod = 0;
+ rxr->rx_agg_prod = 0;
+ rxr->rx_sw_agg_prod = 0;
+--
+2.39.5
+
--- /dev/null
+From dc697a782763a4d7629605ea095d23c23042bf7b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 02:39:22 +0000
+Subject: bonding: fix incorrect MAC address setting to receive NS messages
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 0c5e145a350de3b38cd5ae77a401b12c46fb7c1d ]
+
+When validation on the backup slave is enabled, we need to validate the
+Neighbor Solicitation (NS) messages received on the backup slave. To
+receive these messages, the correct destination MAC address must be added
+to the slave. However, the target in bonding is a unicast address, which
+we cannot use directly. Instead, we should first convert it to a
+Solicited-Node Multicast Address and then derive the corresponding MAC
+address.
+
+Fix the incorrect MAC address setting in both slave_set_ns_maddr() and
+slave_set_ns_maddrs(). Since the two function names are similar, add
+descriptions for the functions. Also use a single mac_addr variable in
+slave_set_ns_maddr() to save some code and logic.
+
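+In outline, the conversion now goes unicast target -> solicited-node
+multicast address -> multicast MAC (condensed from the diff below):
+
+	struct in6_addr mcaddr;
+	char mac_addr[MAX_ADDR_LEN];
+
+	/* ff02::1:ffXX:XXXX from the low 24 bits of the target */
+	addrconf_addr_solict_mult(target, &mcaddr);
+	/* map the multicast address to a MAC and program the slave */
+	if (!ndisc_mc_map(&mcaddr, mac_addr, slave->dev, 0))
+		dev_mc_add(slave->dev, mac_addr);
+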
+Fixes: 8eb36164d1a6 ("bonding: add ns target multicast address to slave device")
+Acked-by: Jay Vosburgh <jv@jvosburgh.net>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250306023923.38777-2-liuhangbin@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/bonding/bond_options.c | 55 +++++++++++++++++++++++++-----
+ 1 file changed, 47 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
+index 327b6ecdc77e0..d1b095af253bd 100644
+--- a/drivers/net/bonding/bond_options.c
++++ b/drivers/net/bonding/bond_options.c
+@@ -1242,10 +1242,28 @@ static bool slave_can_set_ns_maddr(const struct bonding *bond, struct slave *sla
+ slave->dev->flags & IFF_MULTICAST;
+ }
+
++/**
++ * slave_set_ns_maddrs - add/del all NS mac addresses for slave
++ * @bond: bond device
++ * @slave: slave device
++ * @add: add or remove all the NS mac addresses
++ *
++ * This function tries to add or delete all the NS mac addresses on the slave
++ *
++ * Note, the IPv6 NS target address is the unicast address in Neighbor
++ * Solicitation (NS) message. The dest address of NS message should be
++ * solicited-node multicast address of the target. The dest mac of NS message
++ * is converted from the solicited-node multicast address.
++ *
++ * This function is called when
++ * * arp_validate changes
++ * * enslaving, releasing new slaves
++ */
+ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)
+ {
+ struct in6_addr *targets = bond->params.ns_targets;
+ char slot_maddr[MAX_ADDR_LEN];
++ struct in6_addr mcaddr;
+ int i;
+
+ if (!slave_can_set_ns_maddr(bond, slave))
+@@ -1255,7 +1273,8 @@ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool
+ if (ipv6_addr_any(&targets[i]))
+ break;
+
+- if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) {
++ addrconf_addr_solict_mult(&targets[i], &mcaddr);
++ if (!ndisc_mc_map(&mcaddr, slot_maddr, slave->dev, 0)) {
+ if (add)
+ dev_mc_add(slave->dev, slot_maddr);
+ else
+@@ -1278,23 +1297,43 @@ void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave)
+ slave_set_ns_maddrs(bond, slave, false);
+ }
+
++/**
++ * slave_set_ns_maddr - set new NS mac address for slave
++ * @bond: bond device
++ * @slave: slave device
++ * @target: the new IPv6 target
++ * @slot: the old IPv6 target in the slot
++ *
++ * This function tries to replace the old mac address to new one on the slave.
++ *
++ * Note, the target/slot IPv6 address is the unicast address in Neighbor
++ * Solicitation (NS) message. The dest address of NS message should be
++ * solicited-node multicast address of the target. The dest mac of NS message
++ * is converted from the solicited-node multicast address.
++ *
++ * This function is called when
++ * * An IPv6 NS target is added or removed.
++ */
+ static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave,
+ struct in6_addr *target, struct in6_addr *slot)
+ {
+- char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN];
++ char mac_addr[MAX_ADDR_LEN];
++ struct in6_addr mcast_addr;
+
+ if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave))
+ return;
+
+- /* remove the previous maddr from slave */
++ /* remove the previous mac addr from slave */
++ addrconf_addr_solict_mult(slot, &mcast_addr);
+ if (!ipv6_addr_any(slot) &&
+- !ndisc_mc_map(slot, slot_maddr, slave->dev, 0))
+- dev_mc_del(slave->dev, slot_maddr);
++ !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0))
++ dev_mc_del(slave->dev, mac_addr);
+
+- /* add new maddr on slave if target is set */
++ /* add new mac addr on slave if target is set */
++ addrconf_addr_solict_mult(target, &mcast_addr);
+ if (!ipv6_addr_any(target) &&
+- !ndisc_mc_map(target, target_maddr, slave->dev, 0))
+- dev_mc_add(slave->dev, target_maddr);
++ !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0))
++ dev_mc_add(slave->dev, mac_addr);
+ }
+
+ static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot,
+--
+2.39.5
+
--- /dev/null
+From 7d8881fe0204ad4ce5d5d4e251282880569d4866 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Mar 2025 20:52:08 -0700
+Subject: Drivers: hv: vmbus: Don't release fb_mmio resource in
+ vmbus_free_mmio()
+
+From: Michael Kelley <mhklinux@outlook.com>
+
+[ Upstream commit 73fe9073c0cc28056cb9de0c8a516dac070f1d1f ]
+
+The VMBus driver manages the MMIO space it owns via the hyperv_mmio
+resource tree. Because the synthetic video framebuffer portion of the
+MMIO space is initially setup by the Hyper-V host for each guest, the
+VMBus driver does an early reserve of that portion of MMIO space in the
+hyperv_mmio resource tree. It saves a pointer to that resource in
+fb_mmio. When a VMBus driver requests MMIO space and passes "true"
+for the "fb_overlap_ok" argument, the reserved framebuffer space is
+used if possible. In that case it's not necessary to do another request
+against the "shadow" hyperv_mmio resource tree because that resource
+was already requested in the early reserve steps.
+
+However, the vmbus_free_mmio() function currently does no special
+handling for the fb_mmio resource. When a framebuffer device is
+removed, or the driver is unbound, the current code for
+vmbus_free_mmio() releases the reserved resource, leaving fb_mmio
+pointing to memory that has been freed. If the same or another
+driver is subsequently bound to the device, vmbus_allocate_mmio()
+checks against fb_mmio, and potentially gets garbage. Furthermore,
+a second unbind operation produces this "nonexistent resource" error
+because of the unbalanced behavior between vmbus_allocate_mmio() and
+vmbus_free_mmio():
+
+[ 55.499643] resource: Trying to free nonexistent
+ resource <0x00000000f0000000-0x00000000f07fffff>
+
+Fix this by adding logic to vmbus_free_mmio() to recognize when
+MMIO space in the fb_mmio reserved area would be released, and don't
+release it. This filtering ensures the fb_mmio resource always exists,
+and makes vmbus_free_mmio() more parallel with vmbus_allocate_mmio().
+
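+In outline, the release is skipped when the range lies entirely inside
+the fb_mmio shadow region (condensed from the diff below):
+
+	/* fb_mmio had no matching __request_region(), so don't release */
+	if (fb_mmio && start >= fb_mmio->start &&
+	    (start + size - 1 <= fb_mmio->end))
+		goto skip_shadow_release;
+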
+Fixes: be000f93e5d7 ("drivers:hv: Track allocations of children of hv_vmbus in private resource tree")
+Signed-off-by: Michael Kelley <mhklinux@outlook.com>
+Tested-by: Saurabh Sengar <ssengar@linux.microsoft.com>
+Reviewed-by: Saurabh Sengar <ssengar@linux.microsoft.com>
+Link: https://lore.kernel.org/r/20250310035208.275764-1-mhklinux@outlook.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Message-ID: <20250310035208.275764-1-mhklinux@outlook.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hv/vmbus_drv.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
+index 9b15f7daf5059..2b6749c9712ef 100644
+--- a/drivers/hv/vmbus_drv.c
++++ b/drivers/hv/vmbus_drv.c
+@@ -2262,12 +2262,25 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size)
+ struct resource *iter;
+
+ mutex_lock(&hyperv_mmio_lock);
++
++ /*
++ * If all bytes of the MMIO range to be released are within the
++ * special case fb_mmio shadow region, skip releasing the shadow
++ * region since no corresponding __request_region() was done
++ * in vmbus_allocate_mmio().
++ */
++ if (fb_mmio && start >= fb_mmio->start &&
++ (start + size - 1 <= fb_mmio->end))
++ goto skip_shadow_release;
++
+ for (iter = hyperv_mmio; iter; iter = iter->sibling) {
+ if ((iter->start >= start + size) || (iter->end <= start))
+ continue;
+
+ __release_region(iter, start, size);
+ }
++
++skip_shadow_release:
+ release_mem_region(start, size);
+ mutex_unlock(&hyperv_mmio_lock);
+
+--
+2.39.5
+
--- /dev/null
+From 36382c5f590f273138fb18e7db34494104ddab91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Feb 2025 11:34:41 -0800
+Subject: drm/hyperv: Fix address space leak when Hyper-V DRM device is removed
+
+From: Michael Kelley <mhklinux@outlook.com>
+
+[ Upstream commit aed709355fd05ef747e1af24a1d5d78cd7feb81e ]
+
+When a Hyper-V DRM device is probed, the driver allocates MMIO space for
+the vram, and maps it cacheable. If the device is removed, or in the
+error path for device probing, the MMIO space is released but no unmap
+is done. Consequently, the kernel address space for the mapping is
+leaked.
+
+Fix this by adding iounmap() calls in the device removal path, and in the
+error path during device probing.
+
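+In outline, both teardown paths now unmap before releasing the MMIO
+range (condensed from the diff below):
+
+	/* hyperv_vmbus_remove() and the probe error path */
+	iounmap(hv->vram);
+	vmbus_free_mmio(hv->mem->start, hv->fb_size);
+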
+Fixes: f1f63cbb705d ("drm/hyperv: Fix an error handling path in hyperv_vmbus_probe()")
+Fixes: a0ab5abced55 ("drm/hyperv : Removing the restruction of VRAM allocation with PCI bar size")
+Signed-off-by: Michael Kelley <mhklinux@outlook.com>
+Reviewed-by: Saurabh Sengar <ssengar@linux.microsoft.com>
+Tested-by: Saurabh Sengar <ssengar@linux.microsoft.com>
+Link: https://lore.kernel.org/r/20250210193441.2414-1-mhklinux@outlook.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Message-ID: <20250210193441.2414-1-mhklinux@outlook.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
+index ff93e08d5036d..5f02a5a39ab4a 100644
+--- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
++++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
+@@ -154,6 +154,7 @@ static int hyperv_vmbus_probe(struct hv_device *hdev,
+ return 0;
+
+ err_free_mmio:
++ iounmap(hv->vram);
+ vmbus_free_mmio(hv->mem->start, hv->fb_size);
+ err_vmbus_close:
+ vmbus_close(hdev->channel);
+@@ -172,6 +173,7 @@ static void hyperv_vmbus_remove(struct hv_device *hdev)
+ vmbus_close(hdev->channel);
+ hv_set_drvdata(hdev, NULL);
+
++ iounmap(hv->vram);
+ vmbus_free_mmio(hv->mem->start, hv->fb_size);
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 8551679b1333fc61d338511c88fe2f0a0b5ee744 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Mar 2025 13:42:15 +0000
+Subject: eth: bnxt: do not update checksum in bnxt_xdp_build_skb()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit c03e7d05aa0e2f7e9a9ce5ad8a12471a53f941dc ]
+
+bnxt_rx_pkt() updates the ip_summed value at the end if checksum
+offload is enabled.
+When the XDP-MB program is attached and it returns XDP_PASS,
+bnxt_xdp_build_skb() is called to update skb_shared_info.
+The main purpose of bnxt_xdp_build_skb() is to update skb_shared_info,
+but it also updates the ip_summed value if checksum offload is enabled.
+This is actually duplicate work.
+
+When bnxt_rx_pkt() updates the ip_summed value, it checks whether
+ip_summed is CHECKSUM_NONE.
+That means ip_summed should still be CHECKSUM_NONE at that point.
+But ip_summed may already have been updated to CHECKSUM_UNNECESSARY in
+the XDP-MB-PASS path, so skb_checksum_none_assert() WARNs about it.
+
+Updating ip_summed in bnxt_xdp_build_skb() is therefore duplicate work
+and is not needed.
+
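+After the change, the checksum state is set in one place only; roughly
+the logic that remains in bnxt_rx_pkt() (and was dropped from
+bnxt_xdp_build_skb(), see the diff below):
+
+	/* skb arrives here with ip_summed still CHECKSUM_NONE */
+	if (RX_CMP_L4_CS_OK(rxcmp1) &&
+	    (bp->dev->features & NETIF_F_RXCSUM)) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->csum_level = RX_CMP_ENCAP(rxcmp1);
+	}
+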
+Splat looks like:
+WARNING: CPU: 3 PID: 5782 at ./include/linux/skbuff.h:5155 bnxt_rx_pkt+0x479b/0x7610 [bnxt_en]
+Modules linked in: bnxt_re bnxt_en rdma_ucm rdma_cm iw_cm ib_cm ib_uverbs veth xt_nat xt_tcpudp xt_conntrack nft_chain_nat xt_MASQUERADE nf_]
+CPU: 3 UID: 0 PID: 5782 Comm: socat Tainted: G W 6.14.0-rc4+ #27
+Tainted: [W]=WARN
+Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021
+RIP: 0010:bnxt_rx_pkt+0x479b/0x7610 [bnxt_en]
+Code: 54 24 0c 4c 89 f1 4c 89 ff c1 ea 1f ff d3 0f 1f 00 49 89 c6 48 85 c0 0f 84 4c e5 ff ff 48 89 c7 e8 ca 3d a0 c8 e9 8f f4 ff ff <0f> 0b f
+RSP: 0018:ffff88881ba09928 EFLAGS: 00010202
+RAX: 0000000000000000 RBX: 00000000c7590303 RCX: 0000000000000000
+RDX: 1ffff1104e7d1610 RSI: 0000000000000001 RDI: ffff8881c91300b8
+RBP: ffff88881ba09b28 R08: ffff888273e8b0d0 R09: ffff888273e8b070
+R10: ffff888273e8b010 R11: ffff888278b0f000 R12: ffff888273e8b080
+R13: ffff8881c9130e00 R14: ffff8881505d3800 R15: ffff888273e8b000
+FS: 00007f5a2e7be080(0000) GS:ffff88881ba00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fff2e708ff8 CR3: 000000013e3b0000 CR4: 00000000007506f0
+PKRU: 55555554
+Call Trace:
+ <IRQ>
+ ? __warn+0xcd/0x2f0
+ ? bnxt_rx_pkt+0x479b/0x7610
+ ? report_bug+0x326/0x3c0
+ ? handle_bug+0x53/0xa0
+ ? exc_invalid_op+0x14/0x50
+ ? asm_exc_invalid_op+0x16/0x20
+ ? bnxt_rx_pkt+0x479b/0x7610
+ ? bnxt_rx_pkt+0x3e41/0x7610
+ ? __pfx_bnxt_rx_pkt+0x10/0x10
+ ? napi_complete_done+0x2cf/0x7d0
+ __bnxt_poll_work+0x4e8/0x1220
+ ? __pfx___bnxt_poll_work+0x10/0x10
+ ? __pfx_mark_lock.part.0+0x10/0x10
+ bnxt_poll_p5+0x36a/0xfa0
+ ? __pfx_bnxt_poll_p5+0x10/0x10
+ __napi_poll.constprop.0+0xa0/0x440
+ net_rx_action+0x899/0xd00
+...
+
+The following ping.py patch adds an xdp-mb-pass case, so ping.py will
+be able to reproduce this issue.
+
+Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Link: https://patch.msgid.link/20250309134219.91670-5-ap420073@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 11 ++---------
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 3 +--
+ 3 files changed, 5 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 442c85b3ea3f3..7293d7732d6ce 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -2205,7 +2205,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+ if (!skb)
+ goto oom_next_rx;
+ } else {
+- skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1);
++ skb = bnxt_xdp_build_skb(bp, skb, agg_bufs,
++ rxr->page_pool, &xdp);
+ if (!skb) {
+ /* we should be able to free the old skb here */
+ bnxt_xdp_buff_frags_free(rxr, &xdp);
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+index f9e7e71b89485..8726657f5cb9e 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+@@ -456,20 +456,13 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+
+ struct sk_buff *
+ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags,
+- struct page_pool *pool, struct xdp_buff *xdp,
+- struct rx_cmp_ext *rxcmp1)
++ struct page_pool *pool, struct xdp_buff *xdp)
+ {
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+
+ if (!skb)
+ return NULL;
+- skb_checksum_none_assert(skb);
+- if (RX_CMP_L4_CS_OK(rxcmp1)) {
+- if (bp->dev->features & NETIF_F_RXCSUM) {
+- skb->ip_summed = CHECKSUM_UNNECESSARY;
+- skb->csum_level = RX_CMP_ENCAP(rxcmp1);
+- }
+- }
++
+ xdp_update_skb_shared_info(skb, num_frags,
+ sinfo->xdp_frags_size,
+ BNXT_RX_PAGE_SIZE * num_frags,
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+index 0122782400b8a..220285e190fcd 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+@@ -33,6 +33,5 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr,
+ struct xdp_buff *xdp);
+ struct sk_buff *bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb,
+ u8 num_frags, struct page_pool *pool,
+- struct xdp_buff *xdp,
+- struct rx_cmp_ext *rxcmp1);
++ struct xdp_buff *xdp);
+ #endif
+--
+2.39.5
+
--- /dev/null
+From 83b70c466e2be35e77a37377ebf3a84e1165fb40 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Mar 2025 13:42:14 +0000
+Subject: eth: bnxt: do not use BNXT_VNIC_NTUPLE unconditionally in queue
+ restart logic
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 661958552eda5bf64bfafb4821cbdded935f1f68 ]
+
+When a queue is restarted, it sets the MRU to 0 to stop packet flow.
+The MRU variable is a member of vnic_info[]; the first vnic_info is the
+default and the second is for ntuple.
+Only when ntuple is enabled (ethtool -K eth0 ntuple on) is the
+vnic_info for ntuple allocated in the init logic.
+bp->nr_vnics indicates how many vnic_info are allocated.
+However, bnxt_queue_{start | stop}() accesses vnic_info[BNXT_VNIC_NTUPLE]
+regardless of the ntuple state.
+
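+In outline, the restart paths now walk only the vnics that were
+actually allocated (condensed from the diff below):
+
+	/* bnxt_queue_{start | stop}(): bp->nr_vnics bounds vnic_info[] */
+	for (i = 0; i < bp->nr_vnics; i++) {
+		vnic = &bp->vnic_info[i];
+		/* update MRU / RSS for this vnic */
+	}
+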
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Fixes: b9d2956e869c ("bnxt_en: stop packet flow during bnxt_queue_stop/start")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Link: https://patch.msgid.link/20250309134219.91670-4-ap420073@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index a476f9da40c27..442c85b3ea3f3 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -15287,7 +15287,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
+ cpr = &rxr->bnapi->cp_ring;
+ cpr->sw_stats->rx.rx_resets++;
+
+- for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) {
++	for (i = 0; i < bp->nr_vnics; i++) {
+ vnic = &bp->vnic_info[i];
+
+ rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true);
+@@ -15315,7 +15315,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
+ struct bnxt_vnic_info *vnic;
+ int i;
+
+- for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) {
++	for (i = 0; i < bp->nr_vnics; i++) {
+ vnic = &bp->vnic_info[i];
+ vnic->mru = 0;
+ bnxt_hwrm_vnic_update(bp, vnic,
+--
+2.39.5
+
--- /dev/null
+From 84c9a6911d7b8cc92657082c3a03631cdc2c3eb9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Mar 2025 13:42:16 +0000
+Subject: eth: bnxt: fix kernel panic in the bnxt_get_queue_stats{rx | tx}
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit f09af5fdfbd9b0fcee73aab1116904c53b199e97 ]
+
+When a qstats-get operation is executed, the netdev_stat_ops callbacks
+are called. bnxt_get_queue_stats{rx | tx}() collect per-queue stats
+from sw_stats in the rings.
+But the {rx | tx | cp}_ring structures are allocated only when the
+interface is up, so these rings do not exist while the interface is
+down.
+
+qstats-get is allowed even if the interface is down. However,
+bnxt_get_queue_stats{rx | tx}() access cp_ring and tx_ring without a
+null check, so accessing the rings must be avoided while the interface
+is down.
+
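+In outline, both callbacks now bail out while the rings are not
+allocated (condensed from the diff below):
+
+	/* bnxt_get_queue_stats_rx(): bp->bnapi is NULL while down */
+	if (!bp->bnapi)
+		return;
+
+	/* bnxt_get_queue_stats_tx(): likewise for bp->tx_ring */
+	if (!bp->tx_ring)
+		return;
+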
+Reproducer:
+ ip link set $interface down
+ ./cli.py --spec netdev.yaml --dump qstats-get
+OR
+ ip link set $interface down
+ python ./stats.py
+
+Splat looks like:
+ BUG: kernel NULL pointer dereference, address: 0000000000000000
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 1680fa067 P4D 1680fa067 PUD 16be3b067 PMD 0
+ Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI
+ CPU: 0 UID: 0 PID: 1495 Comm: python3 Not tainted 6.14.0-rc4+ #32 5cd0f999d5a15c574ac72b3e4b907341
+ Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021
+ RIP: 0010:bnxt_get_queue_stats_rx+0xf/0x70 [bnxt_en]
+ Code: c6 87 b5 18 00 00 02 eb a2 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 01
+ RSP: 0018:ffffabef43cdb7e0 EFLAGS: 00010282
+ RAX: 0000000000000000 RBX: ffffffffc04c8710 RCX: 0000000000000000
+ RDX: ffffabef43cdb858 RSI: 0000000000000000 RDI: ffff8d504e850000
+ RBP: ffff8d506c9f9c00 R08: 0000000000000004 R09: ffff8d506bcd901c
+ R10: 0000000000000015 R11: ffff8d506bcd9000 R12: 0000000000000000
+ R13: ffffabef43cdb8c0 R14: ffff8d504e850000 R15: 0000000000000000
+ FS: 00007f2c5462b080(0000) GS:ffff8d575f600000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000000 CR3: 0000000167fd0000 CR4: 00000000007506f0
+ PKRU: 55555554
+ Call Trace:
+ <TASK>
+ ? __die+0x20/0x70
+ ? page_fault_oops+0x15a/0x460
+ ? sched_balance_find_src_group+0x58d/0xd10
+ ? exc_page_fault+0x6e/0x180
+ ? asm_exc_page_fault+0x22/0x30
+ ? bnxt_get_queue_stats_rx+0xf/0x70 [bnxt_en cdd546fd48563c280cfd30e9647efa420db07bf1]
+ netdev_nl_stats_by_netdev+0x2b1/0x4e0
+ ? xas_load+0x9/0xb0
+ ? xas_find+0x183/0x1d0
+ ? xa_find+0x8b/0xe0
+ netdev_nl_qstats_get_dumpit+0xbf/0x1e0
+ genl_dumpit+0x31/0x90
+ netlink_dump+0x1a8/0x360
+
+Fixes: af7b3b4adda5 ("eth: bnxt: support per-queue statistics")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Link: https://patch.msgid.link/20250309134219.91670-6-ap420073@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 7293d7732d6ce..eba0f9991476c 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -15032,6 +15032,9 @@ static void bnxt_get_queue_stats_rx(struct net_device *dev, int i,
+ struct bnxt_cp_ring_info *cpr;
+ u64 *sw;
+
++ if (!bp->bnapi)
++ return;
++
+ cpr = &bp->bnapi[i]->cp_ring;
+ sw = cpr->stats.sw_stats;
+
+@@ -15055,6 +15058,9 @@ static void bnxt_get_queue_stats_tx(struct net_device *dev, int i,
+ struct bnxt_napi *bnapi;
+ u64 *sw;
+
++ if (!bp->tx_ring)
++ return;
++
+ bnapi = bp->tx_ring[bp->tx_ring_map[i]].bnapi;
+ sw = bnapi->cp_ring.stats.sw_stats;
+
+--
+2.39.5
+
--- /dev/null
+From fbee9c70a99753b8cabff12fb2267b6150ab63d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Mar 2025 13:42:17 +0000
+Subject: eth: bnxt: fix memory leak in queue reset
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 87dd2850835dd7886726b428a8ef7d73a60520c7 ]
+
+When the queue is reset, bnxt_alloc_one_tpa_info() is called to
+allocate tpa_info for the new queue.
+The old queue's tpa_info should then be removed by
+bnxt_free_one_tpa_info(), but it is not called, so a memory leak
+occurs.
+Add bnxt_free_one_tpa_info() to bnxt_queue_mem_free().
+
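+In outline (condensed from the diff below):
+
+	/* bnxt_queue_mem_free(): drop the old queue's TPA state too */
+	bnxt_free_one_rx_ring_skbs(bp, rxr);
+	bnxt_free_one_tpa_info(bp, rxr);
+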
+unreferenced object 0xffff888293cc0000 (size 16384):
+ comm "ncdevmem", pid 2076, jiffies 4296604081
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 40 75 78 93 82 88 ff ff ........@ux.....
+ 40 75 78 93 02 00 00 00 00 00 00 00 00 00 00 00 @ux.............
+ backtrace (crc 5d7d4798):
+ ___kmalloc_large_node+0x10d/0x1b0
+ __kmalloc_large_node_noprof+0x17/0x60
+ __kmalloc_noprof+0x3f6/0x520
+ bnxt_alloc_one_tpa_info+0x5f/0x300 [bnxt_en]
+ bnxt_queue_mem_alloc+0x8e8/0x14f0 [bnxt_en]
+ netdev_rx_queue_restart+0x233/0x620
+ net_devmem_bind_dmabuf_to_queue+0x2a3/0x600
+ netdev_nl_bind_rx_doit+0xc00/0x10a0
+ genl_family_rcv_msg_doit+0x1d4/0x2b0
+ genl_rcv_msg+0x3fb/0x6c0
+ netlink_rcv_skb+0x12c/0x360
+ genl_rcv+0x24/0x40
+ netlink_unicast+0x447/0x710
+ netlink_sendmsg+0x712/0xbc0
+ __sys_sendto+0x3fd/0x4d0
+ __x64_sys_sendto+0xdc/0x1b0
+
+Fixes: 2d694c27d32e ("bnxt_en: implement netdev_queue_mgmt_ops")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Link: https://patch.msgid.link/20250309134219.91670-7-ap420073@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index ee52ac821ef9a..e7580df13229a 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -15240,6 +15240,7 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem)
+ struct bnxt_ring_struct *ring;
+
+ bnxt_free_one_rx_ring_skbs(bp, rxr);
++ bnxt_free_one_tpa_info(bp, rxr);
+
+ xdp_rxq_info_unreg(&rxr->xdp_rxq);
+
+--
+2.39.5
+
--- /dev/null
+From efd78950e2d2365340b825d41f6646f462ce6a0f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Mar 2025 13:42:12 +0000
+Subject: eth: bnxt: fix truesize for mb-xdp-pass case
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 9f7b2aa5034e24d3c49db73d5f760c0435fe31c2 ]
+
+When mb-xdp is set and the verdict is XDP_PASS, the packet is converted
+from an xdp_buff to an sk_buff with xdp_update_skb_shared_info() in
+bnxt_xdp_build_skb().
+bnxt_xdp_build_skb() passes an incorrect truesize argument to
+xdp_update_skb_shared_info().
+The truesize is calculated as BNXT_RX_PAGE_SIZE * sinfo->nr_frags, but
+the skb_shared_info was already wiped by napi_build_skb().
+So, store sinfo->nr_frags before bnxt_xdp_build_skb() runs and use it
+instead of getting the skb_shared_info from
+xdp_get_shared_info_from_buff().
+
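+In outline, the caller snapshots the frag count while the xdp_buff is
+still valid and the truesize is derived from it (condensed from the
+diff below):
+
+	/* bnxt_rx_pkt(): read nr_frags before napi_build_skb() wipes it */
+	if (xdp_buff_has_frags(&xdp))
+		agg_bufs = xdp_get_shared_info_from_buff(&xdp)->nr_frags;
+
+	/* bnxt_xdp_build_skb(): truesize from the saved count */
+	xdp_update_skb_shared_info(skb, num_frags, sinfo->xdp_frags_size,
+				   BNXT_RX_PAGE_SIZE * num_frags,
+				   xdp_buff_is_frag_pfmemalloc(xdp));
+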
+Splat looks like:
+ ------------[ cut here ]------------
+ WARNING: CPU: 2 PID: 0 at net/core/skbuff.c:6072 skb_try_coalesce+0x504/0x590
+ Modules linked in: xt_nat xt_tcpudp veth af_packet xt_conntrack nft_chain_nat xt_MASQUERADE nf_conntrack_netlink xfrm_user xt_addrtype nft_coms
+ CPU: 2 UID: 0 PID: 0 Comm: swapper/2 Not tainted 6.14.0-rc2+ #3
+ RIP: 0010:skb_try_coalesce+0x504/0x590
+ Code: 4b fd ff ff 49 8b 34 24 40 80 e6 40 0f 84 3d fd ff ff 49 8b 74 24 48 40 f6 c6 01 0f 84 2e fd ff ff 48 8d 4e ff e9 25 fd ff ff <0f> 0b e99
+ RSP: 0018:ffffb62c4120caa8 EFLAGS: 00010287
+ RAX: 0000000000000003 RBX: ffffb62c4120cb14 RCX: 0000000000000ec0
+ RDX: 0000000000001000 RSI: ffffa06e5d7dc000 RDI: 0000000000000003
+ RBP: ffffa06e5d7ddec0 R08: ffffa06e6120a800 R09: ffffa06e7a119900
+ R10: 0000000000002310 R11: ffffa06e5d7dcec0 R12: ffffe4360575f740
+ R13: ffffe43600000000 R14: 0000000000000002 R15: 0000000000000002
+ FS: 0000000000000000(0000) GS:ffffa0755f700000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007f147b76b0f8 CR3: 00000001615d4000 CR4: 00000000007506f0
+ PKRU: 55555554
+ Call Trace:
+ <IRQ>
+ ? __warn+0x84/0x130
+ ? skb_try_coalesce+0x504/0x590
+ ? report_bug+0x18a/0x1a0
+ ? handle_bug+0x53/0x90
+ ? exc_invalid_op+0x14/0x70
+ ? asm_exc_invalid_op+0x16/0x20
+ ? skb_try_coalesce+0x504/0x590
+ inet_frag_reasm_finish+0x11f/0x2e0
+ ip_defrag+0x37a/0x900
+ ip_local_deliver+0x51/0x120
+ ip_sublist_rcv_finish+0x64/0x70
+ ip_sublist_rcv+0x179/0x210
+ ip_list_rcv+0xf9/0x130
+
+How to reproduce:
+<Node A>
+ip link set $interface1 xdp obj xdp_pass.o
+ip link set $interface1 mtu 9000 up
+ip a a 10.0.0.1/24 dev $interface1
+<Node B>
+ip link set $interfac2 mtu 9000 up
+ip a a 10.0.0.2/24 dev $interface2
+ping 10.0.0.1 -s 65000
+
+The following ping.py patch adds an xdp-mb-pass case, so ping.py will
+be able to reproduce this issue.
+
+Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Link: https://patch.msgid.link/20250309134219.91670-2-ap420073@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++++
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 +-
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 603e9c968c44b..39a5ea4d3b4ea 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -2025,6 +2025,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+ struct rx_cmp_ext *rxcmp1;
+ u32 tmp_raw_cons = *raw_cons;
+ u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
++ struct skb_shared_info *sinfo;
+ struct bnxt_sw_rx_bd *rx_buf;
+ unsigned int len;
+ u8 *data_ptr, agg_bufs, cmp_type;
+@@ -2151,6 +2152,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+ false);
+ if (!frag_len)
+ goto oom_next_rx;
++
+ }
+ xdp_active = true;
+ }
+@@ -2160,6 +2162,12 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+ rc = 1;
+ goto next_rx;
+ }
++ if (xdp_buff_has_frags(&xdp)) {
++ sinfo = xdp_get_shared_info_from_buff(&xdp);
++ agg_bufs = sinfo->nr_frags;
++ } else {
++ agg_bufs = 0;
++ }
+ }
+
+ if (len <= bp->rx_copy_thresh) {
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+index dc51dce209d5f..f9e7e71b89485 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+@@ -472,7 +472,7 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags,
+ }
+ xdp_update_skb_shared_info(skb, num_frags,
+ sinfo->xdp_frags_size,
+- BNXT_RX_PAGE_SIZE * sinfo->nr_frags,
++ BNXT_RX_PAGE_SIZE * num_frags,
+ xdp_buff_is_frag_pfmemalloc(xdp));
+ return skb;
+ }
+--
+2.39.5
+
--- /dev/null
+From a04d838b08ae7faf0486154e9f66253c06ab906a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Mar 2025 13:42:13 +0000
+Subject: eth: bnxt: return fail if interface is down in bnxt_queue_mem_alloc()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit ca2456e073957781e1184de68551c65161b2bd30 ]
+
+bnxt_queue_mem_alloc() is called to allocate new queue memory when
+a queue is restarted.
+It internally accesses the rx buffer descriptor corresponding to the
+index.
+The rx buffer descriptor is allocated and set when the interface is up,
+and it is freed when the interface is down.
+So, if a queue is restarted while the interface is down, a kernel panic
+occurs.
+
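+In outline (condensed from the diff below):
+
+	/* rx rings only exist while the interface is up */
+	if (!bp->rx_ring)
+		return -ENETDOWN;
+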
+Splat looks like:
+ BUG: unable to handle page fault for address: 000000000000b240
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI
+ CPU: 3 UID: 0 PID: 1563 Comm: ncdevmem2 Not tainted 6.14.0-rc2+ #9 844ddba6e7c459cafd0bf4db9a3198e
+ Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021
+ RIP: 0010:bnxt_queue_mem_alloc+0x3f/0x4e0 [bnxt_en]
+ Code: 41 54 4d 89 c4 4d 69 c0 c0 05 00 00 55 48 89 f5 53 48 89 fb 4c 8d b5 40 05 00 00 48 83 ec 15
+ RSP: 0018:ffff9dcc83fef9e8 EFLAGS: 00010202
+ RAX: ffffffffc0457720 RBX: ffff934ed8d40000 RCX: 0000000000000000
+ RDX: 000000000000001f RSI: ffff934ea508f800 RDI: ffff934ea508f808
+ RBP: ffff934ea508f800 R08: 000000000000b240 R09: ffff934e84f4b000
+ R10: ffff9dcc83fefa30 R11: ffff934e84f4b000 R12: 000000000000001f
+ R13: ffff934ed8d40ac0 R14: ffff934ea508fd40 R15: ffff934e84f4b000
+ FS: 00007fa73888c740(0000) GS:ffff93559f780000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000000000000b240 CR3: 0000000145a2e000 CR4: 00000000007506f0
+ PKRU: 55555554
+ Call Trace:
+ <TASK>
+ ? __die+0x20/0x70
+ ? page_fault_oops+0x15a/0x460
+ ? exc_page_fault+0x6e/0x180
+ ? asm_exc_page_fault+0x22/0x30
+ ? __pfx_bnxt_queue_mem_alloc+0x10/0x10 [bnxt_en 7f85e76f4d724ba07471d7e39d9e773aea6597b7]
+ ? bnxt_queue_mem_alloc+0x3f/0x4e0 [bnxt_en 7f85e76f4d724ba07471d7e39d9e773aea6597b7]
+ netdev_rx_queue_restart+0xc5/0x240
+ net_devmem_bind_dmabuf_to_queue+0xf8/0x200
+ netdev_nl_bind_rx_doit+0x3a7/0x450
+ genl_family_rcv_msg_doit+0xd9/0x130
+ genl_rcv_msg+0x184/0x2b0
+ ? __pfx_netdev_nl_bind_rx_doit+0x10/0x10
+ ? __pfx_genl_rcv_msg+0x10/0x10
+ netlink_rcv_skb+0x54/0x100
+ genl_rcv+0x24/0x40
+...
+
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+Fixes: 2d694c27d32e ("bnxt_en: implement netdev_queue_mgmt_ops")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Reviewed-by: Mina Almasry <almasrymina@google.com>
+Link: https://patch.msgid.link/20250309134219.91670-3-ap420073@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 39a5ea4d3b4ea..a476f9da40c27 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -15108,6 +15108,9 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
+ struct bnxt_ring_struct *ring;
+ int rc;
+
++ if (!bp->rx_ring)
++ return -ENETDOWN;
++
+ rxr = &bp->rx_ring[idx];
+ clone = qmem;
+ memcpy(clone, rxr, sizeof(*rxr));
+--
+2.39.5
+
--- /dev/null
+From 15fe147fe8a10db4840afc53263cf1c7cbea0368 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Nov 2024 19:51:19 -0800
+Subject: eth: bnxt: use page pool for head frags
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 7ed816be35abc3d5bed39d3edc5f2efed2ca5216 ]
+
+Testing small size RPCs (300B-400B) on a large AMD system suggests
+that page pool recycling is very useful even for just the head frags.
+With this patch (and copy break disabled) I see a 30% performance
+improvement (82Gbps -> 106Gbps).
+
+Convert bnxt from normal page frags to page pool frags for head buffers.
+
+On systems with small page size we can use the same pool as for TPA
+pages. On systems with large pages the frag allocation logic of the
+page pool is already used to split a large page into TPA chunks.
+TPA chunks are much larger than heads (8k or 64k, AFAICT vs 1kB)
+and we always allocate the same sized chunks. Mixing allocation
+of TPA and head pages would lead to sub-optimal memory use.
+Plus Taehee's work on zero-copy / devmem will need to differentiate
+between TPA and non-TPA page pool, anyway. Conditionally allocate
+a new page pool for heads.
+
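+In outline, a separate head pool is created only when the system page
+size exceeds the BNXT rx page size (condensed from the diff below):
+
+	static bool bnxt_separate_head_pool(void)
+	{
+		/* large base pages already get split into TPA chunks */
+		return PAGE_SIZE > BNXT_RX_PAGE_SIZE;
+	}
+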
+Link: https://patch.msgid.link/20241109035119.3391864-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 87dd2850835d ("eth: bnxt: fix memory leak in queue reset")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 98 ++++++++++++-----------
+ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 +
+ 2 files changed, 51 insertions(+), 48 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index eba0f9991476c..b97bced5c002c 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -864,6 +864,11 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
+ bnapi->events &= ~BNXT_TX_CMP_EVENT;
+ }
+
++static bool bnxt_separate_head_pool(void)
++{
++ return PAGE_SIZE > BNXT_RX_PAGE_SIZE;
++}
++
+ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
+ struct bnxt_rx_ring_info *rxr,
+ unsigned int *offset,
+@@ -886,27 +891,19 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
+ }
+
+ static inline u8 *__bnxt_alloc_rx_frag(struct bnxt *bp, dma_addr_t *mapping,
++ struct bnxt_rx_ring_info *rxr,
+ gfp_t gfp)
+ {
+- u8 *data;
+- struct pci_dev *pdev = bp->pdev;
++ unsigned int offset;
++ struct page *page;
+
+- if (gfp == GFP_ATOMIC)
+- data = napi_alloc_frag(bp->rx_buf_size);
+- else
+- data = netdev_alloc_frag(bp->rx_buf_size);
+- if (!data)
++ page = page_pool_alloc_frag(rxr->head_pool, &offset,
++ bp->rx_buf_size, gfp);
++ if (!page)
+ return NULL;
+
+- *mapping = dma_map_single_attrs(&pdev->dev, data + bp->rx_dma_offset,
+- bp->rx_buf_use_size, bp->rx_dir,
+- DMA_ATTR_WEAK_ORDERING);
+-
+- if (dma_mapping_error(&pdev->dev, *mapping)) {
+- skb_free_frag(data);
+- data = NULL;
+- }
+- return data;
++ *mapping = page_pool_get_dma_addr(page) + bp->rx_dma_offset + offset;
++ return page_address(page) + offset;
+ }
+
+ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+@@ -928,7 +925,7 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+ rx_buf->data = page;
+ rx_buf->data_ptr = page_address(page) + offset + bp->rx_offset;
+ } else {
+- u8 *data = __bnxt_alloc_rx_frag(bp, &mapping, gfp);
++ u8 *data = __bnxt_alloc_rx_frag(bp, &mapping, rxr, gfp);
+
+ if (!data)
+ return -ENOMEM;
+@@ -1179,13 +1176,14 @@ static struct sk_buff *bnxt_rx_skb(struct bnxt *bp,
+ }
+
+ skb = napi_build_skb(data, bp->rx_buf_size);
+- dma_unmap_single_attrs(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size,
+- bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
++ dma_sync_single_for_cpu(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size,
++ bp->rx_dir);
+ if (!skb) {
+- skb_free_frag(data);
++ page_pool_free_va(rxr->head_pool, data, true);
+ return NULL;
+ }
+
++ skb_mark_for_recycle(skb);
+ skb_reserve(skb, bp->rx_offset);
+ skb_put(skb, offset_and_len & 0xffff);
+ return skb;
+@@ -1840,7 +1838,8 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
+ u8 *new_data;
+ dma_addr_t new_mapping;
+
+- new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, GFP_ATOMIC);
++ new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, rxr,
++ GFP_ATOMIC);
+ if (!new_data) {
+ bnxt_abort_tpa(cpr, idx, agg_bufs);
+ cpr->sw_stats->rx.rx_oom_discards += 1;
+@@ -1852,16 +1851,16 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
+ tpa_info->mapping = new_mapping;
+
+ skb = napi_build_skb(data, bp->rx_buf_size);
+- dma_unmap_single_attrs(&bp->pdev->dev, mapping,
+- bp->rx_buf_use_size, bp->rx_dir,
+- DMA_ATTR_WEAK_ORDERING);
++ dma_sync_single_for_cpu(&bp->pdev->dev, mapping,
++ bp->rx_buf_use_size, bp->rx_dir);
+
+ if (!skb) {
+- skb_free_frag(data);
++ page_pool_free_va(rxr->head_pool, data, true);
+ bnxt_abort_tpa(cpr, idx, agg_bufs);
+ cpr->sw_stats->rx.rx_oom_discards += 1;
+ return NULL;
+ }
++ skb_mark_for_recycle(skb);
+ skb_reserve(skb, bp->rx_offset);
+ skb_put(skb, len);
+ }
+@@ -3325,28 +3324,22 @@ static void bnxt_free_tx_skbs(struct bnxt *bp)
+
+ static void bnxt_free_one_rx_ring(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
+ {
+- struct pci_dev *pdev = bp->pdev;
+ int i, max_idx;
+
+ max_idx = bp->rx_nr_pages * RX_DESC_CNT;
+
+ for (i = 0; i < max_idx; i++) {
+ struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i];
+- dma_addr_t mapping = rx_buf->mapping;
+ void *data = rx_buf->data;
+
+ if (!data)
+ continue;
+
+ rx_buf->data = NULL;
+- if (BNXT_RX_PAGE_MODE(bp)) {
++ if (BNXT_RX_PAGE_MODE(bp))
+ page_pool_recycle_direct(rxr->page_pool, data);
+- } else {
+- dma_unmap_single_attrs(&pdev->dev, mapping,
+- bp->rx_buf_use_size, bp->rx_dir,
+- DMA_ATTR_WEAK_ORDERING);
+- skb_free_frag(data);
+- }
++ else
++ page_pool_free_va(rxr->head_pool, data, true);
+ }
+ }
+
+@@ -3373,7 +3366,6 @@ static void bnxt_free_one_rx_agg_ring(struct bnxt *bp, struct bnxt_rx_ring_info
+ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
+ {
+ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr];
+- struct pci_dev *pdev = bp->pdev;
+ struct bnxt_tpa_idx_map *map;
+ int i;
+
+@@ -3387,13 +3379,8 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
+ if (!data)
+ continue;
+
+- dma_unmap_single_attrs(&pdev->dev, tpa_info->mapping,
+- bp->rx_buf_use_size, bp->rx_dir,
+- DMA_ATTR_WEAK_ORDERING);
+-
+ tpa_info->data = NULL;
+-
+- skb_free_frag(data);
++ page_pool_free_va(rxr->head_pool, data, false);
+ }
+
+ skip_rx_tpa_free:
+@@ -3609,7 +3596,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
+ xdp_rxq_info_unreg(&rxr->xdp_rxq);
+
+ page_pool_destroy(rxr->page_pool);
+- rxr->page_pool = NULL;
++ if (rxr->page_pool != rxr->head_pool)
++ page_pool_destroy(rxr->head_pool);
++ rxr->page_pool = rxr->head_pool = NULL;
+
+ kfree(rxr->rx_agg_bmap);
+ rxr->rx_agg_bmap = NULL;
+@@ -3627,6 +3616,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
+ int numa_node)
+ {
+ struct page_pool_params pp = { 0 };
++ struct page_pool *pool;
+
+ pp.pool_size = bp->rx_agg_ring_size;
+ if (BNXT_RX_PAGE_MODE(bp))
+@@ -3639,14 +3629,25 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
+ pp.max_len = PAGE_SIZE;
+ pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+
+- rxr->page_pool = page_pool_create(&pp);
+- if (IS_ERR(rxr->page_pool)) {
+- int err = PTR_ERR(rxr->page_pool);
++ pool = page_pool_create(&pp);
++ if (IS_ERR(pool))
++ return PTR_ERR(pool);
++ rxr->page_pool = pool;
+
+- rxr->page_pool = NULL;
+- return err;
++ if (bnxt_separate_head_pool()) {
++ pp.pool_size = max(bp->rx_ring_size, 1024);
++ pool = page_pool_create(&pp);
++ if (IS_ERR(pool))
++ goto err_destroy_pp;
+ }
++ rxr->head_pool = pool;
++
+ return 0;
++
++err_destroy_pp:
++ page_pool_destroy(rxr->page_pool);
++ rxr->page_pool = NULL;
++ return PTR_ERR(pool);
+ }
+
+ static int bnxt_alloc_rx_rings(struct bnxt *bp)
+@@ -4197,7 +4198,8 @@ static int bnxt_alloc_one_rx_ring(struct bnxt *bp, int ring_nr)
+ u8 *data;
+
+ for (i = 0; i < bp->max_tpa; i++) {
+- data = __bnxt_alloc_rx_frag(bp, &mapping, GFP_KERNEL);
++ data = __bnxt_alloc_rx_frag(bp, &mapping, rxr,
++ GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+index bee645f58d0bd..1758edcd1db42 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -1108,6 +1108,7 @@ struct bnxt_rx_ring_info {
+ struct bnxt_ring_struct rx_agg_ring_struct;
+ struct xdp_rxq_info xdp_rxq;
+ struct page_pool *page_pool;
++ struct page_pool *head_pool;
+ };
+
+ struct bnxt_rx_sw_stats {
+--
+2.39.5
+
--- /dev/null
+From f3b212c812cf7a1f46e8e4dec91242f2c59679ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 1 Mar 2025 08:16:31 -0800
+Subject: fbdev: hyperv_fb: Allow graceful removal of framebuffer
+
+From: Saurabh Sengar <ssengar@linux.microsoft.com>
+
+[ Upstream commit ea2f45ab0e53b255f72c85ccd99e2b394fc5fceb ]
+
+When a Hyper-V framebuffer device is unbound, the hyperv_fb driver tries
+to release the framebuffer forcefully. If this framebuffer is in use, the
+release produces the following WARN and the framebuffer is never released.
+
+[ 44.111220] WARNING: CPU: 35 PID: 1882 at drivers/video/fbdev/core/fb_info.c:70 framebuffer_release+0x2c/0x40
+< snip >
+[ 44.111289] Call Trace:
+[ 44.111290] <TASK>
+[ 44.111291] ? show_regs+0x6c/0x80
+[ 44.111295] ? __warn+0x8d/0x150
+[ 44.111298] ? framebuffer_release+0x2c/0x40
+[ 44.111300] ? report_bug+0x182/0x1b0
+[ 44.111303] ? handle_bug+0x6e/0xb0
+[ 44.111306] ? exc_invalid_op+0x18/0x80
+[ 44.111308] ? asm_exc_invalid_op+0x1b/0x20
+[ 44.111311] ? framebuffer_release+0x2c/0x40
+[ 44.111313] ? hvfb_remove+0x86/0xa0 [hyperv_fb]
+[ 44.111315] vmbus_remove+0x24/0x40 [hv_vmbus]
+[ 44.111323] device_remove+0x40/0x80
+[ 44.111325] device_release_driver_internal+0x20b/0x270
+[ 44.111327] ? bus_find_device+0xb3/0xf0
+
+Fix this by moving the release of the framebuffer and associated memory
+to the fb_ops.fb_destroy function, so that the framebuffer framework
+handles it gracefully.
+
+While at it, also replace the manual registration/unregistration of the
+framebuffer with devm_register_framebuffer().
+
+Fixes: 68a2d20b79b1 ("drivers/video: add Hyper-V Synthetic Video Frame Buffer Driver")
+
+Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com>
+Reviewed-by: Michael Kelley <mhklinux@outlook.com>
+Tested-by: Michael Kelley <mhklinux@outlook.com>
+Link: https://lore.kernel.org/r/1740845791-19977-3-git-send-email-ssengar@linux.microsoft.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Message-ID: <1740845791-19977-3-git-send-email-ssengar@linux.microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/hyperv_fb.c | 20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
+index 9798a34ac571f..75338ffc703fb 100644
+--- a/drivers/video/fbdev/hyperv_fb.c
++++ b/drivers/video/fbdev/hyperv_fb.c
+@@ -282,6 +282,8 @@ static uint screen_depth;
+ static uint screen_fb_size;
+ static uint dio_fb_size; /* FB size for deferred IO */
+
++static void hvfb_putmem(struct fb_info *info);
++
+ /* Send message to Hyper-V host */
+ static inline int synthvid_send(struct hv_device *hdev,
+ struct synthvid_msg *msg)
+@@ -862,6 +864,17 @@ static void hvfb_ops_damage_area(struct fb_info *info, u32 x, u32 y, u32 width,
+ hvfb_ondemand_refresh_throttle(par, x, y, width, height);
+ }
+
++/*
++ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
++ * of unregister_framebuffer() or fb_release(). Do any cleanup related to
++ * framebuffer here.
++ */
++static void hvfb_destroy(struct fb_info *info)
++{
++ hvfb_putmem(info);
++ framebuffer_release(info);
++}
++
+ /*
+ * TODO: GEN1 codepaths allocate from system or DMA-able memory. Fix the
+ * driver to use the _SYSMEM_ or _DMAMEM_ helpers in these cases.
+@@ -877,6 +890,7 @@ static const struct fb_ops hvfb_ops = {
+ .fb_set_par = hvfb_set_par,
+ .fb_setcolreg = hvfb_setcolreg,
+ .fb_blank = hvfb_blank,
++ .fb_destroy = hvfb_destroy,
+ };
+
+ /* Get options from kernel paramenter "video=" */
+@@ -1178,7 +1192,7 @@ static int hvfb_probe(struct hv_device *hdev,
+ if (ret)
+ goto error;
+
+- ret = register_framebuffer(info);
++ ret = devm_register_framebuffer(&hdev->device, info);
+ if (ret) {
+ pr_err("Unable to register framebuffer\n");
+ goto error;
+@@ -1226,14 +1240,10 @@ static void hvfb_remove(struct hv_device *hdev)
+
+ fb_deferred_io_cleanup(info);
+
+- unregister_framebuffer(info);
+ cancel_delayed_work_sync(&par->dwork);
+
+ vmbus_close(hdev->channel);
+ hv_set_drvdata(hdev, NULL);
+-
+- hvfb_putmem(info);
+- framebuffer_release(info);
+ }
+
+ static int hvfb_suspend(struct hv_device *hdev)
+--
+2.39.5
+
--- /dev/null
+From 4bbf3a3afc71d3e55e59254287ff971901c90806 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Feb 2025 15:01:30 -0800
+Subject: fbdev: hyperv_fb: Fix hang in kdump kernel when on Hyper-V Gen 2 VMs
+
+From: Michael Kelley <mhklinux@outlook.com>
+
+[ Upstream commit 304386373007aaca9236a3f36afac0bbedcd2bf0 ]
+
+Gen 2 Hyper-V VMs boot via EFI and have a standard EFI framebuffer
+device. When the kdump kernel runs in such a VM, loading the efifb
+driver may hang because of accessing the framebuffer at the wrong
+memory address.
+
+The scenario occurs when the hyperv_fb driver in the original kernel
+moves the framebuffer to a different MMIO address because of conflicts
+with an already-running efifb or simplefb driver. The hyperv_fb driver
+then informs Hyper-V of the change, which is allowed by the Hyper-V FB
+VMBus device protocol. However, when the kexec command loads the kdump
+kernel into crash memory via the kexec_file_load() system call, the
+system call doesn't know the framebuffer has moved, and it sets up the
+kdump screen_info using the original framebuffer address. The transition
+to the kdump kernel does not go through the Hyper-V host, so Hyper-V
+does not reset the framebuffer address like it would do on a reboot.
+When efifb tries to run, it accesses a non-existent framebuffer
+address, which traps to the Hyper-V host. After many such accesses,
+the Hyper-V host thinks the guest is being malicious, and throttles
+the guest to the point that it runs very slowly or appears to have hung.
+
+When the kdump kernel is loaded into crash memory via the kexec_load()
+system call, the problem does not occur. In this case, the kexec command
+builds the screen_info table itself in user space from data returned
+by the FBIOGET_FSCREENINFO ioctl against /dev/fb0, which gives it the
+new framebuffer location.
+
+This problem was originally reported in 2020 [1], resulting in commit
+3cb73bc3fa2a ("hyperv_fb: Update screen_info after removing old
+framebuffer"). This commit solved the problem by setting orig_video_isVGA
+to 0, so the kdump kernel was unaware of the EFI framebuffer. The efifb
+driver did not try to load, and no hang occurred. But in 2024, commit
+c25a19afb81c ("fbdev/hyperv_fb: Do not clear global screen_info")
+effectively reverted 3cb73bc3fa2a. Commit c25a19afb81c has no reference
+to 3cb73bc3fa2a, so perhaps it was done without knowing the implications
+that were reported with 3cb73bc3fa2a. In any case, as of commit
+c25a19afb81c, the original problem came back again.
+
+Interestingly, the hyperv_drm driver does not have this problem because
+it never moves the framebuffer. The difference is that the hyperv_drm
+driver removes any conflicting framebuffers *before* allocating an MMIO
+address, while the hyperv_fb driver removes conflicting framebuffers
+*after* allocating an MMIO address. With the "after" ordering, hyperv_fb
+may encounter a conflict and move the framebuffer to a different MMIO
+address. But the conflict is essentially bogus because it is removed
+a few lines of code later.
+
+Rather than fix the problem with the approach from 2020 in commit
+3cb73bc3fa2a, instead slightly reorder the steps in hyperv_fb so
+conflicting framebuffers are removed before allocating an MMIO address.
+Then the default framebuffer MMIO address should always be available, and
+there's never any confusion about which framebuffer address the kdump
+kernel should use -- it's always the original address provided by
+the Hyper-V host. This approach is already used by the hyperv_drm
+driver, and is consistent with the usage guidelines at the head of
+the module that provides aperture_remove_conflicting_devices().
+
+This approach also solves a related minor problem when kexec_load()
+is used to load the kdump kernel. With current code, unbinding and
+rebinding the hyperv_fb driver could result in the framebuffer moving
+back to the default framebuffer address, because on the rebind there
+are no conflicts. If such a move is done after the kdump kernel is
+loaded with the new framebuffer address, at kdump time it could again
+have the wrong address.
+
+This problem and fix are described in terms of the kdump kernel, but
+it can also occur with any kernel started via kexec.
+
+See extensive discussion of the problem and solution at [2].
+
+[1] https://lore.kernel.org/linux-hyperv/20201014092429.1415040-1-kasong@redhat.com/
+[2] https://lore.kernel.org/linux-hyperv/BLAPR10MB521793485093FDB448F7B2E5FDE92@BLAPR10MB5217.namprd10.prod.outlook.com/
+
+Reported-by: Thomas Tai <thomas.tai@oracle.com>
+Fixes: c25a19afb81c ("fbdev/hyperv_fb: Do not clear global screen_info")
+Signed-off-by: Michael Kelley <mhklinux@outlook.com>
+Link: https://lore.kernel.org/r/20250218230130.3207-1-mhklinux@outlook.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Message-ID: <20250218230130.3207-1-mhklinux@outlook.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/hyperv_fb.c | 20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
+index 363e4ccfcdb77..ce23d0ef5702a 100644
+--- a/drivers/video/fbdev/hyperv_fb.c
++++ b/drivers/video/fbdev/hyperv_fb.c
+@@ -989,6 +989,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
+
+ base = pci_resource_start(pdev, 0);
+ size = pci_resource_len(pdev, 0);
++ aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME);
+
+ /*
+ * For Gen 1 VM, we can directly use the contiguous memory
+@@ -1010,11 +1011,21 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
+ goto getmem_done;
+ }
+ pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n");
++ } else {
++ aperture_remove_all_conflicting_devices(KBUILD_MODNAME);
+ }
+
+ /*
+- * Cannot use the contiguous physical memory.
+- * Allocate mmio space for framebuffer.
++ * Cannot use contiguous physical memory, so allocate MMIO space for
++ * the framebuffer. At this point in the function, conflicting devices
++ * that might have claimed the framebuffer MMIO space based on
++ * screen_info.lfb_base must have already been removed so that
++ * vmbus_allocate_mmio() does not allocate different MMIO space. If the
++ * kdump image were to be loaded using kexec_file_load(), the
++ * framebuffer location in the kdump image would be set from
++ * screen_info.lfb_base at the time that kdump is enabled. If the
++ * framebuffer has moved elsewhere, this could be the wrong location,
++ * causing kdump to hang when efifb (for example) loads.
+ */
+ dio_fb_size =
+ screen_width * screen_height * screen_depth / 8;
+@@ -1051,11 +1062,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
+ info->screen_size = dio_fb_size;
+
+ getmem_done:
+- if (base && size)
+- aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME);
+- else
+- aperture_remove_all_conflicting_devices(KBUILD_MODNAME);
+-
+ if (!gen2vm)
+ pci_dev_put(pdev);
+
+--
+2.39.5
+
--- /dev/null
+From 2f810e75fb7a55eca8e6620c2e54c9641ce33f36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Feb 2025 15:52:52 -0800
+Subject: fbdev: hyperv_fb: iounmap() the correct memory when removing a device
+
+From: Michael Kelley <mhklinux@outlook.com>
+
+[ Upstream commit 7241c886a71797cc51efc6fadec7076fcf6435c2 ]
+
+When a Hyper-V framebuffer device is removed, or the driver is unbound
+from a device, any allocated and/or mapped memory must be released. In
+particular, MMIO address space that was mapped to the framebuffer must
+be unmapped. Current code unmaps the wrong address, resulting in an
+error like:
+
+[ 4093.980597] iounmap: bad address 00000000c936c05c
+
+followed by a stack dump.
+
+Commit d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for
+Hyper-V frame buffer driver") changed the kind of address stored in
+info->screen_base, and the iounmap() call in hvfb_putmem() was not
+updated accordingly.
+
+Fix this by updating hvfb_putmem() to unmap the correct address.
+
+Fixes: d21987d709e8 ("video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver")
+Signed-off-by: Michael Kelley <mhklinux@outlook.com>
+Reviewed-by: Saurabh Sengar <ssengar@linux.microsoft.com>
+Link: https://lore.kernel.org/r/20250209235252.2987-1-mhklinux@outlook.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Message-ID: <20250209235252.2987-1-mhklinux@outlook.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/hyperv_fb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
+index 7fdb5edd7e2e8..363e4ccfcdb77 100644
+--- a/drivers/video/fbdev/hyperv_fb.c
++++ b/drivers/video/fbdev/hyperv_fb.c
+@@ -1080,7 +1080,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info)
+
+ if (par->need_docopy) {
+ vfree(par->dio_vp);
+- iounmap(info->screen_base);
++ iounmap(par->mmio_vp);
+ vmbus_free_mmio(par->mem->start, screen_fb_size);
+ } else {
+ hvfb_release_phymem(hdev, info->fix.smem_start,
+--
+2.39.5
+
--- /dev/null
+From 434cec8bdd868f3ef76002ea38fe9ecafcd7027a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 1 Mar 2025 08:16:30 -0800
+Subject: fbdev: hyperv_fb: Simplify hvfb_putmem
+
+From: Saurabh Sengar <ssengar@linux.microsoft.com>
+
+[ Upstream commit f5e728a50bb17336a20803dde488515b833ecd1d ]
+
+The device object that the 'hvfb_release_phymem' function needs
+for 'dma_free_coherent' can also be obtained from the 'info'
+pointer, making the 'hdev' parameter of 'hvfb_putmem' redundant.
+Remove the unnecessary 'hdev' argument from 'hvfb_putmem'.
+
+Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com>
+Reviewed-by: Michael Kelley <mhklinux@outlook.com>
+Tested-by: Michael Kelley <mhklinux@outlook.com>
+Link: https://lore.kernel.org/r/1740845791-19977-2-git-send-email-ssengar@linux.microsoft.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Message-ID: <1740845791-19977-2-git-send-email-ssengar@linux.microsoft.com>
+Stable-dep-of: ea2f45ab0e53 ("fbdev: hyperv_fb: Allow graceful removal of framebuffer")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/hyperv_fb.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
+index ce23d0ef5702a..9798a34ac571f 100644
+--- a/drivers/video/fbdev/hyperv_fb.c
++++ b/drivers/video/fbdev/hyperv_fb.c
+@@ -952,7 +952,7 @@ static phys_addr_t hvfb_get_phymem(struct hv_device *hdev,
+ }
+
+ /* Release contiguous physical memory */
+-static void hvfb_release_phymem(struct hv_device *hdev,
++static void hvfb_release_phymem(struct device *device,
+ phys_addr_t paddr, unsigned int size)
+ {
+ unsigned int order = get_order(size);
+@@ -960,7 +960,7 @@ static void hvfb_release_phymem(struct hv_device *hdev,
+ if (order <= MAX_PAGE_ORDER)
+ __free_pages(pfn_to_page(paddr >> PAGE_SHIFT), order);
+ else
+- dma_free_coherent(&hdev->device,
++ dma_free_coherent(device,
+ round_up(size, PAGE_SIZE),
+ phys_to_virt(paddr),
+ paddr);
+@@ -1080,7 +1080,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
+ }
+
+ /* Release the framebuffer */
+-static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info)
++static void hvfb_putmem(struct fb_info *info)
+ {
+ struct hvfb_par *par = info->par;
+
+@@ -1089,7 +1089,7 @@ static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info)
+ iounmap(par->mmio_vp);
+ vmbus_free_mmio(par->mem->start, screen_fb_size);
+ } else {
+- hvfb_release_phymem(hdev, info->fix.smem_start,
++ hvfb_release_phymem(info->device, info->fix.smem_start,
+ screen_fb_size);
+ }
+
+@@ -1203,7 +1203,7 @@ static int hvfb_probe(struct hv_device *hdev,
+
+ error:
+ fb_deferred_io_cleanup(info);
+- hvfb_putmem(hdev, info);
++ hvfb_putmem(info);
+ error2:
+ vmbus_close(hdev->channel);
+ error1:
+@@ -1232,7 +1232,7 @@ static void hvfb_remove(struct hv_device *hdev)
+ vmbus_close(hdev->channel);
+ hv_set_drvdata(hdev, NULL);
+
+- hvfb_putmem(hdev, info);
++ hvfb_putmem(info);
+ framebuffer_release(info);
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 526f4503957cdf96a921bc691a98b6d346f2ee88 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Mar 2025 20:28:53 +0100
+Subject: gre: Fix IPv6 link-local address generation.
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit 183185a18ff96751db52a46ccf93fff3a1f42815 ]
+
+Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE
+devices in most cases and fall back to using add_v4_addrs() only in
+case the GRE configuration is incompatible with addrconf_addr_gen().
+
+GRE used to use addrconf_addr_gen() until commit e5dd729460ca
+("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL
+address") restricted this use to gretap and ip6gretap devices, and
+created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones.
+
+The original problem came when commit 9af28511be10 ("addrconf: refuse
+isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its
+addr parameter was 0. The commit says that this would create an invalid
+address; however, I couldn't find any RFC saying that the generated
+interface identifier would be wrong. Anyway, since GRE over IPv4
+devices pass their local tunnel address to __ipv6_isatap_ifid(), that
+commit broke their IPv6 link-local address generation when the local
+address was unspecified.
+
+Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT
+interfaces when computing v6LL address") tried to fix that case by
+defining add_v4_addrs() and calling it to generate the IPv6 link-local
+address instead of using addrconf_addr_gen() (apart for gretap and
+ip6gretap devices, which would still use the regular
+addrconf_addr_gen(), since they have a MAC address).
+
+That broke several use cases because add_v4_addrs() isn't properly
+integrated into the rest of IPv6 Neighbor Discovery code. Several of
+these shortcomings have been fixed over time, but add_v4_addrs()
+remains broken in several respects. In particular, it doesn't send any
+Router Solicitations, so the SLAAC process doesn't start until the
+interface receives a Router Advertisement. Also, add_v4_addrs() mostly
+ignores the address generation mode of the interface
+(/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the
+IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases.
+
+Fix the situation by using add_v4_addrs() only in the specific scenario
+where the normal method would fail. That is, for interfaces that have
+all of the following characteristics:
+
+ * run over IPv4,
+ * transport IP packets directly, not Ethernet (that is, not gretap
+ interfaces),
+ * tunnel endpoint is INADDR_ANY (that is, 0),
+ * device address generation mode is EUI64.
+
+In all other cases, fall back to the regular addrconf_addr_gen().
+
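+Expressed as code, the fallback condition maps onto the four
+characteristics above (a sketch mirroring the hunk below; all
+identifiers are from the kernel source):
+
+	/* IPv4 GRE carrying IP directly (ARPHRD_IPGRE, i.e. not gretap),
+	 * local endpoint INADDR_ANY, and EUI64 address generation mode:
+	 * the one combination where __ipv6_isatap_ifid() would fail.
+	 */
+	if (dev->type == ARPHRD_IPGRE &&
+	    *(__be32 *)dev->dev_addr == 0 &&
+	    idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)
+		add_v4_addrs(idev);
+	else
+		addrconf_addr_gen(idev, true);
+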
+Also, remove the special case for ip6gre interfaces in add_v4_addrs(),
+since ip6gre devices now always use addrconf_addr_gen() instead.
+
+Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Link: https://patch.msgid.link/559c32ce5c9976b269e6337ac9abb6a96abe5096.1741375285.git.gnault@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/addrconf.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index f7c17388ff6aa..26cdb66574757 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -3237,16 +3237,13 @@ static void add_v4_addrs(struct inet6_dev *idev)
+ struct in6_addr addr;
+ struct net_device *dev;
+ struct net *net = dev_net(idev->dev);
+- int scope, plen, offset = 0;
++ int scope, plen;
+ u32 pflags = 0;
+
+ ASSERT_RTNL();
+
+ memset(&addr, 0, sizeof(struct in6_addr));
+- /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */
+- if (idev->dev->addr_len == sizeof(struct in6_addr))
+- offset = sizeof(struct in6_addr) - 4;
+- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
++ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
+
+ if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) {
+ scope = IPV6_ADDR_COMPATv4;
+@@ -3557,7 +3554,13 @@ static void addrconf_gre_config(struct net_device *dev)
+ return;
+ }
+
+- if (dev->type == ARPHRD_ETHER) {
++ /* Generate the IPv6 link-local address using addrconf_addr_gen(),
++ * unless we have an IPv4 GRE device not bound to an IP address and
++ * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this
++ * case). Such devices fall back to add_v4_addrs() instead.
++ */
++ if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 &&
++ idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) {
+ addrconf_addr_gen(idev, true);
+ return;
+ }
+--
+2.39.5
+
--- /dev/null
+From 2d65b5ddbd9e9dac378e8853c42004bc1e5bf383 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Dec 2024 15:08:53 +0100
+Subject: ice: do not configure destination override for switchdev
+
+From: Larysa Zaremba <larysa.zaremba@intel.com>
+
+[ Upstream commit 3be83ee9de0298f8321aa0b148d8f9995102e40f ]
+
+After switchdev is enabled and later disabled, the sending of LLDP packets
+stops, despite having worked perfectly fine before and during the
+switchdev state.
+To reproduce (creating/destroying VF is what triggers the reconfiguration):
+
+devlink dev eswitch set pci/<address> mode switchdev
+echo '2' > /sys/class/net/<ifname>/device/sriov_numvfs
+echo '0' > /sys/class/net/<ifname>/device/sriov_numvfs
+
+This happens because LLDP relies on the destination override
+functionality. To use it, the driver needs to 1) set a flag in the
+descriptor and 2) set the VSI permission that makes the flag valid. The
+permission is set when the PF VSI is first configured, but switchdev then
+enables it once more for the uplink VSI (which is always the PF) when
+configured and disables it when deconfigured, which leaves
+software-generated LLDP packets blocked.
+
+Do not modify the destination override permissions when configuring
+switchdev, as the enabled state is the default configuration that is never
+modified.
+
+Fixes: 1a1c40df2e80 ("ice: set and release switchdev environment")
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Sujai Buvaneswaran <sujai.buvaneswaran@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_eswitch.c | 6 ------
+ drivers/net/ethernet/intel/ice/ice_lib.c | 18 ------------------
+ drivers/net/ethernet/intel/ice/ice_lib.h | 4 ----
+ 3 files changed, 28 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
+index d649c197cf673..ed21d7f55ac11 100644
+--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
++++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
+@@ -49,9 +49,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
+ if (vlan_ops->dis_rx_filtering(uplink_vsi))
+ goto err_vlan_filtering;
+
+- if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override))
+- goto err_override_uplink;
+-
+ if (ice_vsi_update_local_lb(uplink_vsi, true))
+ goto err_override_local_lb;
+
+@@ -63,8 +60,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
+ err_up:
+ ice_vsi_update_local_lb(uplink_vsi, false);
+ err_override_local_lb:
+- ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
+-err_override_uplink:
+ vlan_ops->ena_rx_filtering(uplink_vsi);
+ err_vlan_filtering:
+ ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false,
+@@ -275,7 +270,6 @@ static void ice_eswitch_release_env(struct ice_pf *pf)
+ vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
+
+ ice_vsi_update_local_lb(uplink_vsi, false);
+- ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
+ vlan_ops->ena_rx_filtering(uplink_vsi);
+ ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false,
+ ICE_FLTR_TX);
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
+index d4e74f96a8ad5..121a5ad5c8e10 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_lib.c
+@@ -3928,24 +3928,6 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx)
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+ }
+
+-/**
+- * ice_vsi_ctx_set_allow_override - allow destination override on VSI
+- * @ctx: pointer to VSI ctx structure
+- */
+-void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx)
+-{
+- ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
+-}
+-
+-/**
+- * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI
+- * @ctx: pointer to VSI ctx structure
+- */
+-void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx)
+-{
+- ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
+-}
+-
+ /**
+ * ice_vsi_update_local_lb - update sw block in VSI with local loopback bit
+ * @vsi: pointer to VSI structure
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
+index 1a6cfc8693ce4..2b27998fd1be3 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.h
++++ b/drivers/net/ethernet/intel/ice/ice_lib.h
+@@ -106,10 +106,6 @@ ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *))
+ void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx);
+
+ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx);
+-
+-void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx);
+-
+-void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx);
+ int ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set);
+ int ice_vsi_add_vlan_zero(struct ice_vsi *vsi);
+ int ice_vsi_del_vlan_zero(struct ice_vsi *vsi);
+--
+2.39.5
+
--- /dev/null
+From efe9a0a25d1adce7367a870159d4ec2d5c1863ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jan 2025 09:15:39 +0100
+Subject: ice: fix memory leak in aRFS after reset
+
+From: Grzegorz Nitka <grzegorz.nitka@intel.com>
+
+[ Upstream commit 23d97f18901ef5e4e264e3b1777fe65c760186b5 ]
+
+Fix a memory leak of the aRFS (accelerated Receive Flow Steering)
+structures by adding a check that verifies whether aRFS memory is already
+allocated while configuring the VSI. aRFS objects are allocated in two cases:
+- as part of VSI initialization (at probe), and
+- as part of reset handling
+
+However, VSI reconfiguration executed during reset allocates the memory
+again, without first releasing the already-allocated resources. This led
+to a memory leak with the following signature:
+
+[root@os-delivery ~]# cat /sys/kernel/debug/kmemleak
+unreferenced object 0xff3c1ca7252e6000 (size 8192):
+ comm "kworker/0:0", pid 8, jiffies 4296833052
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace (crc 0):
+ [<ffffffff991ec485>] __kmalloc_cache_noprof+0x275/0x340
+ [<ffffffffc0a6e06a>] ice_init_arfs+0x3a/0xe0 [ice]
+ [<ffffffffc09f1027>] ice_vsi_cfg_def+0x607/0x850 [ice]
+ [<ffffffffc09f244b>] ice_vsi_setup+0x5b/0x130 [ice]
+ [<ffffffffc09c2131>] ice_init+0x1c1/0x460 [ice]
+ [<ffffffffc09c64af>] ice_probe+0x2af/0x520 [ice]
+ [<ffffffff994fbcd3>] local_pci_probe+0x43/0xa0
+ [<ffffffff98f07103>] work_for_cpu_fn+0x13/0x20
+ [<ffffffff98f0b6d9>] process_one_work+0x179/0x390
+ [<ffffffff98f0c1e9>] worker_thread+0x239/0x340
+ [<ffffffff98f14abc>] kthread+0xcc/0x100
+ [<ffffffff98e45a6d>] ret_from_fork+0x2d/0x50
+ [<ffffffff98e083ba>] ret_from_fork_asm+0x1a/0x30
+ ...
+
+Fixes: 28bf26724fdb ("ice: Implement aRFS")
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Signed-off-by: Grzegorz Nitka <grzegorz.nitka@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
+index 7cee365cc7d16..405ddd17de1bf 100644
+--- a/drivers/net/ethernet/intel/ice/ice_arfs.c
++++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
+@@ -511,7 +511,7 @@ void ice_init_arfs(struct ice_vsi *vsi)
+ struct hlist_head *arfs_fltr_list;
+ unsigned int i;
+
+- if (!vsi || vsi->type != ICE_VSI_PF)
++ if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi))
+ return;
+
+ arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list),
+--
+2.39.5
+
--- /dev/null
+From ee10c15ba7836668923c4cacb2e673e2d311d7a2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Jan 2025 20:07:52 +0100
+Subject: ice: Fix switchdev slow-path in LAG
+
+From: Marcin Szycik <marcin.szycik@linux.intel.com>
+
+[ Upstream commit dce97cb0a3e34204c0b99345418a714eac85953f ]
+
+Ever since removing the switchdev control VSI and using the PF for port
+representor Tx/Rx, switchdev slow-path has been working improperly after
+failover in SR-IOV LAG. LAG assumes that the first uplink to be added to
+the aggregate will own VFs and have switchdev configured. After
+failing over to the other uplink, representors are still configured to
+Tx through the uplink they are set up on, which fails because that
+uplink is now down.
+
+On failover, update all port representors on the primary uplink to use
+the currently active uplink for Tx. Call netif_keep_dst(), as the
+secondary uplink might not be in switchdev mode. Also make sure to call
+ice_eswitch_set_target_vsi() if the uplink is in a LAG.
+
+On the Rx path, representors already work properly, because the default
+Tx from the VFs is directed to the PF that owns the eswitch. After
+failover, the same PF still receives traffic from the VFs even though its
+link is down.
+
+Fixes: defd52455aee ("ice: do Tx through PF netdev in slow-path")
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Tested-by: Sujai Buvaneswaran <sujai.buvaneswaran@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_lag.c | 27 +++++++++++++++++++++++
+ drivers/net/ethernet/intel/ice/ice_txrx.c | 4 +++-
+ 2 files changed, 30 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
+index 1ccb572ce285d..22371011c2492 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lag.c
++++ b/drivers/net/ethernet/intel/ice/ice_lag.c
+@@ -1000,6 +1000,28 @@ static void ice_lag_link(struct ice_lag *lag)
+ netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n");
+ }
+
++/**
++ * ice_lag_config_eswitch - configure eswitch to work with LAG
++ * @lag: lag info struct
++ * @netdev: active network interface device struct
++ *
++ * Updates all port representors in eswitch to use @netdev for Tx.
++ *
++ * Configures the netdev to keep dst metadata (also used in representor Tx).
++ * This is required for an uplink without switchdev mode configured.
++ */
++static void ice_lag_config_eswitch(struct ice_lag *lag,
++ struct net_device *netdev)
++{
++ struct ice_repr *repr;
++ unsigned long id;
++
++ xa_for_each(&lag->pf->eswitch.reprs, id, repr)
++ repr->dst->u.port_info.lower_dev = netdev;
++
++ netif_keep_dst(netdev);
++}
++
+ /**
+ * ice_lag_unlink - handle unlink event
+ * @lag: LAG info struct
+@@ -1021,6 +1043,9 @@ static void ice_lag_unlink(struct ice_lag *lag)
+ ice_lag_move_vf_nodes(lag, act_port, pri_port);
+ lag->primary = false;
+ lag->active_port = ICE_LAG_INVALID_PORT;
++
++ /* Config primary's eswitch back to normal operation. */
++ ice_lag_config_eswitch(lag, lag->netdev);
+ } else {
+ struct ice_lag *primary_lag;
+
+@@ -1419,6 +1444,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr)
+ ice_lag_move_vf_nodes(lag, prim_port,
+ event_port);
+ lag->active_port = event_port;
++ ice_lag_config_eswitch(lag, event_netdev);
+ return;
+ }
+
+@@ -1428,6 +1454,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr)
+ /* new active port */
+ ice_lag_move_vf_nodes(lag, lag->active_port, event_port);
+ lag->active_port = event_port;
++ ice_lag_config_eswitch(lag, event_netdev);
+ } else {
+ /* port not set as currently active (e.g. new active port
+ * has already claimed the nodes and filters
+diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
+index f12fb3a2b6ad9..f522dd42093a9 100644
+--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
+@@ -2424,7 +2424,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)
+ ICE_TXD_CTX_QW1_CMD_S);
+
+ ice_tstamp(tx_ring, skb, first, &offload);
+- if (ice_is_switchdev_running(vsi->back) && vsi->type != ICE_VSI_SF)
++ if ((ice_is_switchdev_running(vsi->back) ||
++ ice_lag_is_switchdev_running(vsi->back)) &&
++ vsi->type != ICE_VSI_SF)
+ ice_eswitch_set_target_vsi(skb, &offload);
+
+ if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
+--
+2.39.5
+
--- /dev/null
+From c3c6f2b1b60252505bb9a81572c28dff747ed38f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Mar 2025 10:45:53 +0300
+Subject: ipvs: prevent integer overflow in do_ip_vs_get_ctl()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit 80b78c39eb86e6b55f56363b709eb817527da5aa ]
+
+The get->num_services variable is an unsigned int which is controlled by
+the user. The struct_size() function ensures that the size calculation
+does not overflow an unsigned long; however, we are saving the result to
+an int, so the calculation can still overflow.
+
+Both "len" and "get->num_services" come from the user. This check is
+just a sanity check to help the user and ensure they are using the API
+correctly. An integer overflow here is not a big deal. This has no
+security impact.
+
+Save the result from struct_size() in a variable of type size_t to fix
+this integer overflow bug.
+
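+A minimal userspace sketch of the truncation (the struct layouts here are
+stand-ins, not the real IPVS definitions, and struct_size() is modeled by
+the equivalent size_t arithmetic):
+
+	#include <stdio.h>
+	#include <stddef.h>
+
+	struct entry { char pad[104]; };		/* stand-in entry */
+	struct get_services {
+		unsigned int num_services;		/* user-controlled */
+		struct entry entrytable[];
+	};
+
+	int main(void)
+	{
+		unsigned int num = 0x02000000;		/* ~33.5M entries */
+		size_t size = sizeof(struct get_services) +
+			      (size_t)num * sizeof(struct entry);
+		int truncated = (int)size;		/* the old bug: int */
+
+		printf("size_t result: %zu\n", size);	/* 3489660932 */
+		printf("int result: %d\n", truncated);	/* wraps negative */
+		return 0;
+	}
+
+With the result kept in a size_t, the *len != size comparison rejects such
+bogus counts instead of matching a truncated value.
+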
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Acked-by: Julian Anastasov <ja@ssi.bg>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
+index dc6ddc4abbe21..3224f6e17e736 100644
+--- a/net/netfilter/ipvs/ip_vs_ctl.c
++++ b/net/netfilter/ipvs/ip_vs_ctl.c
+@@ -3091,12 +3091,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+ case IP_VS_SO_GET_SERVICES:
+ {
+ struct ip_vs_get_services *get;
+- int size;
++ size_t size;
+
+ get = (struct ip_vs_get_services *)arg;
+ size = struct_size(get, entrytable, get->num_services);
+ if (*len != size) {
+- pr_err("length: %u != %u\n", *len, size);
++ pr_err("length: %u != %zu\n", *len, size);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -3132,12 +3132,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+ case IP_VS_SO_GET_DESTS:
+ {
+ struct ip_vs_get_dests *get;
+- int size;
++ size_t size;
+
+ get = (struct ip_vs_get_dests *)arg;
+ size = struct_size(get, entrytable, get->num_dests);
+ if (*len != size) {
+- pr_err("length: %u != %u\n", *len, size);
++ pr_err("length: %u != %zu\n", *len, size);
+ ret = -EINVAL;
+ goto out;
+ }
+--
+2.39.5
+
--- /dev/null
+From 9548ec6899c30c7d830b7d097448106d8b081d23 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 12:23:05 -0500
+Subject: net: dsa: mv88e6xxx: Verify after ATU Load ops
+
+From: Joseph Huang <Joseph.Huang@garmin.com>
+
+[ Upstream commit dc5340c3133a3ebe54853fd299116149e528cfaa ]
+
+ATU Load operations could fail silently if there's not enough space
+on the device to hold the new entry. When this happens, the symptom
+depends on the unknown flood settings. If unknown multicast flood is
+disabled, the multicast packets are dropped when the ATU table is
+full. If unknown multicast flood is enabled, the multicast packets
+will be flooded to all ports. Either way, IGMP snooping is broken
+when the ATU Load operation fails silently.
+
+Do a Read-After-Write verification after each fdb/mdb add operation
+to make sure that the operation was really successful, and return
+-ENOSPC otherwise.
+
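+Condensed, the fdb/mdb add path after this change looks as follows (a
+sketch assembled from the hunks below, with the goto labels elided):
+
+	mv88e6xxx_reg_lock(chip);
+	err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, state);
+	/* The ATU Load op reports success even when the table is full,
+	 * so read the entry back and treat a miss as out-of-space.
+	 */
+	if (!err && !mv88e6xxx_port_db_find(chip, addr, vid))
+		err = -ENOSPC;
+	mv88e6xxx_reg_unlock(chip);
+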
+Fixes: defb05b9b9b4 ("net: dsa: mv88e6xxx: Add support for fdb_add, fdb_del, and fdb_getnext")
+Signed-off-by: Joseph Huang <Joseph.Huang@garmin.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://patch.msgid.link/20250306172306.3859214-1-Joseph.Huang@garmin.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/mv88e6xxx/chip.c | 59 ++++++++++++++++++++++++++------
+ 1 file changed, 48 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
+index 284270a4ade1c..5aeecfab96306 100644
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -2261,13 +2261,11 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port,
+ return err;
+ }
+
+-static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
+- const unsigned char *addr, u16 vid,
+- u8 state)
++static int mv88e6xxx_port_db_get(struct mv88e6xxx_chip *chip,
++ const unsigned char *addr, u16 vid,
++ u16 *fid, struct mv88e6xxx_atu_entry *entry)
+ {
+- struct mv88e6xxx_atu_entry entry;
+ struct mv88e6xxx_vtu_entry vlan;
+- u16 fid;
+ int err;
+
+ /* Ports have two private address databases: one for when the port is
+@@ -2278,7 +2276,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
+ * VLAN ID into the port's database used for VLAN-unaware bridging.
+ */
+ if (vid == 0) {
+- fid = MV88E6XXX_FID_BRIDGED;
++ *fid = MV88E6XXX_FID_BRIDGED;
+ } else {
+ err = mv88e6xxx_vtu_get(chip, vid, &vlan);
+ if (err)
+@@ -2288,14 +2286,39 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
+ if (!vlan.valid)
+ return -EOPNOTSUPP;
+
+- fid = vlan.fid;
++ *fid = vlan.fid;
+ }
+
+- entry.state = 0;
+- ether_addr_copy(entry.mac, addr);
+- eth_addr_dec(entry.mac);
++ entry->state = 0;
++ ether_addr_copy(entry->mac, addr);
++ eth_addr_dec(entry->mac);
++
++ return mv88e6xxx_g1_atu_getnext(chip, *fid, entry);
++}
++
++static bool mv88e6xxx_port_db_find(struct mv88e6xxx_chip *chip,
++ const unsigned char *addr, u16 vid)
++{
++ struct mv88e6xxx_atu_entry entry;
++ u16 fid;
++ int err;
+
+- err = mv88e6xxx_g1_atu_getnext(chip, fid, &entry);
++ err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry);
++ if (err)
++ return false;
++
++ return entry.state && ether_addr_equal(entry.mac, addr);
++}
++
++static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
++ const unsigned char *addr, u16 vid,
++ u8 state)
++{
++ struct mv88e6xxx_atu_entry entry;
++ u16 fid;
++ int err;
++
++ err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry);
+ if (err)
+ return err;
+
+@@ -2893,6 +2916,13 @@ static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
+ mv88e6xxx_reg_lock(chip);
+ err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
+ MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
++ if (err)
++ goto out;
++
++ if (!mv88e6xxx_port_db_find(chip, addr, vid))
++ err = -ENOSPC;
++
++out:
+ mv88e6xxx_reg_unlock(chip);
+
+ return err;
+@@ -6593,6 +6623,13 @@ static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port,
+ mv88e6xxx_reg_lock(chip);
+ err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid,
+ MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC);
++ if (err)
++ goto out;
++
++ if (!mv88e6xxx_port_db_find(chip, mdb->addr, mdb->vid))
++ err = -ENOSPC;
++
++out:
+ mv88e6xxx_reg_unlock(chip);
+
+ return err;
+--
+2.39.5
+
--- /dev/null
+From 86db2fa7f0697b116cc507bfd41ccd36e8951aa0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 10:33:20 +0800
+Subject: net: mctp i2c: Copy headers if cloned
+
+From: Matt Johnston <matt@codeconstruct.com.au>
+
+[ Upstream commit df8ce77ba8b7c012a3edd1ca7368b46831341466 ]
+
+Use skb_cow_head() prior to modifying the TX SKB. This is necessary
+when the SKB has been cloned, to avoid modifying other shared clones.
+
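+For background (a minimal sketch, not the driver's exact code): a cloned
+SKB shares its data buffer with the original, so the header area must be
+made private before it is written:
+
+	/* skb_clone() duplicates only the struct sk_buff; skb->data
+	 * still points into the same buffer as the original.
+	 * skb_cow_head() reallocates the header portion when the skb is
+	 * cloned or lacks the requested headroom, so a later skb_push()
+	 * cannot corrupt another clone's view of the packet.
+	 */
+	rc = skb_cow_head(skb, sizeof(struct mctp_i2c_hdr));
+	if (rc)
+		return rc;
+	skb_push(skb, sizeof(struct mctp_i2c_hdr));
+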
+Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
+Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver")
+Link: https://patch.msgid.link/20250306-matt-mctp-i2c-cow-v1-1-293827212681@codeconstruct.com.au
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/mctp/mctp-i2c.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c
+index e70fb66879941..6622de48fc9e7 100644
+--- a/drivers/net/mctp/mctp-i2c.c
++++ b/drivers/net/mctp/mctp-i2c.c
+@@ -584,6 +584,7 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev,
+ struct mctp_i2c_hdr *hdr;
+ struct mctp_hdr *mhdr;
+ u8 lldst, llsrc;
++ int rc;
+
+ if (len > MCTP_I2C_MAXMTU)
+ return -EMSGSIZE;
+@@ -594,6 +595,10 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev,
+ lldst = *((u8 *)daddr);
+ llsrc = *((u8 *)saddr);
+
++ rc = skb_cow_head(skb, sizeof(struct mctp_i2c_hdr));
++ if (rc)
++ return rc;
++
+ skb_push(skb, sizeof(struct mctp_i2c_hdr));
+ skb_reset_mac_header(skb);
+ hdr = (void *)skb_mac_header(skb);
+--
+2.39.5
+
--- /dev/null
+From 4ef226081c88727810782f7c742421ad9bdd234f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 18:24:18 +0800
+Subject: net: mctp i3c: Copy headers if cloned
+
+From: Matt Johnston <matt@codeconstruct.com.au>
+
+[ Upstream commit 26db9c9ee19c36a97dbb1cfef007a3c189c4c874 ]
+
+Use skb_cow_head() prior to modifying the tx skb. This is necessary
+when the skb has been cloned, to avoid modifying other shared clones.
+
+Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
+Fixes: c8755b29b58e ("mctp i3c: MCTP I3C driver")
+Link: https://patch.msgid.link/20250306-matt-i3c-cow-head-v1-1-d5e6a5495227@codeconstruct.com.au
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/mctp/mctp-i3c.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c
+index a2b15cddf46e6..47513ebbc6807 100644
+--- a/drivers/net/mctp/mctp-i3c.c
++++ b/drivers/net/mctp/mctp-i3c.c
+@@ -506,10 +506,15 @@ static int mctp_i3c_header_create(struct sk_buff *skb, struct net_device *dev,
+ const void *saddr, unsigned int len)
+ {
+ struct mctp_i3c_internal_hdr *ihdr;
++ int rc;
+
+ if (!daddr || !saddr)
+ return -EINVAL;
+
++ rc = skb_cow_head(skb, sizeof(struct mctp_i3c_internal_hdr));
++ if (rc)
++ return rc;
++
+ skb_push(skb, sizeof(struct mctp_i3c_internal_hdr));
+ skb_reset_mac_header(skb);
+ ihdr = (void *)skb_mac_header(skb);
+--
+2.39.5
+
--- /dev/null
+From fbe8adfa076f7b659e8bbe5c6244fa438ee49352 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 10:32:45 +0800
+Subject: net: mctp: unshare packets when reassembling
+
+From: Matt Johnston <matt@codeconstruct.com.au>
+
+[ Upstream commit f5d83cf0eeb90fade4d5c4d17d24b8bee9ceeecc ]
+
+Ensure that the frag_list used for reassembly isn't shared with other
+packets. This avoids incorrect reassembly when packets are cloned, and
+prevents a memory leak due to circular references between fragments and
+their skb_shared_info.
+
+The upcoming MCTP-over-USB driver uses skb_clone which can trigger the
+problem - other MCTP drivers don't share SKBs.
+
+A kunit test is added to reproduce the issue.
+
+Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
+Fixes: 4a992bbd3650 ("mctp: Implement message fragmentation & reassembly")
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250306-matt-mctp-usb-v1-1-085502b3dd28@codeconstruct.com.au
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mctp/route.c | 10 +++-
+ net/mctp/test/route-test.c | 109 +++++++++++++++++++++++++++++++++++++
+ 2 files changed, 117 insertions(+), 2 deletions(-)
+
+diff --git a/net/mctp/route.c b/net/mctp/route.c
+index 3f2bd65ff5e3c..4c460160914f0 100644
+--- a/net/mctp/route.c
++++ b/net/mctp/route.c
+@@ -332,8 +332,14 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
+ & MCTP_HDR_SEQ_MASK;
+
+ if (!key->reasm_head) {
+- key->reasm_head = skb;
+- key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
++ /* Since we're manipulating the shared frag_list, ensure it isn't
++ * shared with any other SKBs.
++ */
++ key->reasm_head = skb_unshare(skb, GFP_ATOMIC);
++ if (!key->reasm_head)
++ return -ENOMEM;
++
++ key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list);
+ key->last_seq = this_seq;
+ return 0;
+ }
+diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
+index 17165b86ce22d..06c1897b685a8 100644
+--- a/net/mctp/test/route-test.c
++++ b/net/mctp/test/route-test.c
+@@ -921,6 +921,114 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test)
+ __mctp_route_test_fini(test, dev, rt, sock);
+ }
+
++/* Input route to socket, using a fragmented message created from clones.
++ */
++static void mctp_test_route_input_cloned_frag(struct kunit *test)
++{
++ /* 5 packet fragments, forming 2 complete messages */
++ const struct mctp_hdr hdrs[5] = {
++ RX_FRAG(FL_S, 0),
++ RX_FRAG(0, 1),
++ RX_FRAG(FL_E, 2),
++ RX_FRAG(FL_S, 0),
++ RX_FRAG(FL_E, 1),
++ };
++ struct mctp_test_route *rt;
++ struct mctp_test_dev *dev;
++ struct sk_buff *skb[5];
++ struct sk_buff *rx_skb;
++ struct socket *sock;
++ size_t data_len;
++ u8 compare[100];
++ u8 flat[100];
++ size_t total;
++ void *p;
++ int rc;
++
++ /* Arbitrary length */
++ data_len = 3;
++ total = data_len + sizeof(struct mctp_hdr);
++
++ __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
++
++ /* Create a single skb initially with concatenated packets */
++ skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total);
++ mctp_test_skb_set_dev(skb[0], dev);
++ memset(skb[0]->data, 0 * 0x11, skb[0]->len);
++ memcpy(skb[0]->data, &hdrs[0], sizeof(struct mctp_hdr));
++
++ /* Extract and populate packets */
++ for (int i = 1; i < 5; i++) {
++ skb[i] = skb_clone(skb[i - 1], GFP_ATOMIC);
++ KUNIT_ASSERT_TRUE(test, skb[i]);
++ p = skb_pull(skb[i], total);
++ KUNIT_ASSERT_TRUE(test, p);
++ skb_reset_network_header(skb[i]);
++ memcpy(skb[i]->data, &hdrs[i], sizeof(struct mctp_hdr));
++ memset(&skb[i]->data[sizeof(struct mctp_hdr)], i * 0x11, data_len);
++ }
++ for (int i = 0; i < 5; i++)
++ skb_trim(skb[i], total);
++
++ /* SOM packets have a type byte to match the socket */
++ skb[0]->data[4] = 0;
++ skb[3]->data[4] = 0;
++
++ skb_dump("pkt1 ", skb[0], false);
++ skb_dump("pkt2 ", skb[1], false);
++ skb_dump("pkt3 ", skb[2], false);
++ skb_dump("pkt4 ", skb[3], false);
++ skb_dump("pkt5 ", skb[4], false);
++
++ for (int i = 0; i < 5; i++) {
++ KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1);
++ /* Take a reference so we can check refcounts at the end */
++ skb_get(skb[i]);
++ }
++
++ /* Feed the fragments into MCTP core */
++ for (int i = 0; i < 5; i++) {
++ rc = mctp_route_input(&rt->rt, skb[i]);
++ KUNIT_EXPECT_EQ(test, rc, 0);
++ }
++
++ /* Receive first reassembled message */
++ rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
++ KUNIT_EXPECT_EQ(test, rc, 0);
++ KUNIT_EXPECT_EQ(test, rx_skb->len, 3 * data_len);
++ rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len);
++ for (int i = 0; i < rx_skb->len; i++)
++ compare[i] = (i / data_len) * 0x11;
++ /* Set type byte */
++ compare[0] = 0;
++
++ KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len);
++ KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1);
++ kfree_skb(rx_skb);
++
++ /* Receive second reassembled message */
++ rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
++ KUNIT_EXPECT_EQ(test, rc, 0);
++ KUNIT_EXPECT_EQ(test, rx_skb->len, 2 * data_len);
++ rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len);
++ for (int i = 0; i < rx_skb->len; i++)
++ compare[i] = (i / data_len + 3) * 0x11;
++ /* Set type byte */
++ compare[0] = 0;
++
++ KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len);
++ KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1);
++ kfree_skb(rx_skb);
++
++ /* Check input skb refcounts */
++ for (int i = 0; i < 5; i++) {
++ KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1);
++ kfree_skb(skb[i]);
++ }
++
++ __mctp_route_test_fini(test, dev, rt, sock);
++}
++
+ #if IS_ENABLED(CONFIG_MCTP_FLOWS)
+
+ static void mctp_test_flow_init(struct kunit *test,
+@@ -1144,6 +1252,7 @@ static struct kunit_case mctp_test_cases[] = {
+ KUNIT_CASE(mctp_test_packet_flow),
+ KUNIT_CASE(mctp_test_fragment_flow),
+ KUNIT_CASE(mctp_test_route_output_key_create),
++ KUNIT_CASE(mctp_test_route_input_cloned_frag),
+ {}
+ };
+
+--
+2.39.5
+
--- /dev/null
+From 2812aaf94aaf113440dba717d6326b389237909d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Mar 2025 00:01:43 +0200
+Subject: net/mlx5: Bridge, fix the crash caused by LAG state check
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+[ Upstream commit 4b8eeed4fb105770ce6dc84a2c6ef953c7b71cbb ]
+
+When removing a LAG device from a bridge, a NETDEV_CHANGEUPPER event is
+triggered. The driver finds the lower devices (PFs) to flush all the
+offloaded entries, and mlx5_lag_is_shared_fdb is checked; it returns
+false if one of the PFs is unloaded. In that case,
+mlx5_esw_bridge_lag_rep_get() and its caller return NULL instead of
+the alive PF, and the flush is skipped.
+
+Besides, the bridge fdb entry's lastuse is updated in the mlx5 bridge
+event handler. But in this case the SWITCHDEV_FDB_ADD_TO_BRIDGE event is
+ignored because the upper interface for the bond is deleted, so the entry
+is never aged out since lastuse is never updated.
+
+To make things worse, as the entry is alive, mlx5 bridge workqueue
+keeps sending that event, which is then handled by kernel bridge
+notifier. It causes the following crash when accessing the passed bond
+netdev which is already destroyed.
+
+To fix this issue, remove such checks. LAG state is already checked in
+commit 15f8f168952f ("net/mlx5: Bridge, verify LAG state when adding
+bond to bridge"), driver still need to skip offload if LAG becomes
+invalid state after initialization.
+
+ Oops: stack segment: 0000 [#1] SMP
+ CPU: 3 UID: 0 PID: 23695 Comm: kworker/u40:3 Tainted: G OE 6.11.0_mlnx #1
+ Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ Workqueue: mlx5_bridge_wq mlx5_esw_bridge_update_work [mlx5_core]
+ RIP: 0010:br_switchdev_event+0x2c/0x110 [bridge]
+ Code: 44 00 00 48 8b 02 48 f7 00 00 02 00 00 74 69 41 54 55 53 48 83 ec 08 48 8b a8 08 01 00 00 48 85 ed 74 4a 48 83 fe 02 48 89 d3 <4c> 8b 65 00 74 23 76 49 48 83 fe 05 74 7e 48 83 fe 06 75 2f 0f b7
+ RSP: 0018:ffffc900092cfda0 EFLAGS: 00010297
+ RAX: ffff888123bfe000 RBX: ffffc900092cfe08 RCX: 00000000ffffffff
+ RDX: ffffc900092cfe08 RSI: 0000000000000001 RDI: ffffffffa0c585f0
+ RBP: 6669746f6e690a30 R08: 0000000000000000 R09: ffff888123ae92c8
+ R10: 0000000000000000 R11: fefefefefefefeff R12: ffff888123ae9c60
+ R13: 0000000000000001 R14: ffffc900092cfe08 R15: 0000000000000000
+ FS: 0000000000000000(0000) GS:ffff88852c980000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007f15914c8734 CR3: 0000000002830005 CR4: 0000000000770ef0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ PKRU: 55555554
+ Call Trace:
+ <TASK>
+ ? __die_body+0x1a/0x60
+ ? die+0x38/0x60
+ ? do_trap+0x10b/0x120
+ ? do_error_trap+0x64/0xa0
+ ? exc_stack_segment+0x33/0x50
+ ? asm_exc_stack_segment+0x22/0x30
+ ? br_switchdev_event+0x2c/0x110 [bridge]
+ ? sched_balance_newidle.isra.149+0x248/0x390
+ notifier_call_chain+0x4b/0xa0
+ atomic_notifier_call_chain+0x16/0x20
+ mlx5_esw_bridge_update+0xec/0x170 [mlx5_core]
+ mlx5_esw_bridge_update_work+0x19/0x40 [mlx5_core]
+ process_scheduled_works+0x81/0x390
+ worker_thread+0x106/0x250
+ ? bh_worker+0x110/0x110
+ kthread+0xb7/0xe0
+ ? kthread_park+0x80/0x80
+ ret_from_fork+0x2d/0x50
+ ? kthread_park+0x80/0x80
+ ret_from_fork_asm+0x11/0x20
+ </TASK>
+
+Fixes: ff9b7521468b ("net/mlx5: Bridge, support LAG")
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/1741644104-97767-6-git-send-email-tariqt@nvidia.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+index 5d128c5b4529a..0f5d7ea8956f7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+@@ -48,15 +48,10 @@ mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw)
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+- struct mlx5_core_dev *mdev;
+- struct mlx5e_priv *priv;
+-
+ if (!mlx5e_eswitch_rep(lower))
+ continue;
+
+- priv = netdev_priv(lower);
+- mdev = priv->mdev;
+- if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw))
++ if (mlx5_esw_bridge_dev_same_esw(lower, esw))
+ return lower;
+ }
+
+@@ -125,7 +120,7 @@ static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device *
+ priv = netdev_priv(rep);
+ mdev = priv->mdev;
+ if (netif_is_lag_master(dev))
+- return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev);
++ return mlx5_lag_is_master(mdev);
+ return true;
+ }
+
+@@ -455,6 +450,9 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
+ if (!rep)
+ return NOTIFY_DONE;
+
++ if (netif_is_lag_master(dev) && !mlx5_lag_is_shared_fdb(esw->dev))
++ return NOTIFY_DONE;
++
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE:
+ fdb_info = container_of(info,
+--
+2.39.5
+
--- /dev/null
+From 72da437ef7d816005033a3343cd88f0d991d19a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 23:25:29 +0200
+Subject: net/mlx5: Fill out devlink dev info only for PFs
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit d749d901b2168389f060b654fdaa08acf6b367d2 ]
+
+Firmware version query is supported only on the PFs. Due to this, the
+following kernel warning log is observed:
+
+[ 188.590344] mlx5_core 0000:08:00.2: mlx5_fw_version_query:816:(pid 1453): fw query isn't supported by the FW
+
+Fix it by restricting the query and devlink info to the PF.
+
+Fixes: 8338d9378895 ("net/mlx5: Added devlink info callback")
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Parav Pandit <parav@nvidia.com>
+Link: https://patch.msgid.link/20250306212529.429329-1-tariqt@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+index 98d4306929f3e..a2cf3e79693dd 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+@@ -46,6 +46,9 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+ u32 running_fw, stored_fw;
+ int err;
+
++ if (!mlx5_core_is_pf(dev))
++ return 0;
++
+ err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id);
+ if (err)
+ return err;
+--
+2.39.5
+
--- /dev/null
+From 97a9010e5b61e5e43ee4942f2cf3eed630279a36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Mar 2025 00:01:41 +0200
+Subject: net/mlx5: Fix incorrect IRQ pool usage when releasing IRQs
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 32d2724db5b2361ab293427ccd5c24f4f2bcca14 ]
+
+mlx5_irq_pool_get() is a getter for the completion IRQ pool only.
+However, after the cited commit, mlx5_irq_pool_get() is called during
+the ctrl IRQ release flow to retrieve the pool, resulting in the use of an
+incorrect IRQ pool.
+
+Hence, use the newly introduced mlx5_irq_get_pool() getter to retrieve
+the correct IRQ pool based on the IRQ itself. While at it, rename
+mlx5_irq_pool_get() to mlx5_irq_table_get_comp_irq_pool() which
+accurately reflects its purpose and improves code readability.
+
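+The shape of the fix is to resolve the pool from a back-pointer stored
+in the IRQ itself rather than from the device, so that release always
+finds the pool the IRQ was actually allocated from. A minimal sketch of
+the idea (structs simplified, not the driver's definitions):
+
+  struct pool;
+
+  struct irq {
+          struct pool *pool;      /* recorded at allocation time */
+  };
+
+  /* derive the owning pool from the object being released */
+  static struct pool *irq_get_pool(struct irq *irq)
+  {
+          return irq->pool;
+  }
+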
+Fixes: 0477d5168bbb ("net/mlx5: Expose SFs IRQs")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/1741644104-97767-4-git-send-email-tariqt@nvidia.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +-
+ .../net/ethernet/mellanox/mlx5/core/irq_affinity.c | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h | 4 +++-
+ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 13 ++++++++++---
+ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h | 2 +-
+ 5 files changed, 16 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+index 68cb86b37e561..4241cf07a0306 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -887,8 +887,8 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx)
+
+ static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx)
+ {
++ struct mlx5_irq_pool *pool = mlx5_irq_table_get_comp_irq_pool(dev);
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+- struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ struct irq_affinity_desc af_desc = {};
+ struct mlx5_irq *irq;
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
+index 1477db7f5307e..2691d88cdee1f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
+@@ -175,7 +175,7 @@ mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool,
+
+ void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq)
+ {
+- struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
++ struct mlx5_irq_pool *pool = mlx5_irq_get_pool(irq);
+ int cpu;
+
+ cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq));
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+index 0881e961d8b17..586688da9940e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+@@ -10,12 +10,15 @@
+
+ struct mlx5_irq;
+ struct cpu_rmap;
++struct mlx5_irq_pool;
+
+ int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+ int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+ void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev);
++struct mlx5_irq_pool *
++mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev);
+ int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
+ int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
+ struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
+@@ -38,7 +41,6 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
+ int mlx5_irq_get_index(struct mlx5_irq *irq);
+ int mlx5_irq_get_irq(const struct mlx5_irq *irq);
+
+-struct mlx5_irq_pool;
+ #ifdef CONFIG_MLX5_SF
+ struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev,
+ struct cpumask *used_cpus, u16 vecidx);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+index d9362eabc6a1c..2c5f850c31f68 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+@@ -378,6 +378,11 @@ int mlx5_irq_get_index(struct mlx5_irq *irq)
+ return irq->map.index;
+ }
+
++struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq)
++{
++ return irq->pool;
++}
++
+ /* irq_pool API */
+
+ /* requesting an irq from a given pool according to given index */
+@@ -405,18 +410,20 @@ static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_tab
+ return irq_table->sf_ctrl_pool;
+ }
+
+-static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
++static struct mlx5_irq_pool *
++sf_comp_irq_pool_get(struct mlx5_irq_table *irq_table)
+ {
+ return irq_table->sf_comp_pool;
+ }
+
+-struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
++struct mlx5_irq_pool *
++mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = NULL;
+
+ if (mlx5_core_is_sf(dev))
+- pool = sf_irq_pool_get(irq_table);
++ pool = sf_comp_irq_pool_get(irq_table);
+
+ /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
+ * the PF IRQs pool in case the SF pool doesn't exist.
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
+index c4d377f8df308..cc064425fe160 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
+@@ -28,7 +28,6 @@ struct mlx5_irq_pool {
+ struct mlx5_core_dev *dev;
+ };
+
+-struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev);
+ static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
+ {
+ return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
+@@ -40,5 +39,6 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
+ int mlx5_irq_get_locked(struct mlx5_irq *irq);
+ int mlx5_irq_read_locked(struct mlx5_irq *irq);
+ int mlx5_irq_put(struct mlx5_irq *irq);
++struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq);
+
+ #endif /* __PCI_IRQ_H__ */
+--
+2.39.5
+
--- /dev/null
+From 127e4df80c9a5461bbd03c86c1a5cd374dfb5ede Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Mar 2025 10:18:20 +0800
+Subject: net/mlx5: handle errors in mlx5_chains_create_table()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Wentao Liang <vulab@iscas.ac.cn>
+
+[ Upstream commit eab0396353be1c778eba1c0b5180176f04dd21ce ]
+
+In mlx5_chains_create_table(), the return values of mlx5_get_fdb_sub_ns()
+and mlx5_get_flow_namespace() must be checked to prevent NULL pointer
+dereferences. If either function fails, the function should log an error
+message with mlx5_core_warn() and return an error pointer.
+
+Fixes: 39ac237ce009 ("net/mlx5: E-Switch, Refactor chains and priorities")
+Signed-off-by: Wentao Liang <vulab@iscas.ac.cn>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://patch.msgid.link/20250307021820.2646-1-vulab@iscas.ac.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+index a80ecb672f33d..711d14dea2485 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -196,6 +196,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains,
+ ns = mlx5_get_flow_namespace(chains->dev, chains->ns);
+ }
+
++ if (!ns) {
++ mlx5_core_warn(chains->dev, "Failed to get flow namespace\n");
++ return ERR_PTR(-EOPNOTSUPP);
++ }
++
+ ft_attr.autogroup.num_reserved_entries = 2;
+ ft_attr.autogroup.max_num_groups = chains->group_num;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+--
+2.39.5
+
--- /dev/null
+From 45bf5f760c91ae6da79299e5b4992f6e7ae232e3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Mar 2025 00:01:40 +0200
+Subject: net/mlx5: HWS, Rightsize bwc matcher priority
+
+From: Vlad Dogaru <vdogaru@nvidia.com>
+
+[ Upstream commit 521992337f67f71ce4436b98bc32563ddb1a5ce3 ]
+
+The bwc layer was clamping the matcher priority from 32 bits to 16 bits.
+This didn't show up until a matcher was resized, since the initial
+native matcher was created using the correct 32 bit value.
+
+The fix also reorders fields to avoid some padding.
+
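+Both points can be checked with a small self-contained C program
+(hypothetical field names and layouts, not the driver's struct):
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  int main(void)
+  {
+          uint32_t prio = 0x10000;           /* valid 32-bit priority */
+          uint16_t clamped = (uint16_t)prio; /* old field: high bits lost */
+
+          printf("%u -> %u\n", (unsigned)prio, (unsigned)clamped); /* 65536 -> 0 */
+
+          /* classic padding effect: same fields, different order */
+          struct bad  { uint8_t a; uint32_t b; uint8_t c; };
+          struct good { uint32_t b; uint8_t a; uint8_t c; };
+          printf("%zu vs %zu\n", sizeof(struct bad), sizeof(struct good));
+          /* typically prints "12 vs 8" on common ABIs */
+          return 0;
+  }
+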
+Fixes: 2111bb970c78 ("net/mlx5: HWS, added backward-compatible API handling")
+Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
+Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Link: https://patch.msgid.link/1741644104-97767-3-git-send-email-tariqt@nvidia.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.h
+index 4fe8c32d8fbe8..681fb73f00bbf 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws_bwc.h
+@@ -16,8 +16,8 @@ struct mlx5hws_bwc_matcher {
+ struct mlx5hws_matcher *matcher;
+ struct mlx5hws_match_template *mt;
+ struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM];
++ u32 priority;
+ u8 num_of_at;
+- u16 priority;
+ u8 size_log;
+ u32 num_of_rules; /* atomically accessed */
+ struct list_head *rules;
+--
+2.39.5
+
--- /dev/null
+From 729023fe7a9ec32bbabcc977e7c5fa7f5d9a2339 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Mar 2025 00:01:42 +0200
+Subject: net/mlx5: Lag, Check shared fdb before creating MultiPort E-Switch
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 32966984bee1defd9f5a8f9be274d7c32f911ba1 ]
+
+Currently, MultiPort E-Switch requests the creation of a LAG with shared
+FDB without checking that the LAG supports shared FDB.
+Add the check.
+
+Fixes: a32327a3a02c ("net/mlx5: Lag, Control MultiPort E-Switch single FDB mode")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/1741644104-97767-5-git-send-email-tariqt@nvidia.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 ++--
+ drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 1 +
+ drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 3 ++-
+ 3 files changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+index 7f68468c2e759..4b3da7ebd6310 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+@@ -859,7 +859,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
+ mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
+ }
+
+-static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
++bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
+ {
+ struct mlx5_core_dev *dev;
+ int i;
+@@ -937,7 +937,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
+ }
+
+ if (do_bond && !__mlx5_lag_is_active(ldev)) {
+- bool shared_fdb = mlx5_shared_fdb_supported(ldev);
++ bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
+
+ roce_lag = mlx5_lag_is_roce_lag(ldev);
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+index 50fcb1eee5748..48a5f3e7b91a8 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+@@ -92,6 +92,7 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev)
+ return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
+ }
+
++bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev);
+ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev);
+ void mlx5_modify_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+index 571ea26edd0ca..2381a0eec1900 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+@@ -81,7 +81,8 @@ static int enable_mpesw(struct mlx5_lag *ldev)
+ if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS ||
+ !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) ||
+ !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) ||
+- !mlx5_lag_check_prereq(ldev))
++ !mlx5_lag_check_prereq(ldev) ||
++ !mlx5_lag_shared_fdb_supported(ldev))
+ return -EOPNOTSUPP;
+
+ err = mlx5_mpesw_metadata_set(ldev);
+--
+2.39.5
+
--- /dev/null
+From 4d0a2b90fd3fd1f48e50c932f8cec2e8776de974 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Mar 2025 00:01:44 +0200
+Subject: net/mlx5e: Prevent bridge link show failure for non-eswitch-allowed
+ devices
+
+From: Carolina Jubran <cjubran@nvidia.com>
+
+[ Upstream commit e92df790d07a8eea873efcb84776e7b71f81c7d5 ]
+
+mlx5_eswitch_get_vepa returns -EPERM if the device lacks
+eswitch_manager capability, blocking mlx5e_bridge_getlink from
+retrieving VEPA mode. Since mlx5e_bridge_getlink implements
+ndo_bridge_getlink, returning -EPERM causes bridge link show to fail
+instead of skipping devices without this capability.
+
+To avoid this, return -EOPNOTSUPP from mlx5e_bridge_getlink when
+mlx5_eswitch_get_vepa fails, ensuring the command continues processing
+other devices while ignoring those without the necessary capability.
+
+Fixes: 4b89251de024 ("net/mlx5: Support ndo bridge_setlink and getlink")
+Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
+Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/1741644104-97767-7-git-send-email-tariqt@nvidia.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 62b8a7c1c6b54..1c087fa1ca269 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -5099,11 +5099,9 @@ static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 mode, setting;
+- int err;
+
+- err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting);
+- if (err)
+- return err;
++ if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting))
++ return -EOPNOTSUPP;
+ mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB;
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
+ mode,
+--
+2.39.5
+
--- /dev/null
+From 2e366739bf78d425933ab81aeaf4850ddf74a312 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 8 Mar 2025 01:45:59 +0100
+Subject: net: openvswitch: remove misbehaving actions length check
+
+From: Ilya Maximets <i.maximets@ovn.org>
+
+[ Upstream commit a1e64addf3ff9257b45b78bc7d743781c3f41340 ]
+
+The actions length check is unreliable and produces different results
+depending on the initial length of the provided netlink attribute and
+the composition of the actual actions inside of it. For example, a
+user can add 4088 empty clone() actions without triggering -EMSGSIZE;
+an attempt to add 4089 such actions fails with the -EMSGSIZE
+verdict. However, if another 16 KB of other actions is *appended*
+to the previous 4089 clone() actions, the check passes
+and the flow is successfully installed into the openvswitch datapath.
+
+The reason for such weird behavior is the way memory is allocated.
+When ovs_flow_cmd_new() is invoked, it calls ovs_nla_copy_actions(),
+that in turn calls nla_alloc_flow_actions() with either the actual
+length of the user-provided actions or the MAX_ACTIONS_BUFSIZE. The
+function adds the size of the sw_flow_actions structure and then the
+actually allocated memory is rounded up to the closest power of two.
+
+So, if the user-provided actions are larger than MAX_ACTIONS_BUFSIZE,
+then MAX_ACTIONS_BUFSIZE + sizeof(*sfa) rounded up is 32K + 24 -> 64K.
+Later, while copying individual actions, we look at ksize(), which is
+64K, so this way the MAX_ACTIONS_BUFSIZE check is not actually
+triggered and the user can easily allocate almost 64 KB of actions.
+
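+The rounding can be reproduced with a minimal userspace sketch (the
+24-byte header is the sizeof(*sfa) quoted above; the helper stands in
+for the slab's power-of-two sizing):
+
+  #include <stdio.h>
+  #include <stddef.h>
+
+  static size_t roundup_pow_of_two(size_t n)
+  {
+          size_t p = 1;
+
+          while (p < n)
+                  p <<= 1;
+          return p;
+  }
+
+  int main(void)
+  {
+          size_t max_actions = 32 * 1024;   /* MAX_ACTIONS_BUFSIZE */
+          size_t sfa_hdr = 24;              /* assumed sizeof(*sfa) */
+
+          /* prints 65536: ksize() then reports 64K, so the 32K limit
+           * is never hit while copying individual actions
+           */
+          printf("%zu\n", roundup_pow_of_two(max_actions + sfa_hdr));
+          return 0;
+  }
+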
+However, when the initial size is less than MAX_ACTIONS_BUFSIZE, but
+the actions contain ones that require size increase while copying
+(such as clone() or sample()), then the limit check will be performed
+during the reserve_sfa_size() and the user will not be allowed to
+create actions that yield more than 32 KB internally.
+
+This is one part of the problem. The other part is that it's not
+actually possible for the userspace application to know beforehand
+if the particular set of actions will be rejected or not.
+
+Certain actions require more space in the internal representation,
+e.g. an empty clone() takes 4 bytes in the action list passed in by
+the user, but it takes 12 bytes in the internal representation due
+to an extra nested attribute, and some actions require less space in
+the internal representations, e.g. set(tunnel(..)) normally takes
+64+ bytes in the action list provided by the user, but only needs to
+store a single pointer in the internal implementation, since all the
+data is stored in the tunnel_info structure instead.
+
+And the action size limit is applied to the internal representation,
+not to the action list passed by the user. So, it's not possible for
+the userspace application to predict whether a certain combination of
+actions will be rejected or not, because it is not possible for it to
+calculate how much space these actions will take in the internal
+representation without knowing kernel internals.
+
+All that is causing random failures in ovs-vswitchd in userspace and
+inability to handle certain traffic patterns as a result. For example,
+it is reported that adding a bit more than 1100 VMs in an OpenStack
+setup breaks the network due to OVS not being able to handle ARP
+traffic anymore in some cases (it tries to install a proper datapath
+flow, but the kernel rejects it with -EMSGSIZE, even though the action
+list isn't actually that large.)
+
+Kernel behavior must be consistent and predictable in order for the
+userspace application to use it in a reasonable way. ovs-vswitchd has
+a mechanism to re-direct parts of the traffic and partially handle it
+in userspace if the required action list is oversized, but that doesn't
+work properly if we can't actually tell if the action list is oversized
+or not.
+
+Solution for this is to check the size of the user-provided actions
+instead of the internal representation. This commit just removes the
+check from the internal part because there is already an implicit size
+check imposed by the netlink protocol. The attribute can't be larger
+than 64 KB. Realistically, we could reduce the limit to 32 KB, but
+we would risk breaking some existing setups that rely on the fact
+that it's possible to create nearly 64 KB action lists today.
+
+The vast majority of flows in real setups are below 100-ish bytes. So
+removal of the limit will not change real memory consumption on the
+system. The absolute worst-case scenario is if someone adds a flow
+with 64 KB of empty clone() actions. That will yield 192 KB in the
+internal representation, consuming a 256 KB block of memory. However,
+that list of actions is not meaningful and also a no-op. Real world
+very large action lists (that can occur for a rare cases of BUM
+traffic handling) are unlikely to contain a large number of clones and
+will likely have a lot of tunnel attributes making the internal
+representation comparable in size to the original action list.
+So, it should be fine to just remove the limit.
+
+Commit in the 'Fixes' tag is the first one that introduced the
+difference between internal representation and the user-provided action
+lists, but there were many more afterwards that led to the situation
+we have today.
+
+Fixes: 7d5437c709de ("openvswitch: Add tunneling interface.")
+Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
+Reviewed-by: Aaron Conole <aconole@redhat.com>
+Link: https://patch.msgid.link/20250308004609.2881861-1-i.maximets@ovn.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/openvswitch/flow_netlink.c | 15 +--------------
+ 1 file changed, 1 insertion(+), 14 deletions(-)
+
+diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
+index 729ef582a3a8b..0df89240b7336 100644
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -2317,14 +2317,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
+ OVS_FLOW_ATTR_MASK, true, skb);
+ }
+
+-#define MAX_ACTIONS_BUFSIZE (32 * 1024)
+-
+ static struct sw_flow_actions *nla_alloc_flow_actions(int size)
+ {
+ struct sw_flow_actions *sfa;
+
+- WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
+-
+ sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL);
+ if (!sfa)
+ return ERR_PTR(-ENOMEM);
+@@ -2480,15 +2476,6 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
+
+ new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
+
+- if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
+- if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
+- OVS_NLERR(log, "Flow action size exceeds max %u",
+- MAX_ACTIONS_BUFSIZE);
+- return ERR_PTR(-EMSGSIZE);
+- }
+- new_acts_size = MAX_ACTIONS_BUFSIZE;
+- }
+-
+ acts = nla_alloc_flow_actions(new_acts_size);
+ if (IS_ERR(acts))
+ return ERR_CAST(acts);
+@@ -3545,7 +3532,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
+ int err;
+ u32 mpls_label_count = 0;
+
+- *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
++ *sfa = nla_alloc_flow_actions(nla_len(attr));
+ if (IS_ERR(*sfa))
+ return PTR_ERR(*sfa);
+
+--
+2.39.5
+
--- /dev/null
+From 6ac6d3c6f2e373dd2f5fd6469d21a758a3f1c1df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Mar 2025 14:15:09 +0200
+Subject: net: switchdev: Convert blocking notification chain to a raw one
+
+From: Amit Cohen <amcohen@nvidia.com>
+
+[ Upstream commit 62531a1effa87bdab12d5104015af72e60d926ff ]
+
+A blocking notification chain uses a read-write semaphore to protect the
+integrity of the chain. The semaphore is acquired for writing when
+adding / removing notifiers to / from the chain and acquired for reading
+when traversing the chain and informing notifiers about an event.
+
+In case of the blocking switchdev notification chain, recursive
+notifications are possible, which leads to the semaphore being acquired
+twice for reading and to lockdep warnings being generated [1].
+
+Specifically, this can happen when the bridge driver processes a
+SWITCHDEV_BRPORT_UNOFFLOADED event which causes it to emit notifications
+about deferred events when calling switchdev_deferred_process().
+
+Fix this by converting the notification chain to a raw notification
+chain in a similar fashion to the netdev notification chain. Protect
+the chain using the RTNL mutex by acquiring it when modifying the chain.
+Events are always informed under the RTNL mutex, but add an assertion in
+call_switchdev_blocking_notifiers() to make sure this is not violated in
+the future.
+
+Maintain the "blocking" prefix as events are always emitted from process
+context and listeners are allowed to block.
+
+[1]:
+WARNING: possible recursive locking detected
+6.14.0-rc4-custom-g079270089484 #1 Not tainted
+--------------------------------------------
+ip/52731 is trying to acquire lock:
+ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0
+
+but task is already holding lock:
+ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0
+
+other info that might help us debug this:
+Possible unsafe locking scenario:
+CPU0
+----
+lock((switchdev_blocking_notif_chain).rwsem);
+lock((switchdev_blocking_notif_chain).rwsem);
+
+*** DEADLOCK ***
+May be due to missing lock nesting notation
+3 locks held by ip/52731:
+ #0: ffffffff84f795b0 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x727/0x1dc0
+ #1: ffffffff8731f628 (&net->rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x790/0x1dc0
+ #2: ffffffff850918d8 ((switchdev_blocking_notif_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x58/0xa0
+
+stack backtrace:
+...
+? __pfx_down_read+0x10/0x10
+? __pfx_mark_lock+0x10/0x10
+? __pfx_switchdev_port_attr_set_deferred+0x10/0x10
+blocking_notifier_call_chain+0x58/0xa0
+switchdev_port_attr_notify.constprop.0+0xb3/0x1b0
+? __pfx_switchdev_port_attr_notify.constprop.0+0x10/0x10
+? mark_held_locks+0x94/0xe0
+? switchdev_deferred_process+0x11a/0x340
+switchdev_port_attr_set_deferred+0x27/0xd0
+switchdev_deferred_process+0x164/0x340
+br_switchdev_port_unoffload+0xc8/0x100 [bridge]
+br_switchdev_blocking_event+0x29f/0x580 [bridge]
+notifier_call_chain+0xa2/0x440
+blocking_notifier_call_chain+0x6e/0xa0
+switchdev_bridge_port_unoffload+0xde/0x1a0
+...
+
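+A simplified sketch of the resulting pattern (not the exact switchdev
+code): a raw notifier chain has no internal locking, so an external
+mutex -- here RTNL -- serializes writers, and a callback may safely
+re-enter the chain without self-deadlocking on a rwsem:
+
+  static RAW_NOTIFIER_HEAD(chain);
+
+  int my_register(struct notifier_block *nb)
+  {
+          int err;
+
+          rtnl_lock();            /* writers modify under RTNL */
+          err = raw_notifier_chain_register(&chain, nb);
+          rtnl_unlock();
+          return err;
+  }
+
+  int my_call(unsigned long val, void *info)
+  {
+          ASSERT_RTNL();          /* callers already hold RTNL */
+          return raw_notifier_call_chain(&chain, val, info);
+  }
+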
+Fixes: f7a70d650b0b6 ("net: bridge: switchdev: Ensure deferred event delivery on unoffload")
+Signed-off-by: Amit Cohen <amcohen@nvidia.com>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Tested-by: Vladimir Oltean <olteanv@gmail.com>
+Link: https://patch.msgid.link/20250305121509.631207-1-amcohen@nvidia.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/switchdev/switchdev.c | 25 ++++++++++++++++++-------
+ 1 file changed, 18 insertions(+), 7 deletions(-)
+
+diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
+index 6488ead9e4645..4d5fbacef496f 100644
+--- a/net/switchdev/switchdev.c
++++ b/net/switchdev/switchdev.c
+@@ -472,7 +472,7 @@ bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+ EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
+
+ static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
+-static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
++static RAW_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
+
+ /**
+ * register_switchdev_notifier - Register notifier
+@@ -518,17 +518,27 @@ EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
+
+ int register_switchdev_blocking_notifier(struct notifier_block *nb)
+ {
+- struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
++ struct raw_notifier_head *chain = &switchdev_blocking_notif_chain;
++ int err;
++
++ rtnl_lock();
++ err = raw_notifier_chain_register(chain, nb);
++ rtnl_unlock();
+
+- return blocking_notifier_chain_register(chain, nb);
++ return err;
+ }
+ EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);
+
+ int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
+ {
+- struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
++ struct raw_notifier_head *chain = &switchdev_blocking_notif_chain;
++ int err;
+
+- return blocking_notifier_chain_unregister(chain, nb);
++ rtnl_lock();
++ err = raw_notifier_chain_unregister(chain, nb);
++ rtnl_unlock();
++
++ return err;
+ }
+ EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);
+
+@@ -536,10 +546,11 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
+ struct switchdev_notifier_info *info,
+ struct netlink_ext_ack *extack)
+ {
++ ASSERT_RTNL();
+ info->dev = dev;
+ info->extack = extack;
+- return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
+- val, info);
++ return raw_notifier_call_chain(&switchdev_blocking_notif_chain,
++ val, info);
+ }
+ EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
+
+--
+2.39.5
+
--- /dev/null
+From 4dff6db74447482027ffafbdfad201c5dfa013dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 15:23:54 -0800
+Subject: net_sched: Prevent creation of classes with TC_H_ROOT
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 0c3057a5a04d07120b3d0ec9c79568fceb9c921e ]
+
+The function qdisc_tree_reduce_backlog() uses TC_H_ROOT as a termination
+condition when traversing up the qdisc tree to update parent backlog
+counters. However, if a class is created with classid TC_H_ROOT, the
+traversal terminates prematurely at this class instead of reaching the
+actual root qdisc, causing parent statistics to be incorrectly maintained.
+In case of DRR, this could lead to a crash as reported by Mingi Cho.
+
+Prevent the creation of any Qdisc class with classid TC_H_ROOT
+(0xFFFFFFFF) across all qdisc types, as suggested by Jamal.
+
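+A simplified sketch of the upward walk (hypothetical helpers, not the
+exact kernel code): TC_H_ROOT (0xFFFFFFFF) acts as a sentinel meaning
+"reached the root", so a real class carrying that classid stops the
+walk prematurely and the counters above it are never updated:
+
+  while (parentid != TC_H_ROOT) {
+          cl = lookup_class(qdisc, parentid);   /* hypothetical */
+          update_backlog(cl, -n);
+          parentid = class_parent(cl);          /* hypothetical */
+  }
+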
+Reported-by: Mingi Cho <mincho@theori.io>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Fixes: 066a3b5b2346 ("[NET_SCHED] sch_api: fix qdisc_tree_decrease_qlen() loop")
+Link: https://patch.msgid.link/20250306232355.93864-2-xiyou.wangcong@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_api.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
+index d26ac6bd9b108..518f52f65a49d 100644
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -2254,6 +2254,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
+ return -EOPNOTSUPP;
+ }
+
++ /* Prevent creation of traffic classes with classid TC_H_ROOT */
++ if (clid == TC_H_ROOT) {
++ NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
++ return -EINVAL;
++ }
++
+ new_cl = cl;
+ err = -EOPNOTSUPP;
+ if (cops->change)
+--
+2.39.5
+
--- /dev/null
+From 9a5c2e1e97d7d0590833b91de215cf249efddde3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Mar 2025 17:07:38 +0900
+Subject: netfilter: nf_conncount: Fully initialize struct nf_conncount_tuple
+ in insert_tree()
+
+From: Kohei Enju <enjuk@amazon.com>
+
+[ Upstream commit d653bfeb07ebb3499c403404c21ac58a16531607 ]
+
+Commit b36e4523d4d5 ("netfilter: nf_conncount: fix garbage
+collection confirm race") introduced `cpu` and `jiffies32` to
+struct nf_conncount_tuple.
+
+The commit made nf_conncount_add() initialize `conn->cpu` and
+`conn->jiffies32` when allocating the struct.
+In contrast, count_tree() was not changed to initialize them.
+
+By commit 34848d5c896e ("netfilter: nf_conncount: Split insert and
+traversal"), count_tree() was split and the relevant allocation
+code now resides in insert_tree().
+Initialize `conn->cpu` and `conn->jiffies32` in insert_tree().
+
+BUG: KMSAN: uninit-value in find_or_evict net/netfilter/nf_conncount.c:117 [inline]
+BUG: KMSAN: uninit-value in __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143
+ find_or_evict net/netfilter/nf_conncount.c:117 [inline]
+ __nf_conncount_add+0xd9c/0x2850 net/netfilter/nf_conncount.c:143
+ count_tree net/netfilter/nf_conncount.c:438 [inline]
+ nf_conncount_count+0x82f/0x1e80 net/netfilter/nf_conncount.c:521
+ connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72
+ __nft_match_eval net/netfilter/nft_compat.c:403 [inline]
+ nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433
+ expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline]
+ nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288
+ nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23
+ nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline]
+ nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626
+ nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663
+ NF_HOOK_LIST include/linux/netfilter.h:350 [inline]
+ ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633
+ ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669
+ __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline]
+ __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983
+ __netif_receive_skb_list net/core/dev.c:6035 [inline]
+ netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126
+ netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178
+ xdp_recv_frames net/bpf/test_run.c:280 [inline]
+ xdp_test_run_batch net/bpf/test_run.c:361 [inline]
+ bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390
+ bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316
+ bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407
+ __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813
+ __do_sys_bpf kernel/bpf/syscall.c:5902 [inline]
+ __se_sys_bpf kernel/bpf/syscall.c:5900 [inline]
+ __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900
+ ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358
+ do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline]
+ __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387
+ do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412
+ do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450
+ entry_SYSENTER_compat_after_hwframe+0x84/0x8e
+
+Uninit was created at:
+ slab_post_alloc_hook mm/slub.c:4121 [inline]
+ slab_alloc_node mm/slub.c:4164 [inline]
+ kmem_cache_alloc_noprof+0x915/0xe10 mm/slub.c:4171
+ insert_tree net/netfilter/nf_conncount.c:372 [inline]
+ count_tree net/netfilter/nf_conncount.c:450 [inline]
+ nf_conncount_count+0x1415/0x1e80 net/netfilter/nf_conncount.c:521
+ connlimit_mt+0x7f6/0xbd0 net/netfilter/xt_connlimit.c:72
+ __nft_match_eval net/netfilter/nft_compat.c:403 [inline]
+ nft_match_eval+0x1a5/0x300 net/netfilter/nft_compat.c:433
+ expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline]
+ nft_do_chain+0x426/0x2290 net/netfilter/nf_tables_core.c:288
+ nft_do_chain_ipv4+0x1a5/0x230 net/netfilter/nft_chain_filter.c:23
+ nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline]
+ nf_hook_slow+0xf4/0x400 net/netfilter/core.c:626
+ nf_hook_slow_list+0x24d/0x860 net/netfilter/core.c:663
+ NF_HOOK_LIST include/linux/netfilter.h:350 [inline]
+ ip_sublist_rcv+0x17b7/0x17f0 net/ipv4/ip_input.c:633
+ ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:669
+ __netif_receive_skb_list_ptype net/core/dev.c:5936 [inline]
+ __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5983
+ __netif_receive_skb_list net/core/dev.c:6035 [inline]
+ netif_receive_skb_list_internal+0x1085/0x1700 net/core/dev.c:6126
+ netif_receive_skb_list+0x5a/0x460 net/core/dev.c:6178
+ xdp_recv_frames net/bpf/test_run.c:280 [inline]
+ xdp_test_run_batch net/bpf/test_run.c:361 [inline]
+ bpf_test_run_xdp_live+0x2e86/0x3480 net/bpf/test_run.c:390
+ bpf_prog_test_run_xdp+0xf1d/0x1ae0 net/bpf/test_run.c:1316
+ bpf_prog_test_run+0x5e5/0xa30 kernel/bpf/syscall.c:4407
+ __sys_bpf+0x6aa/0xd90 kernel/bpf/syscall.c:5813
+ __do_sys_bpf kernel/bpf/syscall.c:5902 [inline]
+ __se_sys_bpf kernel/bpf/syscall.c:5900 [inline]
+ __ia32_sys_bpf+0xa0/0xe0 kernel/bpf/syscall.c:5900
+ ia32_sys_call+0x394d/0x4180 arch/x86/include/generated/asm/syscalls_32.h:358
+ do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline]
+ __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/common.c:387
+ do_fast_syscall_32+0x38/0x80 arch/x86/entry/common.c:412
+ do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:450
+ entry_SYSENTER_compat_after_hwframe+0x84/0x8e
+
+Reported-by: syzbot+83fed965338b573115f7@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=83fed965338b573115f7
+Fixes: b36e4523d4d5 ("netfilter: nf_conncount: fix garbage collection confirm race")
+Signed-off-by: Kohei Enju <enjuk@amazon.com>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_conncount.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
+index ebe38ed2e6f4f..913ede2f57f9a 100644
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -377,6 +377,8 @@ insert_tree(struct net *net,
+
+ conn->tuple = *tuple;
+ conn->zone = *zone;
++ conn->cpu = raw_smp_processor_id();
++ conn->jiffies32 = (u32)jiffies;
+ memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
+
+ nf_conncount_list_init(&rbconn->list);
+--
+2.39.5
+
--- /dev/null
+From 79a506f7df08cd8fa09b7c6327ef87e1d3f62a57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Feb 2025 13:32:34 +0000
+Subject: netfilter: nf_conncount: garbage collection is not skipped when
+ jiffies wrap around
+
+From: Nicklas Bo Jensen <njensen@akamai.com>
+
+[ Upstream commit df08c94baafb001de6cf44bb7098bb557f36c335 ]
+
+nf_conncount is supposed to skip garbage collection if it has already
+run garbage collection in the same jiffy. Unfortunately, this is broken
+when jiffies wraps around, which this patch fixes.
+
+The problem is that last_gc in the nf_conncount_list struct is a u32,
+but jiffies is an unsigned long which is 8 bytes on my systems. When
+those two are compared it only works until last_gc wraps around.
+
+See bug report: https://bugzilla.netfilter.org/show_bug.cgi?id=1778
+for more details.
+
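+The width mismatch can be demonstrated in userspace (assuming a 64-bit
+unsigned long, as on the systems described above):
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          unsigned long jiffies = 0x100000000UL; /* past the u32 wrap */
+          unsigned int last_gc = (unsigned int)jiffies; /* stored u32 */
+
+          /* widened comparison: 0x100000000 vs 0x0, never equal again */
+          printf("as ulong: %d\n", (unsigned long)last_gc == jiffies);
+          /* fixed comparison, done in u32 space: still works */
+          printf("as u32:   %d\n", (unsigned int)jiffies == last_gc);
+          return 0;
+  }
+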
+Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC")
+Signed-off-by: Nicklas Bo Jensen <njensen@akamai.com>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_conncount.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
+index 4890af4dc263f..ebe38ed2e6f4f 100644
+--- a/net/netfilter/nf_conncount.c
++++ b/net/netfilter/nf_conncount.c
+@@ -132,7 +132,7 @@ static int __nf_conncount_add(struct net *net,
+ struct nf_conn *found_ct;
+ unsigned int collect = 0;
+
+- if (time_is_after_eq_jiffies((unsigned long)list->last_gc))
++ if ((u32)jiffies == list->last_gc)
+ goto add_new_node;
+
+ /* check the saved connections */
+@@ -234,7 +234,7 @@ bool nf_conncount_gc_list(struct net *net,
+ bool ret = false;
+
+ /* don't bother if we just did GC */
+- if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc)))
++ if ((u32)jiffies == READ_ONCE(list->last_gc))
+ return false;
+
+ /* don't bother if other cpu is already doing GC */
+--
+2.39.5
+
--- /dev/null
+From fcee467e7eb1f24692acf9ec656d00807c1a43d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 04:05:26 +0100
+Subject: netfilter: nf_tables: make destruction work queue pernet
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit fb8286562ecfb585e26b033c5e32e6fb85efb0b3 ]
+
+The call to flush_work before tearing down a table from the netlink
+notifier was supposed to make sure that all earlier updates (e.g. rule
+add) that might reference that table have been processed.
+
+Unfortunately, flush_work() waits for the last queued instance.
+This could be an instance that is different from the one that we must
+wait for.
+
+This is because transactions are protected with a pernet mutex, but the
+work item is global, so holding the transaction mutex doesn't prevent
+another netns from queueing more work.
+
+Make the work item pernet so that flush_work() will wait for all
+transactions queued from this netns.
+
+A welcome side effect is that we no longer need to wait for transaction
+objects from foreign netns.
+
+The gc work queue is still global. This seems to be ok because nft_set
+structures are reference counted and each container structure owns a
+reference on the net namespace.
+
+The destroy_list is still protected by a global spinlock rather than
+a pernet one, but the hold time is very short anyway.
+
+v2: call cancel_work_sync before reaping the remaining tables (Pablo).
+
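+A sketch of the resulting shape (simplified; my_net_id and
+destroy_work_fn are placeholders): each netns owns its work item, so
+flush_work() only waits for work queued from that netns:
+
+  struct my_pernet {
+          struct list_head destroy_list;
+          struct work_struct destroy_work;
+  };
+
+  static int __net_init my_init_net(struct net *net)
+  {
+          struct my_pernet *p = net_generic(net, my_net_id);
+
+          INIT_LIST_HEAD(&p->destroy_list);
+          INIT_WORK(&p->destroy_work, destroy_work_fn);
+          return 0;
+  }
+
+  static void my_flush(struct net *net)
+  {
+          struct my_pernet *p = net_generic(net, my_net_id);
+
+          flush_work(&p->destroy_work); /* only this netns' backlog */
+  }
+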
+Fixes: 9f6958ba2e90 ("netfilter: nf_tables: unconditionally flush pending work before notifier")
+Reported-by: syzbot+5d8c5789c8cb076b2c25@syzkaller.appspotmail.com
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 4 +++-
+ net/netfilter/nf_tables_api.c | 24 ++++++++++++++----------
+ net/netfilter/nft_compat.c | 8 ++++----
+ 3 files changed, 21 insertions(+), 15 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index 788513cc384b7..757abcb54d117 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -1889,7 +1889,7 @@ void nft_chain_filter_fini(void);
+ void __init nft_chain_route_init(void);
+ void nft_chain_route_fini(void);
+
+-void nf_tables_trans_destroy_flush_work(void);
++void nf_tables_trans_destroy_flush_work(struct net *net);
+
+ int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
+ __be64 nf_jiffies64_to_msecs(u64 input);
+@@ -1903,6 +1903,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
+ struct nftables_pernet {
+ struct list_head tables;
+ struct list_head commit_list;
++ struct list_head destroy_list;
+ struct list_head commit_set_list;
+ struct list_head binding_list;
+ struct list_head module_list;
+@@ -1913,6 +1914,7 @@ struct nftables_pernet {
+ unsigned int base_seq;
+ unsigned int gc_seq;
+ u8 validate_state;
++ struct work_struct destroy_work;
+ };
+
+ extern unsigned int nf_tables_net_id;
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 939510247ef5a..eb3a6f96b094d 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -31,7 +31,6 @@ unsigned int nf_tables_net_id __read_mostly;
+ static LIST_HEAD(nf_tables_expressions);
+ static LIST_HEAD(nf_tables_objects);
+ static LIST_HEAD(nf_tables_flowtables);
+-static LIST_HEAD(nf_tables_destroy_list);
+ static LIST_HEAD(nf_tables_gc_list);
+ static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+ static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
+@@ -122,7 +121,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
+ table->validate_state = new_validate_state;
+ }
+ static void nf_tables_trans_destroy_work(struct work_struct *w);
+-static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
+
+ static void nft_trans_gc_work(struct work_struct *work);
+ static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+@@ -9748,11 +9746,12 @@ static void nft_commit_release(struct nft_trans *trans)
+
+ static void nf_tables_trans_destroy_work(struct work_struct *w)
+ {
++ struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work);
+ struct nft_trans *trans, *next;
+ LIST_HEAD(head);
+
+ spin_lock(&nf_tables_destroy_list_lock);
+- list_splice_init(&nf_tables_destroy_list, &head);
++ list_splice_init(&nft_net->destroy_list, &head);
+ spin_unlock(&nf_tables_destroy_list_lock);
+
+ if (list_empty(&head))
+@@ -9766,9 +9765,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
+ }
+ }
+
+-void nf_tables_trans_destroy_flush_work(void)
++void nf_tables_trans_destroy_flush_work(struct net *net)
+ {
+- flush_work(&trans_destroy_work);
++ struct nftables_pernet *nft_net = nft_pernet(net);
++
++ flush_work(&nft_net->destroy_work);
+ }
+ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
+
+@@ -10226,11 +10227,11 @@ static void nf_tables_commit_release(struct net *net)
+
+ trans->put_net = true;
+ spin_lock(&nf_tables_destroy_list_lock);
+- list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list);
++ list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list);
+ spin_unlock(&nf_tables_destroy_list_lock);
+
+ nf_tables_module_autoload_cleanup(net);
+- schedule_work(&trans_destroy_work);
++ schedule_work(&nft_net->destroy_work);
+
+ mutex_unlock(&nft_net->commit_mutex);
+ }
+@@ -11653,7 +11654,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
+- nf_tables_trans_destroy_flush_work();
++ nf_tables_trans_destroy_flush_work(net);
+ again:
+ list_for_each_entry(table, &nft_net->tables, list) {
+ if (nft_table_has_owner(table) &&
+@@ -11695,6 +11696,7 @@ static int __net_init nf_tables_init_net(struct net *net)
+
+ INIT_LIST_HEAD(&nft_net->tables);
+ INIT_LIST_HEAD(&nft_net->commit_list);
++ INIT_LIST_HEAD(&nft_net->destroy_list);
+ INIT_LIST_HEAD(&nft_net->commit_set_list);
+ INIT_LIST_HEAD(&nft_net->binding_list);
+ INIT_LIST_HEAD(&nft_net->module_list);
+@@ -11703,6 +11705,7 @@ static int __net_init nf_tables_init_net(struct net *net)
+ nft_net->base_seq = 1;
+ nft_net->gc_seq = 0;
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
++ INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work);
+
+ return 0;
+ }
+@@ -11731,14 +11734,17 @@ static void __net_exit nf_tables_exit_net(struct net *net)
+ if (!list_empty(&nft_net->module_list))
+ nf_tables_module_autoload_cleanup(net);
+
++ cancel_work_sync(&nft_net->destroy_work);
+ __nft_release_tables(net);
+
+ nft_gc_seq_end(nft_net, gc_seq);
+
+ mutex_unlock(&nft_net->commit_mutex);
++
+ WARN_ON_ONCE(!list_empty(&nft_net->tables));
+ WARN_ON_ONCE(!list_empty(&nft_net->module_list));
+ WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
++ WARN_ON_ONCE(!list_empty(&nft_net->destroy_list));
+ }
+
+ static void nf_tables_exit_batch(struct list_head *net_exit_list)
+@@ -11829,10 +11835,8 @@ static void __exit nf_tables_module_exit(void)
+ unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+ nft_chain_filter_fini();
+ nft_chain_route_fini();
+- nf_tables_trans_destroy_flush_work();
+ unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_gc_work);
+- cancel_work_sync(&trans_destroy_work);
+ rcu_barrier();
+ rhltable_destroy(&nft_objname_ht);
+ nf_tables_core_module_exit();
+diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
+index 7ca4f0d21fe2a..72711d62fddfa 100644
+--- a/net/netfilter/nft_compat.c
++++ b/net/netfilter/nft_compat.c
+@@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
+ return 0;
+ }
+
+-static void nft_compat_wait_for_destructors(void)
++static void nft_compat_wait_for_destructors(struct net *net)
+ {
+ /* xtables matches or targets can have side effects, e.g.
+ * creation/destruction of /proc files.
+@@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void)
+ * work queue. If we have pending invocations we thus
+ * need to wait for those to finish.
+ */
+- nf_tables_trans_destroy_flush_work();
++ nf_tables_trans_destroy_flush_work(net);
+ }
+
+ static int
+@@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+
+ nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
+
+- nft_compat_wait_for_destructors();
++ nft_compat_wait_for_destructors(ctx->net);
+
+ ret = xt_check_target(&par, size, proto, inv);
+ if (ret < 0) {
+@@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+
+ nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
+
+- nft_compat_wait_for_destructors();
++ nft_compat_wait_for_destructors(ctx->net);
+
+ return xt_check_match(&par, size, proto, inv);
+ }
+--
+2.39.5
+
--- /dev/null
+From 4d015f42fc35d6c48f48ea9fe07e489007d23169 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 17:02:42 +0100
+Subject: netfilter: nft_ct: Use __refcount_inc() for per-CPU
+ nft_ct_pcpu_template.
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+[ Upstream commit 5cfe5612ca9590db69b9be29dc83041dbf001108 ]
+
+nft_ct_pcpu_template is a per-CPU variable and relies on disabled BH for its
+locking. The refcounter is read, and if its value is one, the
+refcounter is incremented and the variable is used; otherwise it is
+already in use and left untouched.
+
+Without per-CPU locking in local_bh_disable() on PREEMPT_RT the
+read-then-increment operation is not atomic and therefore racy.
+
+This can be avoided by unconditionally using __refcount_inc(), which
+increments the counter and returns the old value as an atomic operation.
+If the returned counter is not one, the variable is in use and we need
+to decrement the counter; otherwise we can use it.
+
+Use __refcount_inc() instead of read and a conditional increment.
+
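+A userspace analogue with C11 atomics shows why fetch-and-add closes
+the window (a sketch of the pattern, not the kernel code):
+
+  #include <stdatomic.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          atomic_int use = 1;
+
+          /* like __refcount_inc(): bump and observe the old value
+           * in one atomic step, no read-then-increment window
+           */
+          int oldcnt = atomic_fetch_add(&use, 1);
+
+          if (oldcnt == 1) {
+                  puts("template free: take it");
+          } else {
+                  atomic_fetch_sub(&use, 1);  /* undo, it's in use */
+                  puts("template busy: allocate a temporary one");
+          }
+          return 0;
+  }
+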
+Fixes: edee4f1e9245 ("netfilter: nft_ct: add zone id set support")
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_ct.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
+index 67a41cd2baaff..a1b373b99f7b8 100644
+--- a/net/netfilter/nft_ct.c
++++ b/net/netfilter/nft_ct.c
+@@ -230,6 +230,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
+ enum ip_conntrack_info ctinfo;
+ u16 value = nft_reg_load16(®s->data[priv->sreg]);
+ struct nf_conn *ct;
++ int oldcnt;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) /* already tracked */
+@@ -250,10 +251,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
+
+ ct = this_cpu_read(nft_ct_pcpu_template);
+
+- if (likely(refcount_read(&ct->ct_general.use) == 1)) {
+- refcount_inc(&ct->ct_general.use);
++ __refcount_inc(&ct->ct_general.use, &oldcnt);
++ if (likely(oldcnt == 1)) {
+ nf_ct_zone_add(ct, &zone);
+ } else {
++ refcount_dec(&ct->ct_general.use);
+ /* previous skb got queued to userspace, allocate temporary
+ * one until percpu template can be reused.
+ */
+--
+2.39.5
+
--- /dev/null
+From f98cade26811a10267e4260473b9b35cbe8f86ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 2 Mar 2025 00:14:36 +0300
+Subject: netfilter: nft_exthdr: fix offset with ipv4_find_option()
+
+From: Alexey Kashavkin <akashavkin@gmail.com>
+
+[ Upstream commit 6edd78af9506bb182518da7f6feebd75655d9a0e ]
+
+There is an incorrect calculation in the offset variable which causes
+the nft_skb_copy_to_reg() function to always return -EFAULT. Adding the
+start variable is redundant. In the __ip_options_compile() function the
+correct offset is specified when finding the option. There is no need
+to add the size of the iphdr structure to the offset.
+
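+A worked example of the double count (offsets assumed for
+illustration): for a 24-byte IPv4 header whose record-route option
+starts right after the 20 fixed header bytes, __ip_options_compile()
+already records opt->rr relative to the start of the header, i.e. 20.
+The old code returned 20 + sizeof(struct iphdr) = 40, pointing past
+the option, so the bounded copy into the register failed; dropping
+the extra addend returns the correct 20.
+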
+Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options")
+Signed-off-by: Alexey Kashavkin <akashavkin@gmail.com>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_exthdr.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
+index b8d03364566c1..c74012c991255 100644
+--- a/net/netfilter/nft_exthdr.c
++++ b/net/netfilter/nft_exthdr.c
+@@ -85,7 +85,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
+ unsigned char optbuf[sizeof(struct ip_options) + 40];
+ struct ip_options *opt = (struct ip_options *)optbuf;
+ struct iphdr *iph, _iph;
+- unsigned int start;
+ bool found = false;
+ __be32 info;
+ int optlen;
+@@ -93,7 +92,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
+ iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+ if (!iph)
+ return -EBADMSG;
+- start = sizeof(struct iphdr);
+
+ optlen = iph->ihl * 4 - (int)sizeof(struct iphdr);
+ if (optlen <= 0)
+@@ -103,7 +101,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
+ /* Copy the options since __ip_options_compile() modifies
+ * the options.
+ */
+- if (skb_copy_bits(skb, start, opt->__data, optlen))
++ if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen))
+ return -EBADMSG;
+ opt->optlen = optlen;
+
+@@ -118,18 +116,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
+ found = target == IPOPT_SSRR ? opt->is_strictroute :
+ !opt->is_strictroute;
+ if (found)
+- *offset = opt->srr + start;
++ *offset = opt->srr;
+ break;
+ case IPOPT_RR:
+ if (!opt->rr)
+ break;
+- *offset = opt->rr + start;
++ *offset = opt->rr;
+ found = true;
+ break;
+ case IPOPT_RA:
+ if (!opt->router_alert)
+ break;
+- *offset = opt->router_alert + start;
++ *offset = opt->router_alert;
+ found = true;
+ break;
+ default:
+--
+2.39.5
+
--- /dev/null
+From 1aad2a728b8eddd9ac5432e3e15449cdf713882d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 05:16:18 -0800
+Subject: netpoll: hold rcu read lock in __netpoll_send_skb()
+
+From: Breno Leitao <leitao@debian.org>
+
+[ Upstream commit 505ead7ab77f289f12d8a68ac83da068e4d4408b ]
+
+The function __netpoll_send_skb() is being invoked without holding the
+RCU read lock. This oversight triggers a warning message when
+CONFIG_PROVE_RCU_LIST is enabled:
+
+ net/core/netpoll.c:330 suspicious rcu_dereference_check() usage!
+
+ netpoll_send_skb
+ netpoll_send_udp
+ write_ext_msg
+ console_flush_all
+ console_unlock
+ vprintk_emit
+
+To prevent npinfo from disappearing unexpectedly, ensure that
+__netpoll_send_skb() is protected with the RCU read lock.
+
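+The resulting shape follows the usual RCU read-side pattern
+(simplified from the patch):
+
+  rcu_read_lock();
+  npinfo = rcu_dereference_bh(dev->npinfo);
+  if (!npinfo || !netif_running(dev)) {
+          rcu_read_unlock();      /* object may be freed after this */
+          return NET_XMIT_DROP;
+  }
+  /* ... transmit using npinfo ... */
+  rcu_read_unlock();
+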
+Fixes: 2899656b494dcd1 ("netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()")
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250306-netpoll_rcu_v2-v2-1-bc4f5c51742a@debian.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/netpoll.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/netpoll.c b/net/core/netpoll.c
+index 45fb60bc48039..e95c2933756df 100644
+--- a/net/core/netpoll.c
++++ b/net/core/netpoll.c
+@@ -319,6 +319,7 @@ static int netpoll_owner_active(struct net_device *dev)
+ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+ {
+ netdev_tx_t status = NETDEV_TX_BUSY;
++ netdev_tx_t ret = NET_XMIT_DROP;
+ struct net_device *dev;
+ unsigned long tries;
+ /* It is up to the caller to keep npinfo alive. */
+@@ -327,11 +328,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+ lockdep_assert_irqs_disabled();
+
+ dev = np->dev;
++ rcu_read_lock();
+ npinfo = rcu_dereference_bh(dev->npinfo);
+
+ if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
+ dev_kfree_skb_irq(skb);
+- return NET_XMIT_DROP;
++ goto out;
+ }
+
+ /* don't get messages out of order, and no recursion */
+@@ -370,7 +372,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+ skb_queue_tail(&npinfo->txq, skb);
+ schedule_delayed_work(&npinfo->tx_work,0);
+ }
+- return NETDEV_TX_OK;
++ ret = NETDEV_TX_OK;
++out:
++ rcu_read_unlock();
++ return ret;
+ }
+
+ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+--
+2.39.5
+
--- /dev/null
+From 88c530d46443ab33fdf54a71e59100680ad539ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Feb 2025 21:02:41 +0100
+Subject: pinctrl: bcm281xx: Fix incorrect regmap max_registers value
+
+From: Artur Weber <aweber.kernel@gmail.com>
+
+[ Upstream commit 68283c1cb573143c0b7515e93206f3503616bc10 ]
+
+The max_register value does not take the register stride into account;
+currently it is set to the number of the last pin, but this does not
+accurately represent the offset of the final register.
+
+Fix this by multiplying the current value by 4.
+
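+As a worked example (the pin number here is hypothetical, not the
+driver's actual value): with .reg_stride = 4, pin N's register sits at
+byte offset N * 4. If the last pin were number 150, its register would
+be at offset 600, so max_register must be 600; leaving max_register at
+150 would cap the regmap at byte offset 150, i.e. at roughly pin 37.
+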
+Fixes: 54b1aa5a5b16 ("ARM: pinctrl: Add Broadcom Capri pinctrl driver")
+Signed-off-by: Artur Weber <aweber.kernel@gmail.com>
+Link: https://lore.kernel.org/20250207-bcm21664-pinctrl-v1-2-e7cfac9b2d3b@gmail.com
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pinctrl/bcm/pinctrl-bcm281xx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c
+index 73dbf29c002f3..cf6efa9c0364a 100644
+--- a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c
++++ b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c
+@@ -974,7 +974,7 @@ static const struct regmap_config bcm281xx_pinctrl_regmap_config = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+- .max_register = BCM281XX_PIN_VC_CAM3_SDA,
++ .max_register = BCM281XX_PIN_VC_CAM3_SDA * 4,
+ };
+
+ static int bcm281xx_pinctrl_get_groups_count(struct pinctrl_dev *pctldev)
+--
+2.39.5
+
--- /dev/null
+From 185aa64e71ff5cbda9f8417a419d442f8473b882 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Feb 2025 18:05:32 +0800
+Subject: pinctrl: nuvoton: npcm8xx: Add NULL check in npcm8xx_gpio_fw
+
+From: Charles Han <hanchunchao@inspur.com>
+
+[ Upstream commit acf40ab42799e4ae1397ee6f5c5941092d66f999 ]
+
+devm_kasprintf() can return a NULL pointer on failure, but the return
+value was not checked in npcm8xx_gpio_fw(). Add a NULL check in
+npcm8xx_gpio_fw() to avoid a kernel NULL pointer dereference.
+
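+A minimal sketch of the check (the gc pointer abbreviates
+pctrl->gpio_bank[id].gc from the patch):
+
+  gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pfw", child);
+  if (!gc->label)
+          return -ENOMEM;   /* allocation failed, bail out early */
+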
+Fixes: acf4884a5717 ("pinctrl: nuvoton: add NPCM8XX pinctrl and GPIO driver")
+Signed-off-by: Charles Han <hanchunchao@inspur.com>
+Link: https://lore.kernel.org/20250212100532.4317-1-hanchunchao@inspur.com
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c
+index 471f644c5eef2..d09a5e9b2eca5 100644
+--- a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c
++++ b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c
+@@ -2374,6 +2374,9 @@ static int npcm8xx_gpio_fw(struct npcm8xx_pinctrl *pctrl)
+ pctrl->gpio_bank[id].gc.parent = dev;
+ pctrl->gpio_bank[id].gc.fwnode = child;
+ pctrl->gpio_bank[id].gc.label = devm_kasprintf(dev, GFP_KERNEL, "%pfw", child);
++ if (pctrl->gpio_bank[id].gc.label == NULL)
++ return -ENOMEM;
++
+ pctrl->gpio_bank[id].gc.dbg_show = npcmgpio_dbg_show;
+ pctrl->gpio_bank[id].direction_input = pctrl->gpio_bank[id].gc.direction_input;
+ pctrl->gpio_bank[id].gc.direction_input = npcmgpio_direction_input;
+--
+2.39.5
+
--- /dev/null
+From e8430f1258fc9b391ccc6c2ce546cddc5bb2c2ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Mar 2025 10:06:10 -0500
+Subject: Revert "Bluetooth: hci_core: Fix sleeping function called from
+ invalid context"
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit ab6ab707a4d060a51c45fc13e3b2228d5f7c0b87 ]
+
+This reverts commit 4d94f05558271654670d18c26c912da0c1c15549, which has
+problems (see [1]) and is no longer needed since 581dd2dc168f
+("Bluetooth: hci_event: Fix using rcu_read_(un)lock while iterating")
+reworked the code where the original bug was found.
+
+[1] Link: https://lore.kernel.org/linux-bluetooth/877c55ci1r.wl-tiwai@suse.de/T/#t
+Fixes: 4d94f0555827 ("Bluetooth: hci_core: Fix sleeping function called from invalid context")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/bluetooth/hci_core.h | 108 +++++++++++--------------------
+ net/bluetooth/hci_core.c | 10 ++-
+ net/bluetooth/iso.c | 6 --
+ net/bluetooth/l2cap_core.c | 12 ++--
+ net/bluetooth/rfcomm/core.c | 6 --
+ net/bluetooth/sco.c | 12 ++--
+ 6 files changed, 57 insertions(+), 97 deletions(-)
+
+diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
+index ba7b52584770d..c95f7e6ba2551 100644
+--- a/include/net/bluetooth/hci_core.h
++++ b/include/net/bluetooth/hci_core.h
+@@ -804,6 +804,7 @@ struct hci_conn_params {
+ extern struct list_head hci_dev_list;
+ extern struct list_head hci_cb_list;
+ extern rwlock_t hci_dev_list_lock;
++extern struct mutex hci_cb_list_lock;
+
+ #define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags)
+ #define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags)
+@@ -2006,47 +2007,24 @@ struct hci_cb {
+
+ char *name;
+
+- bool (*match) (struct hci_conn *conn);
+ void (*connect_cfm) (struct hci_conn *conn, __u8 status);
+ void (*disconn_cfm) (struct hci_conn *conn, __u8 status);
+ void (*security_cfm) (struct hci_conn *conn, __u8 status,
+- __u8 encrypt);
++ __u8 encrypt);
+ void (*key_change_cfm) (struct hci_conn *conn, __u8 status);
+ void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role);
+ };
+
+-static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list)
+-{
+- struct hci_cb *cb, *cpy;
+-
+- rcu_read_lock();
+- list_for_each_entry_rcu(cb, &hci_cb_list, list) {
+- if (cb->match && cb->match(conn)) {
+- cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC);
+- if (!cpy)
+- break;
+-
+- *cpy = *cb;
+- INIT_LIST_HEAD(&cpy->list);
+- list_add_rcu(&cpy->list, list);
+- }
+- }
+- rcu_read_unlock();
+-}
+-
+ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
+ {
+- struct list_head list;
+- struct hci_cb *cb, *tmp;
+-
+- INIT_LIST_HEAD(&list);
+- hci_cb_lookup(conn, &list);
++ struct hci_cb *cb;
+
+- list_for_each_entry_safe(cb, tmp, &list, list) {
++ mutex_lock(&hci_cb_list_lock);
++ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->connect_cfm)
+ cb->connect_cfm(conn, status);
+- kfree(cb);
+ }
++ mutex_unlock(&hci_cb_list_lock);
+
+ if (conn->connect_cfm_cb)
+ conn->connect_cfm_cb(conn, status);
+@@ -2054,43 +2032,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
+
+ static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason)
+ {
+- struct list_head list;
+- struct hci_cb *cb, *tmp;
+-
+- INIT_LIST_HEAD(&list);
+- hci_cb_lookup(conn, &list);
++ struct hci_cb *cb;
+
+- list_for_each_entry_safe(cb, tmp, &list, list) {
++ mutex_lock(&hci_cb_list_lock);
++ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->disconn_cfm)
+ cb->disconn_cfm(conn, reason);
+- kfree(cb);
+ }
++ mutex_unlock(&hci_cb_list_lock);
+
+ if (conn->disconn_cfm_cb)
+ conn->disconn_cfm_cb(conn, reason);
+ }
+
+-static inline void hci_security_cfm(struct hci_conn *conn, __u8 status,
+- __u8 encrypt)
+-{
+- struct list_head list;
+- struct hci_cb *cb, *tmp;
+-
+- INIT_LIST_HEAD(&list);
+- hci_cb_lookup(conn, &list);
+-
+- list_for_each_entry_safe(cb, tmp, &list, list) {
+- if (cb->security_cfm)
+- cb->security_cfm(conn, status, encrypt);
+- kfree(cb);
+- }
+-
+- if (conn->security_cfm_cb)
+- conn->security_cfm_cb(conn, status);
+-}
+-
+ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
+ {
++ struct hci_cb *cb;
+ __u8 encrypt;
+
+ if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
+@@ -2098,11 +2055,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
+
+ encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00;
+
+- hci_security_cfm(conn, status, encrypt);
++ mutex_lock(&hci_cb_list_lock);
++ list_for_each_entry(cb, &hci_cb_list, list) {
++ if (cb->security_cfm)
++ cb->security_cfm(conn, status, encrypt);
++ }
++ mutex_unlock(&hci_cb_list_lock);
++
++ if (conn->security_cfm_cb)
++ conn->security_cfm_cb(conn, status);
+ }
+
+ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
+ {
++ struct hci_cb *cb;
+ __u8 encrypt;
+
+ if (conn->state == BT_CONFIG) {
+@@ -2129,38 +2095,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
+ conn->sec_level = conn->pending_sec_level;
+ }
+
+- hci_security_cfm(conn, status, encrypt);
++ mutex_lock(&hci_cb_list_lock);
++ list_for_each_entry(cb, &hci_cb_list, list) {
++ if (cb->security_cfm)
++ cb->security_cfm(conn, status, encrypt);
++ }
++ mutex_unlock(&hci_cb_list_lock);
++
++ if (conn->security_cfm_cb)
++ conn->security_cfm_cb(conn, status);
+ }
+
+ static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status)
+ {
+- struct list_head list;
+- struct hci_cb *cb, *tmp;
+-
+- INIT_LIST_HEAD(&list);
+- hci_cb_lookup(conn, &list);
++ struct hci_cb *cb;
+
+- list_for_each_entry_safe(cb, tmp, &list, list) {
++ mutex_lock(&hci_cb_list_lock);
++ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->key_change_cfm)
+ cb->key_change_cfm(conn, status);
+- kfree(cb);
+ }
++ mutex_unlock(&hci_cb_list_lock);
+ }
+
+ static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status,
+ __u8 role)
+ {
+- struct list_head list;
+- struct hci_cb *cb, *tmp;
+-
+- INIT_LIST_HEAD(&list);
+- hci_cb_lookup(conn, &list);
++ struct hci_cb *cb;
+
+- list_for_each_entry_safe(cb, tmp, &list, list) {
++ mutex_lock(&hci_cb_list_lock);
++ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->role_switch_cfm)
+ cb->role_switch_cfm(conn, status, role);
+- kfree(cb);
+ }
++ mutex_unlock(&hci_cb_list_lock);
+ }
+
+ static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type)
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index b5553c08e7316..72439764186ed 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -57,6 +57,7 @@ DEFINE_RWLOCK(hci_dev_list_lock);
+
+ /* HCI callback list */
+ LIST_HEAD(hci_cb_list);
++DEFINE_MUTEX(hci_cb_list_lock);
+
+ /* HCI ID Numbering */
+ static DEFINE_IDA(hci_index_ida);
+@@ -2992,7 +2993,9 @@ int hci_register_cb(struct hci_cb *cb)
+ {
+ BT_DBG("%p name %s", cb, cb->name);
+
+- list_add_tail_rcu(&cb->list, &hci_cb_list);
++ mutex_lock(&hci_cb_list_lock);
++ list_add_tail(&cb->list, &hci_cb_list);
++ mutex_unlock(&hci_cb_list_lock);
+
+ return 0;
+ }
+@@ -3002,8 +3005,9 @@ int hci_unregister_cb(struct hci_cb *cb)
+ {
+ BT_DBG("%p name %s", cb, cb->name);
+
+- list_del_rcu(&cb->list);
+- synchronize_rcu();
++ mutex_lock(&hci_cb_list_lock);
++ list_del(&cb->list);
++ mutex_unlock(&hci_cb_list_lock);
+
+ return 0;
+ }
+diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
+index bda2f2da7d731..644b606743e21 100644
+--- a/net/bluetooth/iso.c
++++ b/net/bluetooth/iso.c
+@@ -2137,11 +2137,6 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
+ return HCI_LM_ACCEPT;
+ }
+
+-static bool iso_match(struct hci_conn *hcon)
+-{
+- return hcon->type == ISO_LINK || hcon->type == LE_LINK;
+-}
+-
+ static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
+ {
+ if (hcon->type != ISO_LINK) {
+@@ -2323,7 +2318,6 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
+
+ static struct hci_cb iso_cb = {
+ .name = "ISO",
+- .match = iso_match,
+ .connect_cfm = iso_connect_cfm,
+ .disconn_cfm = iso_disconn_cfm,
+ };
+diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
+index 728a5ce9b5058..4db2d6363bbb5 100644
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -7222,11 +7222,6 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
+ return NULL;
+ }
+
+-static bool l2cap_match(struct hci_conn *hcon)
+-{
+- return hcon->type == ACL_LINK || hcon->type == LE_LINK;
+-}
+-
+ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
+ {
+ struct hci_dev *hdev = hcon->hdev;
+@@ -7234,6 +7229,9 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
+ struct l2cap_chan *pchan;
+ u8 dst_type;
+
++ if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
++ return;
++
+ BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
+
+ if (status) {
+@@ -7298,6 +7296,9 @@ int l2cap_disconn_ind(struct hci_conn *hcon)
+
+ static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
+ {
++ if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
++ return;
++
+ BT_DBG("hcon %p reason %d", hcon, reason);
+
+ l2cap_conn_del(hcon, bt_to_errno(reason));
+@@ -7576,7 +7577,6 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
+
+ static struct hci_cb l2cap_cb = {
+ .name = "L2CAP",
+- .match = l2cap_match,
+ .connect_cfm = l2cap_connect_cfm,
+ .disconn_cfm = l2cap_disconn_cfm,
+ .security_cfm = l2cap_security_cfm,
+diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
+index 4c56ca5a216c6..ad5177e3a69b7 100644
+--- a/net/bluetooth/rfcomm/core.c
++++ b/net/bluetooth/rfcomm/core.c
+@@ -2134,11 +2134,6 @@ static int rfcomm_run(void *unused)
+ return 0;
+ }
+
+-static bool rfcomm_match(struct hci_conn *hcon)
+-{
+- return hcon->type == ACL_LINK;
+-}
+-
+ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
+ {
+ struct rfcomm_session *s;
+@@ -2185,7 +2180,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
+
+ static struct hci_cb rfcomm_cb = {
+ .name = "RFCOMM",
+- .match = rfcomm_match,
+ .security_cfm = rfcomm_security_cfm
+ };
+
+diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
+index 071c404c790af..b872a2ca3ff38 100644
+--- a/net/bluetooth/sco.c
++++ b/net/bluetooth/sco.c
+@@ -1355,13 +1355,11 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
+ return lm;
+ }
+
+-static bool sco_match(struct hci_conn *hcon)
+-{
+- return hcon->type == SCO_LINK || hcon->type == ESCO_LINK;
+-}
+-
+ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
+ {
++ if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
++ return;
++
+ BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status);
+
+ if (!status) {
+@@ -1376,6 +1374,9 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
+
+ static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
+ {
++ if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
++ return;
++
+ BT_DBG("hcon %p reason %d", hcon, reason);
+
+ sco_conn_del(hcon, bt_to_errno(reason));
+@@ -1401,7 +1402,6 @@ void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
+
+ static struct hci_cb sco_cb = {
+ .name = "SCO",
+- .match = sco_match,
+ .connect_cfm = sco_connect_cfm,
+ .disconn_cfm = sco_disconn_cfm,
+ };
+--
+2.39.5
+
--- /dev/null
+From bd1fb08e759aa9beac47de9de67858357e9ade90 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 8 Mar 2025 13:05:43 -0500
+Subject: Revert "openvswitch: switch to per-action label counting in
+ conntrack"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 1063ae07383c0ddc5bcce170260c143825846b03 ]
+
+Currently, ovs_ct_set_labels() is only called for confirmed conntrack
+entries (ct) within ovs_ct_commit(). However, if the conntrack entry
+does not have the labels_ext extension, attempting to allocate it in
+ovs_ct_get_conn_labels() for a confirmed entry triggers a warning in
+nf_ct_ext_add():
+
+ WARN_ON(nf_ct_is_confirmed(ct));
+
+This happens when the conntrack entry is created externally before OVS
+increments net->ct.labels_used. The issue has become more likely since
+commit fcb1aa5163b1 ("openvswitch: switch to per-action label counting
+in conntrack"), which changed to use per-action label counting and
+increment net->ct.labels_used when a flow with ct action is added.
+
+Since there’s no straightforward way to fully resolve this issue at the
+moment, this reverts the commit to avoid breaking existing use cases.
+
+Fixes: fcb1aa5163b1 ("openvswitch: switch to per-action label counting in conntrack")
+Reported-by: Jianbo Liu <jianbol@nvidia.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Aaron Conole <aconole@redhat.com>
+Link: https://patch.msgid.link/1bdeb2f3a812bca016a225d3de714427b2cd4772.1741457143.git.lucien.xin@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/openvswitch/conntrack.c | 30 ++++++++++++++++++------------
+ net/openvswitch/datapath.h | 3 +++
+ 2 files changed, 21 insertions(+), 12 deletions(-)
+
+diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
+index 3bb4810234aac..e573e92213029 100644
+--- a/net/openvswitch/conntrack.c
++++ b/net/openvswitch/conntrack.c
+@@ -1368,8 +1368,11 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
+ attr == OVS_KEY_ATTR_CT_MARK)
+ return true;
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+- attr == OVS_KEY_ATTR_CT_LABELS)
+- return true;
++ attr == OVS_KEY_ATTR_CT_LABELS) {
++ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
++
++ return ovs_net->xt_label;
++ }
+
+ return false;
+ }
+@@ -1378,7 +1381,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ struct sw_flow_actions **sfa, bool log)
+ {
+- unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
+ struct ovs_conntrack_info ct_info;
+ const char *helper = NULL;
+ u16 family;
+@@ -1407,12 +1409,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
+ return -ENOMEM;
+ }
+
+- if (nf_connlabels_get(net, n_bits - 1)) {
+- nf_ct_tmpl_free(ct_info.ct);
+- OVS_NLERR(log, "Failed to set connlabel length");
+- return -EOPNOTSUPP;
+- }
+-
+ if (ct_info.timeout[0]) {
+ if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
+ ct_info.timeout))
+@@ -1581,7 +1577,6 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
+ if (ct_info->ct) {
+ if (ct_info->timeout[0])
+ nf_ct_destroy_timeout(ct_info->ct);
+- nf_connlabels_put(nf_ct_net(ct_info->ct));
+ nf_ct_tmpl_free(ct_info->ct);
+ }
+ }
+@@ -2006,9 +2001,17 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
+
+ int ovs_ct_init(struct net *net)
+ {
+-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
++ unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
++ if (nf_connlabels_get(net, n_bits - 1)) {
++ ovs_net->xt_label = false;
++ OVS_NLERR(true, "Failed to set connlabel length");
++ } else {
++ ovs_net->xt_label = true;
++ }
++
++#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+ return ovs_ct_limit_init(net, ovs_net);
+ #else
+ return 0;
+@@ -2017,9 +2020,12 @@ int ovs_ct_init(struct net *net)
+
+ void ovs_ct_exit(struct net *net)
+ {
+-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
++#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+ ovs_ct_limit_exit(net, ovs_net);
+ #endif
++
++ if (ovs_net->xt_label)
++ nf_connlabels_put(net);
+ }
+diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
+index 365b9bb7f546e..9ca6231ea6470 100644
+--- a/net/openvswitch/datapath.h
++++ b/net/openvswitch/datapath.h
+@@ -160,6 +160,9 @@ struct ovs_net {
+ #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+ struct ovs_ct_limit_info *ct_limit_info;
+ #endif
++
++ /* Module reference for configuring conntrack. */
++ bool xt_label;
+ };
+
+ /**
+--
+2.39.5
+
--- /dev/null
+From b780a7521888acf2e67f2a55cad635d8015d8cd1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 15:05:10 +0800
+Subject: rtase: Fix improper release of ring list entries in rtase_sw_reset
+
+From: Justin Lai <justinlai0215@realtek.com>
+
+[ Upstream commit 415f135ace7fd824cde083184a922e39156055b5 ]
+
+rtase_init_ring, which is called within rtase_sw_reset, adds ring
+entries already present in the ring list back into the list, causing
+the ring list to form a cycle. This results in list_for_each_entry_safe
+failing to find an endpoint during traversal, leading to an error.
+Therefore, the previously added ring_list nodes must be removed before
+calling rtase_init_ring.
+
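+A minimal sketch of the cleanup (structure and field names as in the
+driver; iteration over the interrupt vectors elided):
+
+  struct rtase_ring *ring, *tmp;
+
+  /* Unlink every ring from the vector's list so that
+   * rtase_init_ring() can re-add them without creating a cycle.
+   */
+  list_for_each_entry_safe(ring, tmp, &ivec->ring_list, ring_entry)
+          list_del(&ring->ring_entry);
+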
+Fixes: 079600489960 ("rtase: Implement net_device_ops")
+Signed-off-by: Justin Lai <justinlai0215@realtek.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250306070510.18129-1-justinlai0215@realtek.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/realtek/rtase/rtase_main.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c
+index 14ffd45e9a25a..86dd034fdddc5 100644
+--- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
++++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
+@@ -1501,7 +1501,10 @@ static void rtase_wait_for_quiescence(const struct net_device *dev)
+ static void rtase_sw_reset(struct net_device *dev)
+ {
+ struct rtase_private *tp = netdev_priv(dev);
++ struct rtase_ring *ring, *tmp;
++ struct rtase_int_vector *ivec;
+ int ret;
++ u32 i;
+
+ netif_stop_queue(dev);
+ netif_carrier_off(dev);
+@@ -1512,6 +1515,13 @@ static void rtase_sw_reset(struct net_device *dev)
+ rtase_tx_clear(tp);
+ rtase_rx_clear(tp);
+
++ for (i = 0; i < tp->int_nums; i++) {
++ ivec = &tp->int_vector[i];
++ list_for_each_entry_safe(ring, tmp, &ivec->ring_list,
++ ring_entry)
++ list_del(&ring->ring_entry);
++ }
++
+ ret = rtase_init_ring(dev);
+ if (ret) {
+ netdev_err(dev, "unable to init ring\n");
+--
+2.39.5
+
--- /dev/null
+From b03e29f3ed607b33fc054c72d511d133a6d5421f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Mar 2025 23:44:10 +0800
+Subject: sched: address a potential NULL pointer dereference in the GRED
+ scheduler.
+
+From: Jun Yang <juny24602@gmail.com>
+
+[ Upstream commit 115ef44a98220fddfab37a39a19370497cd718b9 ]
+
+If kzalloc in gred_init returns a NULL pointer, the code follows the
+error handling path, invoking gred_destroy. This, in turn, calls
+gred_offload, where memset could receive a NULL pointer as input,
+potentially leading to a kernel crash.
+
+When table->opt is NULL in gred_init(), gred_change_table_def()
+is not called yet, so it is not necessary to call ->ndo_setup_tc()
+in gred_offload().
+
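+A minimal sketch of the guard on the destroy path (field names from
+the scheduler):
+
+  if (table->opt)         /* only offload if init got this far */
+          gred_offload(sch, TC_GRED_DESTROY);
+  kfree(table->opt);      /* kfree(NULL) is a harmless no-op */
+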
+Signed-off-by: Jun Yang <juny24602@gmail.com>
+Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>
+Fixes: f25c0515c521 ("net: sched: gred: dynamically allocate tc_gred_qopt_offload")
+Link: https://patch.msgid.link/20250305154410.3505642-1-juny24602@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_gred.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
+index 79ba9dc702541..43b0343a7cd0c 100644
+--- a/net/sched/sch_gred.c
++++ b/net/sched/sch_gred.c
+@@ -913,7 +913,8 @@ static void gred_destroy(struct Qdisc *sch)
+ for (i = 0; i < table->DPs; i++)
+ gred_destroy_vq(table->tab[i]);
+
+- gred_offload(sch, TC_GRED_DESTROY);
++ if (table->opt)
++ gred_offload(sch, TC_GRED_DESTROY);
+ kfree(table->opt);
+ }
+
+--
+2.39.5
+
--- /dev/null
+From a947b9dca3df5c2e0d92b532b1539c42e06da9a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 02:39:23 +0000
+Subject: selftests: bonding: fix incorrect mac address
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 9318dc2357b6b8b2ea1200ab7f2d5877851b7382 ]
+
+The correct mac address for NS target 2001:db8::254 is 33:33:ff:00:02:54,
+not 33:33:00:00:02:54. The same applies to the client multicast address.
+
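+For reference, the derivation (solicited-node address per RFC 4291,
+multicast MAC mapping per RFC 2464):
+
+  target address      2001:db8::254
+  solicited-node      ff02::1:ff00:254   (ff02::1:ff + low 24 bits)
+  multicast MAC       33:33:ff:00:02:54  (33:33 + low 32 bits)
+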
+Fixes: 86fb6173d11e ("selftests: bonding: add ns multicast group testing")
+Acked-by: Jay Vosburgh <jv@jvosburgh.net>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250306023923.38777-3-liuhangbin@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/drivers/net/bonding/bond_options.sh | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+index edc56e2cc6069..7bc148889ca72 100755
+--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
++++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+@@ -11,8 +11,8 @@ ALL_TESTS="
+
+ lib_dir=$(dirname "$0")
+ source ${lib_dir}/bond_topo_3d1c.sh
+-c_maddr="33:33:00:00:00:10"
+-g_maddr="33:33:00:00:02:54"
++c_maddr="33:33:ff:00:00:10"
++g_maddr="33:33:ff:00:02:54"
+
+ skip_prio()
+ {
+--
+2.39.5
+
mm-slab-kvfree_rcu-switch-to-wq_mem_reclaim-wq.patch
mm-fix-kernel-bug-when-userfaultfd_move-encounters-swapcache.patch
userfaultfd-fix-pte-unmapping-stack-allocated-pte-copies.patch
+fbdev-hyperv_fb-iounmap-the-correct-memory-when-remo.patch
+pinctrl-bcm281xx-fix-incorrect-regmap-max_registers-.patch
+pinctrl-nuvoton-npcm8xx-add-null-check-in-npcm8xx_gp.patch
+netfilter-nft_ct-use-__refcount_inc-for-per-cpu-nft_.patch
+ice-do-not-configure-destination-override-for-switch.patch
+ice-fix-memory-leak-in-arfs-after-reset.patch
+ice-fix-switchdev-slow-path-in-lag.patch
+netfilter-nf_conncount-garbage-collection-is-not-ski.patch
+netfilter-nf_tables-make-destruction-work-queue-pern.patch
+sched-address-a-potential-null-pointer-dereference-i.patch
+wifi-iwlwifi-mvm-fix-pnvm-timeout-for-non-msi-x-plat.patch
+wifi-mac80211-don-t-queue-sdata-work-for-a-non-runni.patch
+wifi-cfg80211-cancel-wiphy_work-before-freeing-wiphy.patch
+bluetooth-hci_event-fix-enabling-passive-scanning.patch
+revert-bluetooth-hci_core-fix-sleeping-function-call.patch
+net-mlx5-fill-out-devlink-dev-info-only-for-pfs.patch
+net-dsa-mv88e6xxx-verify-after-atu-load-ops.patch
+net-mctp-i3c-copy-headers-if-cloned.patch
+net-mctp-i2c-copy-headers-if-cloned.patch
+netpoll-hold-rcu-read-lock-in-__netpoll_send_skb.patch
+drm-hyperv-fix-address-space-leak-when-hyper-v-drm-d.patch
+fbdev-hyperv_fb-fix-hang-in-kdump-kernel-when-on-hyp.patch
+fbdev-hyperv_fb-simplify-hvfb_putmem.patch
+fbdev-hyperv_fb-allow-graceful-removal-of-framebuffe.patch
+drivers-hv-vmbus-don-t-release-fb_mmio-resource-in-v.patch
+net-mlx5-handle-errors-in-mlx5_chains_create_table.patch
+eth-bnxt-fix-truesize-for-mb-xdp-pass-case.patch
+eth-bnxt-return-fail-if-interface-is-down-in-bnxt_qu.patch
+eth-bnxt-do-not-use-bnxt_vnic_ntuple-unconditionally.patch
+eth-bnxt-do-not-update-checksum-in-bnxt_xdp_build_sk.patch
+eth-bnxt-fix-kernel-panic-in-the-bnxt_get_queue_stat.patch
+eth-bnxt-use-page-pool-for-head-frags.patch
+bnxt_en-refactor-tpa_info-alloc-free-into-helpers.patch
+bnxt_en-handle-tpa_info-in-queue-api-implementation.patch
+eth-bnxt-fix-memory-leak-in-queue-reset.patch
+net-switchdev-convert-blocking-notification-chain-to.patch
+net-mctp-unshare-packets-when-reassembling.patch
+bonding-fix-incorrect-mac-address-setting-to-receive.patch
+selftests-bonding-fix-incorrect-mac-address.patch
+rtase-fix-improper-release-of-ring-list-entries-in-r.patch
+netfilter-nf_conncount-fully-initialize-struct-nf_co.patch
+ipvs-prevent-integer-overflow-in-do_ip_vs_get_ctl.patch
+net_sched-prevent-creation-of-classes-with-tc_h_root.patch
+netfilter-nft_exthdr-fix-offset-with-ipv4_find_optio.patch
+gre-fix-ipv6-link-local-address-generation.patch
+net-openvswitch-remove-misbehaving-actions-length-ch.patch
+revert-openvswitch-switch-to-per-action-label-counti.patch
+net-mlx5-hws-rightsize-bwc-matcher-priority.patch
+net-mlx5-fix-incorrect-irq-pool-usage-when-releasing.patch
+net-mlx5-lag-check-shared-fdb-before-creating-multip.patch
+net-mlx5-bridge-fix-the-crash-caused-by-lag-state-ch.patch
+net-mlx5e-prevent-bridge-link-show-failure-for-non-e.patch
--- /dev/null
+From 31cf668eebe756416649f302ac82aa5faaabb577 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 12:37:59 +0200
+Subject: wifi: cfg80211: cancel wiphy_work before freeing wiphy
+
+From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+
+[ Upstream commit 72d520476a2fab6f3489e8388ab524985d6c4b90 ]
+
+A wiphy_work can be queued from the moment the wiphy is allocated and
+initialized (i.e. wiphy_new_nm). When a wiphy_work is queued, the
+rdev::wiphy_work is queued as well.
+
+If wiphy_free() is called before rdev::wiphy_work has had a chance to
+run, the wiphy memory will be freed, and when the work eventually does
+run it will use invalid memory.
+
+Fix this by canceling the work before freeing the wiphy.
+
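+A minimal sketch of the teardown pattern (generic object lifetime, not
+the cfg80211 code itself):
+
+  /* Nothing may queue the work past this point; cancel any instance
+   * that is already queued or running, then free the object.
+   */
+  cancel_work_sync(&obj->work);
+  kfree(obj);
+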
+Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics")
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Reviewed-by: Johannes Berg <johannes.berg@intel.com>
+Link: https://patch.msgid.link/20250306123626.efd1d19f6e07.I48229f96f4067ef73f5b87302335e2fd750136c9@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/core.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/net/wireless/core.c b/net/wireless/core.c
+index 7d313fb66d76b..1ce8fff2a28a4 100644
+--- a/net/wireless/core.c
++++ b/net/wireless/core.c
+@@ -1198,6 +1198,13 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
+ {
+ struct cfg80211_internal_bss *scan, *tmp;
+ struct cfg80211_beacon_registration *reg, *treg;
++ unsigned long flags;
++
++ spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
++ WARN_ON(!list_empty(&rdev->wiphy_work_list));
++ spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
++ cancel_work_sync(&rdev->wiphy_work);
++
+ rfkill_destroy(rdev->wiphy.rfkill);
+ list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) {
+ list_del(&reg->list);
+--
+2.39.5
+
--- /dev/null
+From 50435573e58a91889eedbc976f3108ce75d9d1e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 12:25:47 +0200
+Subject: wifi: iwlwifi: mvm: fix PNVM timeout for non-MSI-X platforms
+
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+
+[ Upstream commit b8c8a03e9b7bfc06f366b75daf3d0812400e7123 ]
+
+When MSI-X is not enabled, we mask all the interrupts in the interrupt
+handler and re-enable them when the interrupt thread runs. If
+STATUS_INT_ENABLED is not set, we won't re-enable them in the thread.
+In order to get the ALIVE interrupt, we allow the ALIVE interrupt
+itself, and RX as well in order to receive the ALIVE notification
+(which is received as an RX from the firmware).
+
+The problem is that STATUS_INT_ENABLED stays clear until the op_mode
+calls trans_fw_alive, which means that until then, no notification from
+the firmware can be received.
+
+This became a problem when we inserted the pnvm_load exactly between the
+ALIVE and trans_fw_alive.
+
+Fix that by calling trans_fw_alive before loading the PNVM. This
+allows us to receive the firmware notification that the PNVM load is
+complete and to continue the flow normally.
+
+This didn't happen on MSI-X because we don't disable the interrupts in
+the ISR when MSI-X is available.
+
+The error in the log looks like this:
+
+iwlwifi 0000:00:03.0: Timeout waiting for PNVM load!
+iwlwifi 0000:00:03.0: Failed to start RT ucode: -110
+iwlwifi 0000:00:03.0: WRT: Collecting data: ini trigger 13 fired (delay=0ms).
+
+Fixes: 70d3ca86b025 ("iwlwifi: mvm: ring the doorbell and wait for PNVM load completion")
+Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Reviewed-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Link: https://patch.msgid.link/20250306122425.0f2cf207aae1.I025d8f724b44f52eadf6c19069352eb9275613a8@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+index f30b0fc8eca97..2b9a684cf61d5 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+ /*
+- * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
++ * Copyright (C) 2012-2014, 2018-2025 Intel Corporation
+ * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
+ * Copyright (C) 2016-2017 Intel Deutschland GmbH
+ */
+@@ -422,6 +422,8 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
+ /* if reached this point, Alive notification was received */
+ iwl_mei_alive_notif(true);
+
++ iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr);
++
+ ret = iwl_pnvm_load(mvm->trans, &mvm->notif_wait,
+ &mvm->fw->ucode_capa);
+ if (ret) {
+@@ -430,8 +432,6 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
+ return ret;
+ }
+
+- iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr);
+-
+ /*
+ * Note: all the queues are enabled as part of the interface
+ * initialization, but in firmware restart scenarios they
+--
+2.39.5
+
--- /dev/null
+From b0f2a516a4f3dfb9b3007b6a3f8c34ac4b7c21f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Mar 2025 12:37:56 +0200
+Subject: wifi: mac80211: don't queue sdata::work for a non-running sdata
+
+From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+
+[ Upstream commit 20d5a0b9cd0ccb32e886cf6baecf14936325bf10 ]
+
+The work really shouldn't be queued for a non-running interface.
+Also, if ieee80211_setup_sdata is called between queueing and executing
+the work, the work will be re-initialized, which will corrupt
+wiphy_work_list.
+
+Fixes: f8891461a277 ("mac80211: do not start any work during reconfigure flow")
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Reviewed-by: Johannes Berg <johannes.berg@intel.com>
+Link: https://patch.msgid.link/20250306123626.1e02caf82640.I4949e71ed56e7186ed4968fa9ddff477473fa2f4@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/util.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/net/mac80211/util.c b/net/mac80211/util.c
+index 38c30e4ddda98..2b6e8e7307ee5 100644
+--- a/net/mac80211/util.c
++++ b/net/mac80211/util.c
+@@ -6,7 +6,7 @@
+ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2015-2017 Intel Deutschland GmbH
+- * Copyright (C) 2018-2024 Intel Corporation
++ * Copyright (C) 2018-2025 Intel Corporation
+ *
+ * utilities for mac80211
+ */
+@@ -2184,8 +2184,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
+ ieee80211_reconfig_roc(local);
+
+ /* Requeue all works */
+- list_for_each_entry(sdata, &local->interfaces, list)
+- wiphy_work_queue(local->hw.wiphy, &sdata->work);
++ list_for_each_entry(sdata, &local->interfaces, list) {
++ if (ieee80211_sdata_running(sdata))
++ wiphy_work_queue(local->hw.wiphy, &sdata->work);
++ }
+ }
+
+ ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+--
+2.39.5
+