git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
authorSasha Levin <sashal@kernel.org>
Tue, 6 May 2025 11:50:08 +0000 (07:50 -0400)
committerSasha Levin <sashal@kernel.org>
Tue, 6 May 2025 11:50:08 +0000 (07:50 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
14 files changed:
queue-5.10/bnxt_en-fix-ethtool-d-byte-order-for-32-bit-values.patch [new file with mode: 0644]
queue-5.10/net-dlink-correct-endianness-handling-of-led_mode.patch [new file with mode: 0644]
queue-5.10/net-fec-err007885-workaround-for-conventional-tx.patch [new file with mode: 0644]
queue-5.10/net-ipv6-fix-udpv6-gso-segmentation-with-nat.patch [new file with mode: 0644]
queue-5.10/net-lan743x-fix-memleak-issue-when-gso-enabled.patch [new file with mode: 0644]
queue-5.10/net-mlx5-e-switch-fix-error-handling-for-enabling-ro.patch [new file with mode: 0644]
queue-5.10/net-mlx5-e-switch-initialize-mac-address-for-default.patch [new file with mode: 0644]
queue-5.10/net-mlx5-remove-return-statement-exist-at-the-end-of.patch [new file with mode: 0644]
queue-5.10/net_sched-drr-fix-double-list-add-in-class-with-nete.patch [new file with mode: 0644]
queue-5.10/net_sched-ets-fix-double-list-add-in-class-with-nete.patch [new file with mode: 0644]
queue-5.10/net_sched-hfsc-fix-a-uaf-vulnerability-in-class-with.patch [new file with mode: 0644]
queue-5.10/net_sched-qfq-fix-double-list-add-in-class-with-nete.patch [new file with mode: 0644]
queue-5.10/nvme-tcp-fix-premature-queue-removal-and-i-o-failove.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/bnxt_en-fix-ethtool-d-byte-order-for-32-bit-values.patch b/queue-5.10/bnxt_en-fix-ethtool-d-byte-order-for-32-bit-values.patch
new file mode 100644 (file)
index 0000000..6bb8ba8
--- /dev/null
@@ -0,0 +1,84 @@
+From a84afeab035aeaa9986f849783c5f4ff0b02dbe5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Apr 2025 15:59:03 -0700
+Subject: bnxt_en: Fix ethtool -d byte order for 32-bit values
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 02e8be5a032cae0f4ca33c6053c44d83cf4acc93 ]
+
+For version 1 register dump that includes the PCIe stats, the existing
+code incorrectly assumes that all PCIe stats are 64-bit values.  Fix it
+by using an array containing the starting and ending index of the 32-bit
+values.  The loop in bnxt_get_regs() will use the array to do proper
+endian swap for the 32-bit values.
+
+Fixes: b5d600b027eb ("bnxt_en: Add support for 'ethtool -d'")
+Reviewed-by: Shruti Parab <shruti.parab@broadcom.com>
+Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 36 ++++++++++++++++---
+ 1 file changed, 31 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+index 2984234df67eb..4dfb65b0bf1c6 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+@@ -1344,6 +1344,17 @@ static int bnxt_get_regs_len(struct net_device *dev)
+       return reg_len;
+ }
++#define BNXT_PCIE_32B_ENTRY(start, end)                       \
++       { offsetof(struct pcie_ctx_hw_stats, start),   \
++         offsetof(struct pcie_ctx_hw_stats, end) }
++
++static const struct {
++      u16 start;
++      u16 end;
++} bnxt_pcie_32b_entries[] = {
++      BNXT_PCIE_32B_ENTRY(pcie_ltssm_histogram[0], pcie_ltssm_histogram[3]),
++};
++
+ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs,
+                         void *_p)
+ {
+@@ -1372,12 +1383,27 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs,
+       mutex_lock(&bp->hwrm_cmd_lock);
+       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       if (!rc) {
+-              __le64 *src = (__le64 *)hw_pcie_stats;
+-              u64 *dst = (u64 *)(_p + BNXT_PXP_REG_LEN);
+-              int i;
++              u8 *dst = (u8 *)(_p + BNXT_PXP_REG_LEN);
++              u8 *src = (u8 *)hw_pcie_stats;
++              int i, j;
++
++              for (i = 0, j = 0; i < sizeof(*hw_pcie_stats); ) {
++                      if (i >= bnxt_pcie_32b_entries[j].start &&
++                          i <= bnxt_pcie_32b_entries[j].end) {
++                              u32 *dst32 = (u32 *)(dst + i);
++
++                              *dst32 = le32_to_cpu(*(__le32 *)(src + i));
++                              i += 4;
++                              if (i > bnxt_pcie_32b_entries[j].end &&
++                                  j < ARRAY_SIZE(bnxt_pcie_32b_entries) - 1)
++                                      j++;
++                      } else {
++                              u64 *dst64 = (u64 *)(dst + i);
+-              for (i = 0; i < sizeof(*hw_pcie_stats) / sizeof(__le64); i++)
+-                      dst[i] = le64_to_cpu(src[i]);
++                              *dst64 = le64_to_cpu(*(__le64 *)(src + i));
++                              i += 8;
++                      }
++              }
+       }
+       mutex_unlock(&bp->hwrm_cmd_lock);
+       dma_free_coherent(&bp->pdev->dev, sizeof(*hw_pcie_stats), hw_pcie_stats,
+-- 
+2.39.5
+
diff --git a/queue-5.10/net-dlink-correct-endianness-handling-of-led_mode.patch b/queue-5.10/net-dlink-correct-endianness-handling-of-led_mode.patch
new file mode 100644 (file)
index 0000000..dbe0e51
--- /dev/null
@@ -0,0 +1,80 @@
+From 794991d2f47b34726a140af1a44a2596a15f014b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Apr 2025 16:50:47 +0100
+Subject: net: dlink: Correct endianness handling of led_mode
+
+From: Simon Horman <horms@kernel.org>
+
+[ Upstream commit e7e5ae71831c44d58627a991e603845a2fed2cab ]
+
+As its name suggests, parse_eeprom() parses EEPROM data.
+
+This is done by reading data, 16 bits at a time as follows:
+
+       for (i = 0; i < 128; i++)
+                ((__le16 *) sromdata)[i] = cpu_to_le16(read_eeprom(np, i));
+
+sromdata is at the same memory location as psrom.
+And the type of psrom is a pointer to struct t_SROM.
+
+As can be seen in the loop above, data is stored in sromdata, and thus psrom,
+as 16-bit little-endian values.
+
+However, the integer fields of t_SROM are host byte order integers.
+And in the case of led_mode this leads to a little endian value
+being incorrectly treated as host byte order.
+
+Looking at rio_set_led_mode, this does appear to be a bug as that code
+masks led_mode with 0x1, 0x2 and 0x8. Logic that would be affected by a
+reversed byte order.
+
+This problem would only manifest on big endian hosts.
+
+Found by inspection while investigating a sparse warning
+regarding the crc field of t_SROM.
+
+I believe that warning is a false positive. And although I plan
+to send a follow-up to use little-endian types for the other integer
+fields of PSROM_t I do not believe that will involve any bug fixes.
+
+Compile tested only.
+
+Fixes: c3f45d322cbd ("dl2k: Add support for IP1000A-based cards")
+Signed-off-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250425-dlink-led-mode-v1-1-6bae3c36e736@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/dlink/dl2k.c | 2 +-
+ drivers/net/ethernet/dlink/dl2k.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
+index 734acb834c986..66e0fbdcef220 100644
+--- a/drivers/net/ethernet/dlink/dl2k.c
++++ b/drivers/net/ethernet/dlink/dl2k.c
+@@ -353,7 +353,7 @@ parse_eeprom (struct net_device *dev)
+               dev->dev_addr[i] = psrom->mac_addr[i];
+       if (np->chip_id == CHIP_IP1000A) {
+-              np->led_mode = psrom->led_mode;
++              np->led_mode = le16_to_cpu(psrom->led_mode);
+               return 0;
+       }
+diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h
+index 195dc6cfd8955..0e33e2eaae960 100644
+--- a/drivers/net/ethernet/dlink/dl2k.h
++++ b/drivers/net/ethernet/dlink/dl2k.h
+@@ -335,7 +335,7 @@ typedef struct t_SROM {
+       u16 sub_system_id;      /* 0x06 */
+       u16 pci_base_1;         /* 0x08 (IP1000A only) */
+       u16 pci_base_2;         /* 0x0a (IP1000A only) */
+-      u16 led_mode;           /* 0x0c (IP1000A only) */
++      __le16 led_mode;        /* 0x0c (IP1000A only) */
+       u16 reserved1[9];       /* 0x0e-0x1f */
+       u8 mac_addr[6];         /* 0x20-0x25 */
+       u8 reserved2[10];       /* 0x26-0x2f */
+-- 
+2.39.5
+
diff --git a/queue-5.10/net-fec-err007885-workaround-for-conventional-tx.patch b/queue-5.10/net-fec-err007885-workaround-for-conventional-tx.patch
new file mode 100644 (file)
index 0000000..0ff5441
--- /dev/null
@@ -0,0 +1,55 @@
+From 2ba79af98ed25c4730394bfd58a9d6cfd48cf647 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Apr 2025 11:08:26 +0200
+Subject: net: fec: ERR007885 Workaround for conventional TX
+
+From: Mattias Barthel <mattias.barthel@atlascopco.com>
+
+[ Upstream commit a179aad12badc43201cbf45d1e8ed2c1383c76b9 ]
+
+Activate TX hang workaround also in
+fec_enet_txq_submit_skb() when TSO is not enabled.
+
+Errata: ERR007885
+
+Symptoms: NETDEV WATCHDOG: eth0 (fec): transmit queue 0 timed out
+
+commit 37d6017b84f7 ("net: fec: Workaround for imx6sx enet tx hang when enable three queues")
+There is a TDAR race condition for multiQ when the software sets TDAR
+and the UDMA clears TDAR simultaneously or in a small window (2-4 cycles).
+This will cause the udma_tx and udma_tx_arbiter state machines to hang.
+
+So, the workaround is to check the TDAR status four times: if TDAR has been
+    cleared by hardware, then write TDAR; otherwise don't set TDAR.
+
+Fixes: 53bb20d1faba ("net: fec: add variable reg_desc_active to speed things up")
+Signed-off-by: Mattias Barthel <mattias.barthel@atlascopco.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://patch.msgid.link/20250429090826.3101258-1-mattiasbarthel@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index 8e30e999456d4..805434ba3035b 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -602,7 +602,12 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
+       txq->bd.cur = bdp;
+       /* Trigger transmission start */
+-      writel(0, txq->bd.reg_desc_active);
++      if (!(fep->quirks & FEC_QUIRK_ERR007885) ||
++          !readl(txq->bd.reg_desc_active) ||
++          !readl(txq->bd.reg_desc_active) ||
++          !readl(txq->bd.reg_desc_active) ||
++          !readl(txq->bd.reg_desc_active))
++              writel(0, txq->bd.reg_desc_active);
+       return 0;
+ }
+-- 
+2.39.5
+
diff --git a/queue-5.10/net-ipv6-fix-udpv6-gso-segmentation-with-nat.patch b/queue-5.10/net-ipv6-fix-udpv6-gso-segmentation-with-nat.patch
new file mode 100644 (file)
index 0000000..93ab960
--- /dev/null
@@ -0,0 +1,104 @@
+From e53ac9227ede2e94c09be0acedae5806d23a35cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 26 Apr 2025 17:32:09 +0200
+Subject: net: ipv6: fix UDPv6 GSO segmentation with NAT
+
+From: Felix Fietkau <nbd@nbd.name>
+
+[ Upstream commit b936a9b8d4a585ccb6d454921c36286bfe63e01d ]
+
+If any address or port is changed, update it in all packets and recalculate
+checksum.
+
+Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.")
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://patch.msgid.link/20250426153210.14044-1-nbd@nbd.name
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/udp_offload.c | 61 +++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 60 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
+index b6952b88b5051..73beaa7e2d703 100644
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -242,6 +242,62 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
+       return segs;
+ }
++static void __udpv6_gso_segment_csum(struct sk_buff *seg,
++                                   struct in6_addr *oldip,
++                                   const struct in6_addr *newip,
++                                   __be16 *oldport, __be16 newport)
++{
++      struct udphdr *uh = udp_hdr(seg);
++
++      if (ipv6_addr_equal(oldip, newip) && *oldport == newport)
++              return;
++
++      if (uh->check) {
++              inet_proto_csum_replace16(&uh->check, seg, oldip->s6_addr32,
++                                        newip->s6_addr32, true);
++
++              inet_proto_csum_replace2(&uh->check, seg, *oldport, newport,
++                                       false);
++              if (!uh->check)
++                      uh->check = CSUM_MANGLED_0;
++      }
++
++      *oldip = *newip;
++      *oldport = newport;
++}
++
++static struct sk_buff *__udpv6_gso_segment_list_csum(struct sk_buff *segs)
++{
++      const struct ipv6hdr *iph;
++      const struct udphdr *uh;
++      struct ipv6hdr *iph2;
++      struct sk_buff *seg;
++      struct udphdr *uh2;
++
++      seg = segs;
++      uh = udp_hdr(seg);
++      iph = ipv6_hdr(seg);
++      uh2 = udp_hdr(seg->next);
++      iph2 = ipv6_hdr(seg->next);
++
++      if (!(*(const u32 *)&uh->source ^ *(const u32 *)&uh2->source) &&
++          ipv6_addr_equal(&iph->saddr, &iph2->saddr) &&
++          ipv6_addr_equal(&iph->daddr, &iph2->daddr))
++              return segs;
++
++      while ((seg = seg->next)) {
++              uh2 = udp_hdr(seg);
++              iph2 = ipv6_hdr(seg);
++
++              __udpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr,
++                                       &uh2->source, uh->source);
++              __udpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr,
++                                       &uh2->dest, uh->dest);
++      }
++
++      return segs;
++}
++
+ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
+                                             netdev_features_t features,
+                                             bool is_ipv6)
+@@ -254,7 +310,10 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
+       udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
+-      return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb);
++      if (is_ipv6)
++              return __udpv6_gso_segment_list_csum(skb);
++      else
++              return __udpv4_gso_segment_list_csum(skb);
+ }
+ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
+-- 
+2.39.5
+
diff --git a/queue-5.10/net-lan743x-fix-memleak-issue-when-gso-enabled.patch b/queue-5.10/net-lan743x-fix-memleak-issue-when-gso-enabled.patch
new file mode 100644 (file)
index 0000000..91f9675
--- /dev/null
@@ -0,0 +1,81 @@
+From 1c16a124a89141b69034b47ac623e7633281db8e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Apr 2025 10:55:27 +0530
+Subject: net: lan743x: Fix memleak issue when GSO enabled
+
+From: Thangaraj Samynathan <thangaraj.s@microchip.com>
+
+[ Upstream commit 2d52e2e38b85c8b7bc00dca55c2499f46f8c8198 ]
+
+Always map the `skb` to the LS descriptor. Previously skb was
+mapped to EXT descriptor when the number of fragments is zero with
+GSO enabled. Mapping the skb to EXT descriptor prevents it from
+being freed, leading to a memory leak.
+
+Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver")
+Signed-off-by: Thangaraj Samynathan <thangaraj.s@microchip.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://patch.msgid.link/20250429052527.10031-1-thangaraj.s@microchip.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/microchip/lan743x_main.c | 8 ++++++--
+ drivers/net/ethernet/microchip/lan743x_main.h | 1 +
+ 2 files changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
+index 50cb1c5251f71..a0f490a907573 100644
+--- a/drivers/net/ethernet/microchip/lan743x_main.c
++++ b/drivers/net/ethernet/microchip/lan743x_main.c
+@@ -1475,6 +1475,7 @@ static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx,
+       if (nr_frags <= 0) {
+               tx->frame_data0 |= TX_DESC_DATA0_LS_;
+               tx->frame_data0 |= TX_DESC_DATA0_IOC_;
++              tx->frame_last = tx->frame_first;
+       }
+       tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+       tx_descriptor->data0 = cpu_to_le32(tx->frame_data0);
+@@ -1544,6 +1545,7 @@ static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx,
+               tx->frame_first = 0;
+               tx->frame_data0 = 0;
+               tx->frame_tail = 0;
++              tx->frame_last = 0;
+               return -ENOMEM;
+       }
+@@ -1584,16 +1586,18 @@ static void lan743x_tx_frame_end(struct lan743x_tx *tx,
+           TX_DESC_DATA0_DTYPE_DATA_) {
+               tx->frame_data0 |= TX_DESC_DATA0_LS_;
+               tx->frame_data0 |= TX_DESC_DATA0_IOC_;
++              tx->frame_last = tx->frame_tail;
+       }
+-      tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+-      buffer_info = &tx->buffer_info[tx->frame_tail];
++      tx_descriptor = &tx->ring_cpu_ptr[tx->frame_last];
++      buffer_info = &tx->buffer_info[tx->frame_last];
+       buffer_info->skb = skb;
+       if (time_stamp)
+               buffer_info->flags |= TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED;
+       if (ignore_sync)
+               buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC;
++      tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+       tx_descriptor->data0 = cpu_to_le32(tx->frame_data0);
+       tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+       tx->last_tail = tx->frame_tail;
+diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
+index 751f2bc9ce84e..2a40cc827b187 100644
+--- a/drivers/net/ethernet/microchip/lan743x_main.h
++++ b/drivers/net/ethernet/microchip/lan743x_main.h
+@@ -657,6 +657,7 @@ struct lan743x_tx {
+       u32             frame_first;
+       u32             frame_data0;
+       u32             frame_tail;
++      u32             frame_last;
+       struct lan743x_tx_buffer_info *buffer_info;
+-- 
+2.39.5
+
diff --git a/queue-5.10/net-mlx5-e-switch-fix-error-handling-for-enabling-ro.patch b/queue-5.10/net-mlx5-e-switch-fix-error-handling-for-enabling-ro.patch
new file mode 100644 (file)
index 0000000..4b4e965
--- /dev/null
@@ -0,0 +1,110 @@
+From 7086759eae5b5deace3062cac966a48dd35767e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 11:36:11 +0300
+Subject: net/mlx5: E-switch, Fix error handling for enabling roce
+
+From: Chris Mi <cmi@nvidia.com>
+
+[ Upstream commit 90538d23278a981e344d364e923162fce752afeb ]
+
+The cited commit assumes enabling roce always succeeds. But it is
+not true. Add error handling for it.
+
+Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic")
+Signed-off-by: Chris Mi <cmi@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Mark Bloch <mbloch@nvidia.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/20250423083611.324567-6-mbloch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c   | 5 ++++-
+ drivers/net/ethernet/mellanox/mlx5/core/rdma.c           | 9 +++++----
+ drivers/net/ethernet/mellanox/mlx5/core/rdma.h           | 4 ++--
+ 3 files changed, 11 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index ccc7dd3e738a4..7c6646f932b69 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -2317,7 +2317,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
+               esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+       mutex_init(&esw->offloads.termtbl_mutex);
+-      mlx5_rdma_enable_roce(esw->dev);
++      err = mlx5_rdma_enable_roce(esw->dev);
++      if (err)
++              goto err_roce;
+       err = mlx5_esw_host_number_init(esw);
+       if (err)
+@@ -2366,6 +2368,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
+ err_metadata:
+       esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
+       mlx5_rdma_disable_roce(esw->dev);
++err_roce:
+       mutex_destroy(&esw->offloads.termtbl_mutex);
+       return err;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+index ab5afa6c5e0fd..e61a4fa46d772 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+@@ -152,17 +152,17 @@ void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev)
+       mlx5_nic_vport_disable_roce(dev);
+ }
+-void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
++int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
+ {
+       int err;
+       if (!MLX5_CAP_GEN(dev, roce))
+-              return;
++              return 0;
+       err = mlx5_nic_vport_enable_roce(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
+-              return;
++              return err;
+       }
+       err = mlx5_rdma_add_roce_addr(dev);
+@@ -177,10 +177,11 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
+               goto del_roce_addr;
+       }
+-      return;
++      return err;
+ del_roce_addr:
+       mlx5_rdma_del_roce_addr(dev);
+ disable_roce:
+       mlx5_nic_vport_disable_roce(dev);
++      return err;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
+index 750cff2a71a4b..3d9e76c3d42fb 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
+@@ -8,12 +8,12 @@
+ #ifdef CONFIG_MLX5_ESWITCH
+-void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev);
++int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev);
+ void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev);
+ #else /* CONFIG_MLX5_ESWITCH */
+-static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {}
++static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; }
+ static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {}
+ #endif /* CONFIG_MLX5_ESWITCH */
+-- 
+2.39.5
+
diff --git a/queue-5.10/net-mlx5-e-switch-initialize-mac-address-for-default.patch b/queue-5.10/net-mlx5-e-switch-initialize-mac-address-for-default.patch
new file mode 100644 (file)
index 0000000..002fd86
--- /dev/null
@@ -0,0 +1,48 @@
+From 1a4f093015a9f0faaf74e0a081970f6ec4b5900e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 11:36:08 +0300
+Subject: net/mlx5: E-Switch, Initialize MAC Address for Default GID
+
+From: Maor Gottlieb <maorg@nvidia.com>
+
+[ Upstream commit 5d1a04f347e6cbf5ffe74da409a5d71fbe8c5f19 ]
+
+Initialize the source MAC address when creating the default GID entry.
+Since this entry is used only for loopback traffic, it only needs to
+be a unicast address. A zeroed-out MAC address is sufficient for this
+purpose.
+Without this fix, random bits would be assigned as the source address.
+If these bits formed a multicast address, the firmware would return an
+error, preventing the user from switching to switchdev mode:
+
+Error: mlx5_core: Failed setting eswitch to offloads.
+kernel answers: Invalid argument
+
+Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic")
+Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Mark Bloch <mbloch@nvidia.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/20250423083611.324567-3-mbloch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+index 2389239acadc9..945d90844f0cb 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+@@ -130,8 +130,8 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *
+ static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev)
+ {
++      u8 mac[ETH_ALEN] = {};
+       union ib_gid gid;
+-      u8 mac[ETH_ALEN];
+       mlx5_rdma_make_default_gid(dev, &gid);
+       return mlx5_core_roce_gid_set(dev, 0,
+-- 
+2.39.5
+
diff --git a/queue-5.10/net-mlx5-remove-return-statement-exist-at-the-end-of.patch b/queue-5.10/net-mlx5-remove-return-statement-exist-at-the-end-of.patch
new file mode 100644 (file)
index 0000000..d3399f4
--- /dev/null
@@ -0,0 +1,46 @@
+From 60bf5d814d5ba924840d12fd7067751f3835b772 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Apr 2021 21:07:42 +0800
+Subject: net/mlx5: Remove return statement exist at the end of void function
+
+From: Wenpeng Liang <liangwenpeng@huawei.com>
+
+[ Upstream commit 9dee115bc1478b6a51f664defbc5b091985a3fd3 ]
+
+void function return statements are not generally useful.
+
+Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
+Signed-off-by: Weihang Li <liweihang@huawei.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Stable-dep-of: 90538d23278a ("net/mlx5: E-switch, Fix error handling for enabling roce")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 1 -
+ drivers/net/ethernet/mellanox/mlx5/core/rdma.c    | 1 -
+ 2 files changed, 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+index 6fd9749203944..2f886bd776640 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+@@ -88,7 +88,6 @@ static void irq_set_name(char *name, int vecidx)
+       snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
+                vecidx - MLX5_IRQ_VEC_COMP_BASE);
+-      return;
+ }
+ static int request_irqs(struct mlx5_core_dev *dev, int nvec)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+index 945d90844f0cb..ab5afa6c5e0fd 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+@@ -183,5 +183,4 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
+       mlx5_rdma_del_roce_addr(dev);
+ disable_roce:
+       mlx5_nic_vport_disable_roce(dev);
+-      return;
+ }
+-- 
+2.39.5
+
diff --git a/queue-5.10/net_sched-drr-fix-double-list-add-in-class-with-nete.patch b/queue-5.10/net_sched-drr-fix-double-list-add-in-class-with-nete.patch
new file mode 100644 (file)
index 0000000..639356b
--- /dev/null
@@ -0,0 +1,75 @@
+From cbf3e86dabfab4d4ec1b0989cb5a039480a3243e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Apr 2025 19:07:05 -0300
+Subject: net_sched: drr: Fix double list add in class with netem as child
+ qdisc
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit f99a3fbf023e20b626be4b0f042463d598050c9a ]
+
+As described in Gerrard's report [1], there are use cases where a netem
+child qdisc will make the parent qdisc's enqueue callback reentrant.
+In the case of drr, there won't be a UAF, but the code will add the same
+classifier to the list twice, which will cause memory corruption.
+
+In addition to checking for qlen being zero, this patch checks whether the
+class was already added to the active_list (cl_is_active) before adding
+to the list to cover for the reentrant case.
+
+[1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/
+
+Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs")
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Link: https://patch.msgid.link/20250425220710.3964791-2-victor@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_drr.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
+index 08424aac6da82..7ddf73f5a4181 100644
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -36,6 +36,11 @@ struct drr_sched {
+       struct Qdisc_class_hash         clhash;
+ };
++static bool cl_is_active(struct drr_class *cl)
++{
++      return !list_empty(&cl->alist);
++}
++
+ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
+ {
+       struct drr_sched *q = qdisc_priv(sch);
+@@ -344,7 +349,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+       struct drr_sched *q = qdisc_priv(sch);
+       struct drr_class *cl;
+       int err = 0;
+-      bool first;
+       cl = drr_classify(skb, sch, &err);
+       if (cl == NULL) {
+@@ -354,7 +358,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+               return err;
+       }
+-      first = !cl->qdisc->q.qlen;
+       err = qdisc_enqueue(skb, cl->qdisc, to_free);
+       if (unlikely(err != NET_XMIT_SUCCESS)) {
+               if (net_xmit_drop_count(err)) {
+@@ -364,7 +367,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+               return err;
+       }
+-      if (first) {
++      if (!cl_is_active(cl)) {
+               list_add_tail(&cl->alist, &q->active);
+               cl->deficit = cl->quantum;
+       }
+-- 
+2.39.5
+
diff --git a/queue-5.10/net_sched-ets-fix-double-list-add-in-class-with-nete.patch b/queue-5.10/net_sched-ets-fix-double-list-add-in-class-with-nete.patch
new file mode 100644 (file)
index 0000000..34604b0
--- /dev/null
@@ -0,0 +1,75 @@
+From 86406824e413841050de572f05adddad4522aa47 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Apr 2025 19:07:07 -0300
+Subject: net_sched: ets: Fix double list add in class with netem as child
+ qdisc
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit 1a6d0c00fa07972384b0c308c72db091d49988b6 ]
+
+As described in Gerrard's report [1], there are use cases where a netem
+child qdisc will make the parent qdisc's enqueue callback reentrant.
+In the case of ets, there won't be a UAF, but the code will add the same
+class to the list twice, which will cause memory corruption.
+
+Instead of checking for qlen being zero, this patch checks whether the
+class was already added to the active list (cl_is_active) before doing
+the addition, to cater for the reentrant case.
+
+[1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/
+
+Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs")
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Link: https://patch.msgid.link/20250425220710.3964791-4-victor@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_ets.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
+index 0afd9187f836a..35b8577aef7dc 100644
+--- a/net/sched/sch_ets.c
++++ b/net/sched/sch_ets.c
+@@ -74,6 +74,11 @@ static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
+       [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
+ };
++static bool cl_is_active(struct ets_class *cl)
++{
++      return !list_empty(&cl->alist);
++}
++
+ static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
+                            unsigned int *quantum,
+                            struct netlink_ext_ack *extack)
+@@ -424,7 +429,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+       struct ets_sched *q = qdisc_priv(sch);
+       struct ets_class *cl;
+       int err = 0;
+-      bool first;
+       cl = ets_classify(skb, sch, &err);
+       if (!cl) {
+@@ -434,7 +438,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+               return err;
+       }
+-      first = !cl->qdisc->q.qlen;
+       err = qdisc_enqueue(skb, cl->qdisc, to_free);
+       if (unlikely(err != NET_XMIT_SUCCESS)) {
+               if (net_xmit_drop_count(err)) {
+@@ -444,7 +447,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+               return err;
+       }
+-      if (first && !ets_class_is_strict(q, cl)) {
++      if (!cl_is_active(cl) && !ets_class_is_strict(q, cl)) {
+               list_add_tail(&cl->alist, &q->active);
+               cl->deficit = cl->quantum;
+       }
+-- 
+2.39.5
+
diff --git a/queue-5.10/net_sched-hfsc-fix-a-uaf-vulnerability-in-class-with.patch b/queue-5.10/net_sched-hfsc-fix-a-uaf-vulnerability-in-class-with.patch
new file mode 100644 (file)
index 0000000..1f881a4
--- /dev/null
@@ -0,0 +1,49 @@
+From e127f37b009000841ee411335942e4ff8994b1a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Apr 2025 19:07:06 -0300
+Subject: net_sched: hfsc: Fix a UAF vulnerability in class with netem as child
+ qdisc
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit 141d34391abbb315d68556b7c67ad97885407547 ]
+
+As described in Gerrard's report [1], we have a UAF case when an hfsc class
+has a netem child qdisc. The crux of the issue is that hfsc is assuming
+that checking for cl->qdisc->q.qlen == 0 guarantees that it hasn't inserted
+the class in the vttree or eltree (which is not true for the netem
+duplicate case).
+
+This patch checks the n_active class variable to make sure that the code
+won't insert the class in the vttree or eltree twice, catering for the
+reentrant case.
+
+[1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/
+
+Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs")
+Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Link: https://patch.msgid.link/20250425220710.3964791-3-victor@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_hfsc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
+index aad090fd165b0..adc16643779fb 100644
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -1571,7 +1571,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
+               return err;
+       }
+-      if (first) {
++      if (first && !cl->cl_nactive) {
+               if (cl->cl_flags & HFSC_RSC)
+                       init_ed(cl, len);
+               if (cl->cl_flags & HFSC_FSC)
+-- 
+2.39.5
+
diff --git a/queue-5.10/net_sched-qfq-fix-double-list-add-in-class-with-nete.patch b/queue-5.10/net_sched-qfq-fix-double-list-add-in-class-with-nete.patch
new file mode 100644 (file)
index 0000000..8d7eac4
--- /dev/null
@@ -0,0 +1,77 @@
+From 4a2ee7a243c91b28796f9d9593251260ca6c176a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Apr 2025 19:07:08 -0300
+Subject: net_sched: qfq: Fix double list add in class with netem as child
+ qdisc
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit f139f37dcdf34b67f5bf92bc8e0f7f6b3ac63aa4 ]
+
+As described in Gerrard's report [1], there are use cases where a netem
+child qdisc will make the parent qdisc's enqueue callback reentrant.
+In the case of qfq, there won't be a UAF, but the code will add the same
+class to the list twice, which will cause memory corruption.
+
+This patch checks whether the class was already added to the agg->active
+list (cl_is_active) before doing the addition to cater for the reentrant
+case.
+
+[1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/
+
+Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs")
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Link: https://patch.msgid.link/20250425220710.3964791-5-victor@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_qfq.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
+index ebf9f473c9392..1ee15db5fcc8c 100644
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -204,6 +204,11 @@ struct qfq_sched {
+  */
+ enum update_reason {enqueue, requeue};
++static bool cl_is_active(struct qfq_class *cl)
++{
++      return !list_empty(&cl->alist);
++}
++
+ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
+ {
+       struct qfq_sched *q = qdisc_priv(sch);
+@@ -1222,7 +1227,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+       struct qfq_class *cl;
+       struct qfq_aggregate *agg;
+       int err = 0;
+-      bool first;
+       cl = qfq_classify(skb, sch, &err);
+       if (cl == NULL) {
+@@ -1244,7 +1248,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+       }
+       gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
+-      first = !cl->qdisc->q.qlen;
+       err = qdisc_enqueue(skb, cl->qdisc, to_free);
+       if (unlikely(err != NET_XMIT_SUCCESS)) {
+               pr_debug("qfq_enqueue: enqueue failed %d\n", err);
+@@ -1261,8 +1264,8 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+       ++sch->q.qlen;
+       agg = cl->agg;
+-      /* if the queue was not empty, then done here */
+-      if (!first) {
++      /* if the class is active, then done here */
++      if (cl_is_active(cl)) {
+               if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
+                   list_first_entry(&agg->active, struct qfq_class, alist)
+                   == cl && cl->deficit < len)
+-- 
+2.39.5
+
diff --git a/queue-5.10/nvme-tcp-fix-premature-queue-removal-and-i-o-failove.patch b/queue-5.10/nvme-tcp-fix-premature-queue-removal-and-i-o-failove.patch
new file mode 100644 (file)
index 0000000..85fdb7b
--- /dev/null
@@ -0,0 +1,112 @@
+From 7443bb35fde478c904df8a38bcf60cc39ed6bb9a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Apr 2025 10:42:01 -0600
+Subject: nvme-tcp: fix premature queue removal and I/O failover
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Michael Liang <mliang@purestorage.com>
+
+[ Upstream commit 77e40bbce93059658aee02786a32c5c98a240a8a ]
+
+This patch addresses a data corruption issue observed in nvme-tcp during
+testing.
+
+In an NVMe native multipath setup, when an I/O timeout occurs, all
+inflight I/Os are canceled almost immediately after the kernel socket is
+shut down. These canceled I/Os are reported as host path errors,
+triggering a failover that succeeds on a different path.
+
+However, at this point, the original I/O may still be outstanding in the
+host's network transmission path (e.g., the NIC’s TX queue). From the
+user-space app's perspective, the buffer associated with the I/O is
+considered completed, since the I/O was acked on the different path, and
+may be reused for new I/O requests.
+
+Because nvme-tcp enables zero-copy by default in the transmission path,
+this can lead to corrupted data being sent to the original target,
+ultimately causing data corruption.
+
+We can reproduce this data corruption by injecting delay on one path and
+triggering an I/O timeout.
+
+To prevent this issue, this change ensures that all inflight
+transmissions are fully completed from the host's perspective before
+returning from queue stop. To handle concurrent I/O timeouts from multiple
+namespaces under the same controller, always wait in queue stop
+regardless of the queue's state.
+
+This aligns with the behavior of queue stopping in other NVMe fabric
+transports.
+
+Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver")
+Signed-off-by: Michael Liang <mliang@purestorage.com>
+Reviewed-by: Mohamed Khalfella <mkhalfella@purestorage.com>
+Reviewed-by: Randy Jennings <randyj@purestorage.com>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/tcp.c | 31 +++++++++++++++++++++++++++++--
+ 1 file changed, 29 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
+index 93835c019b8e3..7709a604d0bef 100644
+--- a/drivers/nvme/host/tcp.c
++++ b/drivers/nvme/host/tcp.c
+@@ -1573,7 +1573,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
+       cancel_work_sync(&queue->io_work);
+ }
+-static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
++static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid)
+ {
+       struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+       struct nvme_tcp_queue *queue = &ctrl->queues[qid];
+@@ -1584,6 +1584,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
+       mutex_unlock(&queue->queue_lock);
+ }
++static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid)
++{
++      struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
++      struct nvme_tcp_queue *queue = &ctrl->queues[qid];
++      int timeout = 100;
++
++      while (timeout > 0) {
++              if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) ||
++                  !sk_wmem_alloc_get(queue->sock->sk))
++                      return;
++              msleep(2);
++              timeout -= 2;
++      }
++      dev_warn(nctrl->device,
++               "qid %d: timeout draining sock wmem allocation expired\n",
++               qid);
++}
++
++static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
++{
++      nvme_tcp_stop_queue_nowait(nctrl, qid);
++      nvme_tcp_wait_queue(nctrl, qid);
++}
++
++
+ static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue)
+ {
+       write_lock_bh(&queue->sock->sk->sk_callback_lock);
+@@ -1691,7 +1716,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
+       int i;
+       for (i = 1; i < ctrl->queue_count; i++)
+-              nvme_tcp_stop_queue(ctrl, i);
++              nvme_tcp_stop_queue_nowait(ctrl, i);
++      for (i = 1; i < ctrl->queue_count; i++)
++              nvme_tcp_wait_queue(ctrl, i);
+ }
+ static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl)
+-- 
+2.39.5
+
index 61a32824be6b01039c235ccd492bb0cf4a7b3908..5ea5be731c418a59452e7775b07867519350bec3 100644 (file)
@@ -13,3 +13,16 @@ iommu-amd-fix-potential-buffer-overflow-in-parse_ivrs_acpihid.patch
 iommu-vt-d-apply-quirk_iommu_igfx-for-8086-0044-qm57-qs57.patch
 tracing-fix-oob-write-in-trace_seq_to_buffer.patch
 net-sched-act_mirred-don-t-override-retval-if-we-already-lost-the-skb.patch
+net-mlx5-e-switch-initialize-mac-address-for-default.patch
+net-mlx5-remove-return-statement-exist-at-the-end-of.patch
+net-mlx5-e-switch-fix-error-handling-for-enabling-ro.patch
+net_sched-drr-fix-double-list-add-in-class-with-nete.patch
+net_sched-hfsc-fix-a-uaf-vulnerability-in-class-with.patch
+net_sched-ets-fix-double-list-add-in-class-with-nete.patch
+net_sched-qfq-fix-double-list-add-in-class-with-nete.patch
+net-dlink-correct-endianness-handling-of-led_mode.patch
+net-ipv6-fix-udpv6-gso-segmentation-with-nat.patch
+bnxt_en-fix-ethtool-d-byte-order-for-32-bit-values.patch
+nvme-tcp-fix-premature-queue-removal-and-i-o-failove.patch
+net-lan743x-fix-memleak-issue-when-gso-enabled.patch
+net-fec-err007885-workaround-for-conventional-tx.patch