From: Sasha Levin Date: Sat, 16 Dec 2023 03:36:31 +0000 (-0500) Subject: Fixes for 6.6 X-Git-Tag: v5.15.144~59 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=aa077122a8edf8679372ea8ff1b4d67e8af0e9a6;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.6 Signed-off-by: Sasha Levin --- diff --git a/queue-6.6/appletalk-fix-use-after-free-in-atalk_ioctl.patch b/queue-6.6/appletalk-fix-use-after-free-in-atalk_ioctl.patch new file mode 100644 index 00000000000..c780327136b --- /dev/null +++ b/queue-6.6/appletalk-fix-use-after-free-in-atalk_ioctl.patch @@ -0,0 +1,55 @@ +From 51593a249e733948eedc81bfb6b1ea520bc07e77 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Dec 2023 23:10:56 -0500 +Subject: appletalk: Fix Use-After-Free in atalk_ioctl + +From: Hyunwoo Kim + +[ Upstream commit 189ff16722ee36ced4d2a2469d4ab65a8fee4198 ] + +Because atalk_ioctl() accesses sk->sk_receive_queue +without holding a sk->sk_receive_queue.lock, it can +cause a race with atalk_recvmsg(). +A use-after-free for skb occurs with the following flow. +``` +atalk_ioctl() -> skb_peek() +atalk_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() +``` +Add sk->sk_receive_queue.lock to atalk_ioctl() to fix this issue. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Hyunwoo Kim +Link: https://lore.kernel.org/r/20231213041056.GA519680@v4bel-B760M-AORUS-ELITE-AX +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/appletalk/ddp.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c +index 8978fb6212ffb..b070a89912000 100644 +--- a/net/appletalk/ddp.c ++++ b/net/appletalk/ddp.c +@@ -1811,15 +1811,14 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) + break; + } + case TIOCINQ: { +- /* +- * These two are safe on a single CPU system as only +- * user tasks fiddle here +- */ +- struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); ++ struct sk_buff *skb; + long amount = 0; + ++ spin_lock_irq(&sk->sk_receive_queue.lock); ++ skb = skb_peek(&sk->sk_receive_queue); + if (skb) + amount = skb->len - sizeof(struct ddpehdr); ++ spin_unlock_irq(&sk->sk_receive_queue.lock); + rc = put_user(amount, (int __user *)argp); + break; + } +-- +2.43.0 + diff --git a/queue-6.6/atm-fix-use-after-free-in-do_vcc_ioctl.patch b/queue-6.6/atm-fix-use-after-free-in-do_vcc_ioctl.patch new file mode 100644 index 00000000000..6890052f343 --- /dev/null +++ b/queue-6.6/atm-fix-use-after-free-in-do_vcc_ioctl.patch @@ -0,0 +1,55 @@ +From bc174b4d9ab9de29fd1e8328cc52e33a61be4244 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 9 Dec 2023 04:42:10 -0500 +Subject: atm: Fix Use-After-Free in do_vcc_ioctl + +From: Hyunwoo Kim + +[ Upstream commit 24e90b9e34f9e039f56b5f25f6e6eb92cdd8f4b3 ] + +Because do_vcc_ioctl() accesses sk->sk_receive_queue +without holding a sk->sk_receive_queue.lock, it can +cause a race with vcc_recvmsg(). +A use-after-free for skb occurs with the following flow. +``` +do_vcc_ioctl() -> skb_peek() +vcc_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() +``` +Add sk->sk_receive_queue.lock to do_vcc_ioctl() to fix this issue. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Hyunwoo Kim +Link: https://lore.kernel.org/r/20231209094210.GA403126@v4bel-B760M-AORUS-ELITE-AX +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/atm/ioctl.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c +index 838ebf0cabbfb..f81f8d56f5c0c 100644 +--- a/net/atm/ioctl.c ++++ b/net/atm/ioctl.c +@@ -73,14 +73,17 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, + case SIOCINQ: + { + struct sk_buff *skb; ++ int amount; + + if (sock->state != SS_CONNECTED) { + error = -EINVAL; + goto done; + } ++ spin_lock_irq(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); +- error = put_user(skb ? skb->len : 0, +- (int __user *)argp) ? -EFAULT : 0; ++ amount = skb ? skb->len : 0; ++ spin_unlock_irq(&sk->sk_receive_queue.lock); ++ error = put_user(amount, (int __user *)argp) ? -EFAULT : 0; + goto done; + } + case ATM_SETSC: +-- +2.43.0 + diff --git a/queue-6.6/atm-solos-pci-fix-potential-deadlock-on-cli_queue_lo.patch b/queue-6.6/atm-solos-pci-fix-potential-deadlock-on-cli_queue_lo.patch new file mode 100644 index 00000000000..330be97366f --- /dev/null +++ b/queue-6.6/atm-solos-pci-fix-potential-deadlock-on-cli_queue_lo.patch @@ -0,0 +1,55 @@ +From 4a6feb340d4952edc01932d371baeb65dfdfdae9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Dec 2023 12:34:37 +0000 +Subject: atm: solos-pci: Fix potential deadlock on &cli_queue_lock + +From: Chengfeng Ye + +[ Upstream commit d5dba32b8f6cb39be708b726044ba30dbc088b30 ] + +As &card->cli_queue_lock is acquired under softirq context along the +following call chain from solos_bh(), other acquisition of the same +lock inside process context should disable at least bh to avoid double +lock. 
+ + +console_show() +--> spin_lock(&card->cli_queue_lock) + + --> solos_bh() + --> spin_lock(&card->cli_queue_lock) + +This flaw was found by an experimental static analysis tool I am +developing for irq-related deadlock. + +To prevent the potential deadlock, the patch uses spin_lock_bh() +on the card->cli_queue_lock under process context code consistently +to prevent the possible deadlock scenario. + +Fixes: 9c54004ea717 ("atm: Driver for Solos PCI ADSL2+ card.") +Signed-off-by: Chengfeng Ye +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/atm/solos-pci.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c +index 94fbc3abe60e6..95f768b28a5e6 100644 +--- a/drivers/atm/solos-pci.c ++++ b/drivers/atm/solos-pci.c +@@ -449,9 +449,9 @@ static ssize_t console_show(struct device *dev, struct device_attribute *attr, + struct sk_buff *skb; + unsigned int len; + +- spin_lock(&card->cli_queue_lock); ++ spin_lock_bh(&card->cli_queue_lock); + skb = skb_dequeue(&card->cli_queue[SOLOS_CHAN(atmdev)]); +- spin_unlock(&card->cli_queue_lock); ++ spin_unlock_bh(&card->cli_queue_lock); + if(skb == NULL) + return sprintf(buf, "No data.\n"); + +-- +2.43.0 + diff --git a/queue-6.6/atm-solos-pci-fix-potential-deadlock-on-tx_queue_loc.patch b/queue-6.6/atm-solos-pci-fix-potential-deadlock-on-tx_queue_loc.patch new file mode 100644 index 00000000000..09996ab7d5a --- /dev/null +++ b/queue-6.6/atm-solos-pci-fix-potential-deadlock-on-tx_queue_loc.patch @@ -0,0 +1,61 @@ +From 398deac409c3f92bd3728297a28093f164c8aa2c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Dec 2023 12:34:53 +0000 +Subject: atm: solos-pci: Fix potential deadlock on &tx_queue_lock + +From: Chengfeng Ye + +[ Upstream commit 15319a4e8ee4b098118591c6ccbd17237f841613 ] + +As &card->tx_queue_lock is acquired under softirq context along the +following call chain from solos_bh(), other acquisition of the same +lock inside 
process context should disable at least bh to avoid double +lock. + + +pclose() +--> spin_lock(&card->tx_queue_lock) + + --> solos_bh() + --> fpga_tx() + --> spin_lock(&card->tx_queue_lock) + +This flaw was found by an experimental static analysis tool I am +developing for irq-related deadlock. + +To prevent the potential deadlock, the patch uses spin_lock_bh() +on &card->tx_queue_lock under process context code consistently to +prevent the possible deadlock scenario. + +Fixes: 213e85d38912 ("solos-pci: clean up pclose() function") +Signed-off-by: Chengfeng Ye +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/atm/solos-pci.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c +index 95f768b28a5e6..d3c30a28c410e 100644 +--- a/drivers/atm/solos-pci.c ++++ b/drivers/atm/solos-pci.c +@@ -956,14 +956,14 @@ static void pclose(struct atm_vcc *vcc) + struct pkt_hdr *header; + + /* Remove any yet-to-be-transmitted packets from the pending queue */ +- spin_lock(&card->tx_queue_lock); ++ spin_lock_bh(&card->tx_queue_lock); + skb_queue_walk_safe(&card->tx_queue[port], skb, tmpskb) { + if (SKB_CB(skb)->vcc == vcc) { + skb_unlink(skb, &card->tx_queue[port]); + solos_pop(vcc, skb); + } + } +- spin_unlock(&card->tx_queue_lock); ++ spin_unlock_bh(&card->tx_queue_lock); + + skb = alloc_skb(sizeof(*header), GFP_KERNEL); + if (!skb) { +-- +2.43.0 + diff --git a/queue-6.6/bnxt_en-clear-resource-reservation-during-resume.patch b/queue-6.6/bnxt_en-clear-resource-reservation-during-resume.patch new file mode 100644 index 00000000000..5aab621c443 --- /dev/null +++ b/queue-6.6/bnxt_en-clear-resource-reservation-during-resume.patch @@ -0,0 +1,46 @@ +From 841064165fa0acf401485f07cddea8f33877d0f8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Dec 2023 16:16:55 -0800 +Subject: bnxt_en: Clear resource reservation during resume + +From: Somnath Kotur + +[ Upstream commit 
9ef7c58f5abe41e6d91f37f28fe2d851ffedd92a ] + +We are issuing HWRM_FUNC_RESET cmd to reset the device including +all reserved resources, but not clearing the reservations +within the driver struct. As a result, when the driver re-initializes +as part of resume, it believes that there is no need to do any +resource reservation and goes ahead and tries to allocate rings +which will eventually fail beyond a certain number pre-reserved by +the firmware. + +Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.") +Reviewed-by: Kalesh AP +Reviewed-by: Ajit Khaparde +Reviewed-by: Andy Gospodarek +Signed-off-by: Somnath Kotur +Signed-off-by: Michael Chan +Link: https://lore.kernel.org/r/20231208001658.14230-2-michael.chan@broadcom.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 7551aa8068f8f..4d2296f201adb 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -13897,6 +13897,8 @@ static int bnxt_resume(struct device *device) + if (rc) + goto resume_exit; + ++ bnxt_clear_reservations(bp, true); ++ + if (bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, false)) { + rc = -ENODEV; + goto resume_exit; +-- +2.43.0 + diff --git a/queue-6.6/bnxt_en-fix-hwtstamp_filter_all-packet-timestamp-log.patch b/queue-6.6/bnxt_en-fix-hwtstamp_filter_all-packet-timestamp-log.patch new file mode 100644 index 00000000000..a28dc85f587 --- /dev/null +++ b/queue-6.6/bnxt_en-fix-hwtstamp_filter_all-packet-timestamp-log.patch @@ -0,0 +1,107 @@ +From 18bc07b865891ec6f3bd19753095f6b3ce6997b2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Dec 2023 16:16:58 -0800 +Subject: bnxt_en: Fix HWTSTAMP_FILTER_ALL packet timestamp logic + +From: Michael Chan + +[ Upstream commit c13e268c0768659cdaae4bfe2fb24860bcc8ddb4 ] + +When the chip 
is configured to timestamp all receive packets, the +timestamp in the RX completion is only valid if the metadata +present flag is not set for packets received on the wire. In +addition, internal loopback packets will never have a valid timestamp +and the timestamp field will always be zero. We must exclude +any 0 value in the timestamp field because there is no way to +determine if it is a loopback packet or not. + +Add a new function bnxt_rx_ts_valid() to check for all timestamp +valid conditions. + +Fixes: 66ed81dcedc6 ("bnxt_en: Enable packet timestamping for all RX packets") +Reviewed-by: Andy Gospodarek +Reviewed-by: Pavan Chebbi +Signed-off-by: Michael Chan +Link: https://lore.kernel.org/r/20231208001658.14230-5-michael.chan@broadcom.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 20 +++++++++++++++++--- + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 8 +++++++- + 2 files changed, 24 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 4ce34a39bb5ee..f811d59fd71fd 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -1760,6 +1760,21 @@ static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi, + napi_gro_receive(&bnapi->napi, skb); + } + ++static bool bnxt_rx_ts_valid(struct bnxt *bp, u32 flags, ++ struct rx_cmp_ext *rxcmp1, u32 *cmpl_ts) ++{ ++ u32 ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp); ++ ++ if (BNXT_PTP_RX_TS_VALID(flags)) ++ goto ts_valid; ++ if (!bp->ptp_all_rx_tstamp || !ts || !BNXT_ALL_RX_TS_VALID(flags)) ++ return false; ++ ++ts_valid: ++ *cmpl_ts = ts; ++ return true; ++} ++ + /* returns the following: + * 1 - 1 packet successfully received + * 0 - successful TPA_START, packet not completed yet +@@ -1785,6 +1800,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + struct sk_buff *skb; + struct xdp_buff 
xdp; + u32 flags, misc; ++ u32 cmpl_ts; + void *data; + int rc = 0; + +@@ -2007,10 +2023,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + } + } + +- if (unlikely((flags & RX_CMP_FLAGS_ITYPES_MASK) == +- RX_CMP_FLAGS_ITYPE_PTP_W_TS) || bp->ptp_all_rx_tstamp) { ++ if (bnxt_rx_ts_valid(bp, flags, rxcmp1, &cmpl_ts)) { + if (bp->flags & BNXT_FLAG_CHIP_P5) { +- u32 cmpl_ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp); + u64 ns, ts; + + if (!bnxt_get_rx_ts_p5(bp, &ts, cmpl_ts)) { +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +index ea0f47eceea7c..0116f67593e3a 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +@@ -161,7 +161,7 @@ struct rx_cmp { + #define RX_CMP_FLAGS_ERROR (1 << 6) + #define RX_CMP_FLAGS_PLACEMENT (7 << 7) + #define RX_CMP_FLAGS_RSS_VALID (1 << 10) +- #define RX_CMP_FLAGS_UNUSED (1 << 11) ++ #define RX_CMP_FLAGS_PKT_METADATA_PRESENT (1 << 11) + #define RX_CMP_FLAGS_ITYPES_SHIFT 12 + #define RX_CMP_FLAGS_ITYPES_MASK 0xf000 + #define RX_CMP_FLAGS_ITYPE_UNKNOWN (0 << 12) +@@ -188,6 +188,12 @@ struct rx_cmp { + __le32 rx_cmp_rss_hash; + }; + ++#define BNXT_PTP_RX_TS_VALID(flags) \ ++ (((flags) & RX_CMP_FLAGS_ITYPES_MASK) == RX_CMP_FLAGS_ITYPE_PTP_W_TS) ++ ++#define BNXT_ALL_RX_TS_VALID(flags) \ ++ !((flags) & RX_CMP_FLAGS_PKT_METADATA_PRESENT) ++ + #define RX_CMP_HASH_VALID(rxcmp) \ + ((rxcmp)->rx_cmp_len_flags_type & cpu_to_le32(RX_CMP_FLAGS_RSS_VALID)) + +-- +2.43.0 + diff --git a/queue-6.6/bnxt_en-fix-skb-recycling-logic-in-bnxt_deliver_skb.patch b/queue-6.6/bnxt_en-fix-skb-recycling-logic-in-bnxt_deliver_skb.patch new file mode 100644 index 00000000000..d2dc9a66a2e --- /dev/null +++ b/queue-6.6/bnxt_en-fix-skb-recycling-logic-in-bnxt_deliver_skb.patch @@ -0,0 +1,50 @@ +From 61ad5d6d9e8bdc87c084bcf4b46e5ec5085551bc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Dec 2023 16:16:56 -0800 +Subject: bnxt_en: Fix skb 
recycling logic in bnxt_deliver_skb() + +From: Sreekanth Reddy + +[ Upstream commit aded5d1feb08e48d544845d3594d70c4d5fe6e54 ] + +Receive SKBs can go through the VF-rep path or the normal path. +skb_mark_for_recycle() is only called for the normal path. Fix it +to do it for both paths to fix possible stalled page pool shutdown +errors. + +Fixes: 86b05508f775 ("bnxt_en: Use the unified RX page pool buffers for XDP and non-XDP") +Reviewed-by: Somnath Kotur +Reviewed-by: Andy Gospodarek +Reviewed-by: Vikas Gupta +Signed-off-by: Sreekanth Reddy +Signed-off-by: Michael Chan +Link: https://lore.kernel.org/r/20231208001658.14230-3-michael.chan@broadcom.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 4d2296f201adb..9f52b943fedec 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -1749,13 +1749,14 @@ static void bnxt_tpa_agg(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi, + struct sk_buff *skb) + { ++ skb_mark_for_recycle(skb); ++ + if (skb->dev != bp->dev) { + /* this packet belongs to a vf-rep */ + bnxt_vf_rep_rx(bp, skb); + return; + } + skb_record_rx_queue(skb, bnapi->index); +- skb_mark_for_recycle(skb); + napi_gro_receive(&bnapi->napi, skb); + } + +-- +2.43.0 + diff --git a/queue-6.6/bnxt_en-fix-wrong-return-value-check-in-bnxt_close_n.patch b/queue-6.6/bnxt_en-fix-wrong-return-value-check-in-bnxt_close_n.patch new file mode 100644 index 00000000000..876b1b76c7b --- /dev/null +++ b/queue-6.6/bnxt_en-fix-wrong-return-value-check-in-bnxt_close_n.patch @@ -0,0 +1,177 @@ +From 7f21997937850c004bef110e7374919546fb1245 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Dec 2023 16:16:57 -0800 +Subject: bnxt_en: 
Fix wrong return value check in bnxt_close_nic() + +From: Kalesh AP + +[ Upstream commit bd6781c18cb5b5e5d8c5873fa9a51668e89ec76e ] + +The wait_event_interruptible_timeout() function returns 0 +if the timeout elapsed, -ERESTARTSYS if it was interrupted +by a signal, and the remaining jiffies otherwise if the +condition evaluated to true before the timeout elapsed. + +Driver should have checked for zero return value instead of +a positive value. + +MChan: Print a warning for -ERESTARTSYS. The close operation +will proceed anyway when wait_event_interruptible_timeout() +returns for any reason. Since we do the close no matter what, +we should not return this error code to the caller. Change +bnxt_close_nic() to a void function and remove all error +handling from some of the callers. + +Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") +Reviewed-by: Andy Gospodarek +Reviewed-by: Vikas Gupta +Reviewed-by: Somnath Kotur +Signed-off-by: Kalesh AP +Signed-off-by: Michael Chan +Link: https://lore.kernel.org/r/20231208001658.14230-4-michael.chan@broadcom.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++------ + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- + .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 11 ++--------- + .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 19 ++++--------------- + drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 5 ++--- + 5 files changed, 16 insertions(+), 34 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 9f52b943fedec..4ce34a39bb5ee 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -10704,10 +10704,8 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, + bnxt_free_mem(bp, irq_re_init); + } + +-int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) ++void bnxt_close_nic(struct bnxt *bp, bool 
irq_re_init, bool link_re_init) + { +- int rc = 0; +- + if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + /* If we get here, it means firmware reset is in progress + * while we are trying to close. We can safely proceed with +@@ -10722,15 +10720,18 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) + + #ifdef CONFIG_BNXT_SRIOV + if (bp->sriov_cfg) { ++ int rc; ++ + rc = wait_event_interruptible_timeout(bp->sriov_cfg_wait, + !bp->sriov_cfg, + BNXT_SRIOV_CFG_WAIT_TMO); +- if (rc) +- netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete!\n"); ++ if (!rc) ++ netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete, proceeding to close!\n"); ++ else if (rc < 0) ++ netdev_warn(bp->dev, "SRIOV config operation interrupted, proceeding to close!\n"); + } + #endif + __bnxt_close_nic(bp, irq_re_init, link_re_init); +- return rc; + } + + static int bnxt_close(struct net_device *dev) +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +index 84cbcfa61bc12..ea0f47eceea7c 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +@@ -2362,7 +2362,7 @@ int bnxt_open_nic(struct bnxt *, bool, bool); + int bnxt_half_open_nic(struct bnxt *bp); + void bnxt_half_close_nic(struct bnxt *bp); + void bnxt_reenable_sriov(struct bnxt *bp); +-int bnxt_close_nic(struct bnxt *, bool, bool); ++void bnxt_close_nic(struct bnxt *, bool, bool); + void bnxt_get_ring_err_stats(struct bnxt *bp, + struct bnxt_total_ring_err_stats *stats); + int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words, +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +index 8b3e7697390f7..9d39f194b260f 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +@@ -478,15 +478,8 @@ static int bnxt_dl_reload_down(struct 
devlink *dl, bool netns_change, + return -ENODEV; + } + bnxt_ulp_stop(bp); +- if (netif_running(bp->dev)) { +- rc = bnxt_close_nic(bp, true, true); +- if (rc) { +- NL_SET_ERR_MSG_MOD(extack, "Failed to close"); +- dev_close(bp->dev); +- rtnl_unlock(); +- break; +- } +- } ++ if (netif_running(bp->dev)) ++ bnxt_close_nic(bp, true, true); + bnxt_vf_reps_free(bp); + rc = bnxt_hwrm_func_drv_unrgtr(bp); + if (rc) { +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +index 547247d98eba2..3c36dd8051485 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +@@ -164,9 +164,8 @@ static int bnxt_set_coalesce(struct net_device *dev, + reset_coalesce: + if (test_bit(BNXT_STATE_OPEN, &bp->state)) { + if (update_stats) { +- rc = bnxt_close_nic(bp, true, false); +- if (!rc) +- rc = bnxt_open_nic(bp, true, false); ++ bnxt_close_nic(bp, true, false); ++ rc = bnxt_open_nic(bp, true, false); + } else { + rc = bnxt_hwrm_set_coal(bp); + } +@@ -955,12 +954,7 @@ static int bnxt_set_channels(struct net_device *dev, + * before PF unload + */ + } +- rc = bnxt_close_nic(bp, true, false); +- if (rc) { +- netdev_err(bp->dev, "Set channel failure rc :%x\n", +- rc); +- return rc; +- } ++ bnxt_close_nic(bp, true, false); + } + + if (sh) { +@@ -3737,12 +3731,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, + bnxt_run_fw_tests(bp, test_mask, &test_results); + } else { + bnxt_ulp_stop(bp); +- rc = bnxt_close_nic(bp, true, false); +- if (rc) { +- etest->flags |= ETH_TEST_FL_FAILED; +- bnxt_ulp_start(bp, rc); +- return; +- } ++ bnxt_close_nic(bp, true, false); + bnxt_run_fw_tests(bp, test_mask, &test_results); + + buf[BNXT_MACLPBK_TEST_IDX] = 1; +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +index f3886710e7787..6e3da3362bd61 100644 +--- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +@@ -521,9 +521,8 @@ static int bnxt_hwrm_ptp_cfg(struct bnxt *bp) + + if (netif_running(bp->dev)) { + if (ptp->rx_filter == HWTSTAMP_FILTER_ALL) { +- rc = bnxt_close_nic(bp, false, false); +- if (!rc) +- rc = bnxt_open_nic(bp, false, false); ++ bnxt_close_nic(bp, false, false); ++ rc = bnxt_open_nic(bp, false, false); + } else { + bnxt_ptp_cfg_tstamp_filters(bp); + } +-- +2.43.0 + diff --git a/queue-6.6/dpaa2-switch-do-not-ask-for-mdb-vlan-and-fdb-replay.patch b/queue-6.6/dpaa2-switch-do-not-ask-for-mdb-vlan-and-fdb-replay.patch new file mode 100644 index 00000000000..2305695a7e0 --- /dev/null +++ b/queue-6.6/dpaa2-switch-do-not-ask-for-mdb-vlan-and-fdb-replay.patch @@ -0,0 +1,85 @@ +From c30b8088f48735a81992234bda4bb49d7ef95d2b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Dec 2023 18:43:26 +0200 +Subject: dpaa2-switch: do not ask for MDB, VLAN and FDB replay + +From: Ioana Ciornei + +[ Upstream commit f24a49a375f65e8e75ee1b19d806f46dbaae57fd ] + +Starting with commit 4e51bf44a03a ("net: bridge: move the switchdev +object replay helpers to "push" mode") the switchdev_bridge_port_offload() +helper was extended with the intention to provide switchdev drivers easy +access to object addition and deletion replays. This works by calling +the replay helpers with non-NULL notifier blocks. + +In the same commit, the dpaa2-switch driver was updated so that it +passes valid notifier blocks to the helper. At that moment, no +regression was identified through testing. + +In the meantime, the blamed commit changed the behavior in terms of +which ports get hit by the replay. Before this commit, only the initial +port which identified itself as offloaded through +switchdev_bridge_port_offload() got a replay of all port objects and +FDBs. After this, the newly joining port will trigger a replay of +objects on all bridge ports and on the bridge itself. 
+ +This behavior leads to errors in dpaa2_switch_port_vlans_add() when a +VLAN gets installed on the same interface multiple times. + +The intended mechanism to address this is to pass a non-NULL ctx to the +switchdev_bridge_port_offload() helper and then check it against the +port's private structure. But since the driver does not have any use for +the replayed port objects and FDBs until it gains support for LAG +offload, it's better to fix the issue by reverting the dpaa2-switch +driver to not ask for replay. The pointers will be added back when we +are prepared to ignore replays on unrelated ports. + +Fixes: b28d580e2939 ("net: bridge: switchdev: replay all VLAN groups") +Signed-off-by: Ioana Ciornei +Link: https://lore.kernel.org/r/20231212164326.2753457-3-ioana.ciornei@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 11 ++--------- + 1 file changed, 2 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +index 97d3151076d53..e01a246124ac6 100644 +--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c ++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +@@ -1998,9 +1998,6 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev, + return notifier_from_errno(err); + } + +-static struct notifier_block dpaa2_switch_port_switchdev_nb; +-static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb; +- + static int dpaa2_switch_port_bridge_join(struct net_device *netdev, + struct net_device *upper_dev, + struct netlink_ext_ack *extack) +@@ -2043,9 +2040,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev, + goto err_egress_flood; + + err = switchdev_bridge_port_offload(netdev, netdev, NULL, +- &dpaa2_switch_port_switchdev_nb, +- &dpaa2_switch_port_switchdev_blocking_nb, +- false, extack); ++ NULL, NULL, false, extack); + if (err) + goto 
err_switchdev_offload; + +@@ -2079,9 +2074,7 @@ static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, vo + + static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev) + { +- switchdev_bridge_port_unoffload(netdev, NULL, +- &dpaa2_switch_port_switchdev_nb, +- &dpaa2_switch_port_switchdev_blocking_nb); ++ switchdev_bridge_port_unoffload(netdev, NULL, NULL, NULL); + } + + static int dpaa2_switch_port_bridge_leave(struct net_device *netdev) +-- +2.43.0 + diff --git a/queue-6.6/dpaa2-switch-fix-size-of-the-dma_unmap.patch b/queue-6.6/dpaa2-switch-fix-size-of-the-dma_unmap.patch new file mode 100644 index 00000000000..cf3ebf93b1c --- /dev/null +++ b/queue-6.6/dpaa2-switch-fix-size-of-the-dma_unmap.patch @@ -0,0 +1,50 @@ +From 1bfd84ec25ff0610435ca28aa1b92d2acafc0cb0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Dec 2023 18:43:25 +0200 +Subject: dpaa2-switch: fix size of the dma_unmap + +From: Ioana Ciornei + +[ Upstream commit 2aad7d4189a923b24efa8ea6ad09059882b1bfe4 ] + +The size of the DMA unmap was wrongly put as a sizeof of a pointer. +Change the value of the DMA unmap to be the actual macro used for the +allocation and the DMA map. 
+ +Fixes: 1110318d83e8 ("dpaa2-switch: add tc flower hardware offload on ingress traffic") +Signed-off-by: Ioana Ciornei +Link: https://lore.kernel.org/r/20231212164326.2753457-2-ioana.ciornei@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c +index 4798fb7fe35d1..b6a534a3e0b12 100644 +--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c ++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c +@@ -139,7 +139,8 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, + err = dpsw_acl_add_entry(ethsw->mc_io, 0, ethsw->dpsw_handle, + filter_block->acl_id, acl_entry_cfg); + +- dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff), ++ dma_unmap_single(dev, acl_entry_cfg->key_iova, ++ DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, + DMA_TO_DEVICE); + if (err) { + dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err); +@@ -181,8 +182,8 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, + err = dpsw_acl_remove_entry(ethsw->mc_io, 0, ethsw->dpsw_handle, + block->acl_id, acl_entry_cfg); + +- dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff), +- DMA_TO_DEVICE); ++ dma_unmap_single(dev, acl_entry_cfg->key_iova, ++ DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, DMA_TO_DEVICE); + if (err) { + dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err); + kfree(cmd_buff); +-- +2.43.0 + diff --git a/queue-6.6/iavf-fix-iavf_shutdown-to-call-iavf_remove-instead-i.patch b/queue-6.6/iavf-fix-iavf_shutdown-to-call-iavf_remove-instead-i.patch new file mode 100644 index 00000000000..42febe1ac10 --- /dev/null +++ b/queue-6.6/iavf-fix-iavf_shutdown-to-call-iavf_remove-instead-i.patch @@ -0,0 +1,194 @@ +From e6ac4c8fe2b678239cbc8ea4989a66022823516c 
Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Nov 2023 10:35:26 -0500 +Subject: iavf: Fix iavf_shutdown to call iavf_remove instead iavf_close + +From: Slawomir Laba + +[ Upstream commit 7ae42ef308ed0f6250b36f43e4eeb182ebbe6215 ] + +Make the flow for pci shutdown be the same to the pci remove. + +iavf_shutdown was implementing an incomplete version +of iavf_remove. It misses several calls to the kernel like +iavf_free_misc_irq, iavf_reset_interrupt_capability, iounmap +that might break the system on reboot or hibernation. + +Implement the call of iavf_remove directly in iavf_shutdown to +close this gap. + +Fixes below error messages (dmesg) during shutdown stress tests - +[685814.900917] ice 0000:88:00.0: MAC 02:d0:5f:82:43:5d does not exist for + VF 0 +[685814.900928] ice 0000:88:00.0: MAC 33:33:00:00:00:01 does not exist for +VF 0 + +Reproduction: + +1. Create one VF interface: +echo 1 > /sys/class/net/<interface_name>/device/sriov_numvfs + +2. Run live dmesg on the host: +dmesg -wH + +3. On SUT, script below steps into vf_namespace_assignment.sh + +<#!/bin/sh> // Remove <>. Git removes # line +if=<VF name> (edit this per VF name) +loop=0 + +while true; do + +echo test round $loop +let loop++ + +ip netns add ns$loop +ip link set dev $if up +ip link set dev $if netns ns$loop +ip netns exec ns$loop ip link set dev $if up +ip netns exec ns$loop ip link set dev $if netns 1 +ip netns delete ns$loop + +done + +4. Run the script for at least 1000 iterations on SUT: +./vf_namespace_assignment.sh + +Expected result: +No errors in dmesg. 
+ +Fixes: 129cf89e5856 ("iavf: rename functions and structs to new name") +Signed-off-by: Slawomir Laba +Reviewed-by: Michal Swiatkowski +Reviewed-by: Ahmed Zaki +Reviewed-by: Jesse Brandeburg +Co-developed-by: Ranganatha Rao +Signed-off-by: Ranganatha Rao +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf_main.c | 72 ++++++--------------- + 1 file changed, 21 insertions(+), 51 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index af8eb27a3615c..257865647c865 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -277,27 +277,6 @@ void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem) + kfree(mem->va); + } + +-/** +- * iavf_lock_timeout - try to lock mutex but give up after timeout +- * @lock: mutex that should be locked +- * @msecs: timeout in msecs +- * +- * Returns 0 on success, negative on failure +- **/ +-static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) +-{ +- unsigned int wait, delay = 10; +- +- for (wait = 0; wait < msecs; wait += delay) { +- if (mutex_trylock(lock)) +- return 0; +- +- msleep(delay); +- } +- +- return -1; +-} +- + /** + * iavf_schedule_reset - Set the flags and schedule a reset event + * @adapter: board private structure +@@ -4925,34 +4904,6 @@ int iavf_process_config(struct iavf_adapter *adapter) + return 0; + } + +-/** +- * iavf_shutdown - Shutdown the device in preparation for a reboot +- * @pdev: pci device structure +- **/ +-static void iavf_shutdown(struct pci_dev *pdev) +-{ +- struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); +- struct net_device *netdev = adapter->netdev; +- +- netif_device_detach(netdev); +- +- if (netif_running(netdev)) +- iavf_close(netdev); +- +- if (iavf_lock_timeout(&adapter->crit_lock, 5000)) +- dev_warn(&adapter->pdev->dev, "%s: failed to acquire crit_lock\n", 
__func__); +- /* Prevent the watchdog from running. */ +- iavf_change_state(adapter, __IAVF_REMOVE); +- adapter->aq_required = 0; +- mutex_unlock(&adapter->crit_lock); +- +-#ifdef CONFIG_PM +- pci_save_state(pdev); +- +-#endif +- pci_disable_device(pdev); +-} +- + /** + * iavf_probe - Device Initialization Routine + * @pdev: PCI device information struct +@@ -5166,17 +5117,22 @@ static int __maybe_unused iavf_resume(struct device *dev_d) + **/ + static void iavf_remove(struct pci_dev *pdev) + { +- struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); + struct iavf_fdir_fltr *fdir, *fdirtmp; + struct iavf_vlan_filter *vlf, *vlftmp; + struct iavf_cloud_filter *cf, *cftmp; + struct iavf_adv_rss *rss, *rsstmp; + struct iavf_mac_filter *f, *ftmp; ++ struct iavf_adapter *adapter; + struct net_device *netdev; + struct iavf_hw *hw; + int err; + +- netdev = adapter->netdev; ++ /* Don't proceed with remove if netdev is already freed */ ++ netdev = pci_get_drvdata(pdev); ++ if (!netdev) ++ return; ++ ++ adapter = iavf_pdev_to_adapter(pdev); + hw = &adapter->hw; + + if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) +@@ -5304,11 +5260,25 @@ static void iavf_remove(struct pci_dev *pdev) + + destroy_workqueue(adapter->wq); + ++ pci_set_drvdata(pdev, NULL); ++ + free_netdev(netdev); + + pci_disable_device(pdev); + } + ++/** ++ * iavf_shutdown - Shutdown the device in preparation for a reboot ++ * @pdev: pci device structure ++ **/ ++static void iavf_shutdown(struct pci_dev *pdev) ++{ ++ iavf_remove(pdev); ++ ++ if (system_state == SYSTEM_POWER_OFF) ++ pci_set_power_state(pdev, PCI_D3hot); ++} ++ + static SIMPLE_DEV_PM_OPS(iavf_pm_ops, iavf_suspend, iavf_resume); + + static struct pci_driver iavf_driver = { +-- +2.43.0 + diff --git a/queue-6.6/iavf-handle-ntuple-on-off-based-on-new-state-machine.patch b/queue-6.6/iavf-handle-ntuple-on-off-based-on-new-state-machine.patch new file mode 100644 index 00000000000..5d964e3feed --- /dev/null +++ 
b/queue-6.6/iavf-handle-ntuple-on-off-based-on-new-state-machine.patch @@ -0,0 +1,131 @@ +From f44ca0576e3444e64bc36f53018eb3b5c3f03943 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 Nov 2023 22:47:16 -0500 +Subject: iavf: Handle ntuple on/off based on new state machines for flow + director + +From: Piotr Gardocki + +[ Upstream commit 09d23b8918f9ab0f8114f6b94f2faf8bde3fb52a ] + +ntuple-filter feature on/off: +Default is on. If turned off, the filters will be removed from both +PF and iavf list. The removal is irrespective of current filter state. + +Steps to reproduce: +------------------- + +1. Ensure ntuple is on. + +ethtool -K enp8s0 ntuple-filters on + +2. Create a filter to receive the traffic into non-default rx-queue like 15 +and ensure traffic is flowing into queue into 15. +Now, turn off ntuple. Traffic should not flow to configured queue 15. +It should flow to default RX queue. + +Fixes: 0dbfbabb840d ("iavf: Add framework to enable ethtool ntuple filters") +Signed-off-by: Piotr Gardocki +Reviewed-by: Larysa Zaremba +Signed-off-by: Ranganatha Rao +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf_main.c | 59 +++++++++++++++++++++ + 1 file changed, 59 insertions(+) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index 5158addc0aa96..af8eb27a3615c 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -4409,6 +4409,49 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) + return ret; + } + ++/** ++ * iavf_disable_fdir - disable Flow Director and clear existing filters ++ * @adapter: board private structure ++ **/ ++static void iavf_disable_fdir(struct iavf_adapter *adapter) ++{ ++ struct iavf_fdir_fltr *fdir, *fdirtmp; ++ bool del_filters = false; ++ ++ adapter->flags &= ~IAVF_FLAG_FDIR_ENABLED; ++ ++ /* remove all Flow Director filters 
*/ ++ spin_lock_bh(&adapter->fdir_fltr_lock); ++ list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head, ++ list) { ++ if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST || ++ fdir->state == IAVF_FDIR_FLTR_INACTIVE) { ++ /* Delete filters not registered in PF */ ++ list_del(&fdir->list); ++ kfree(fdir); ++ adapter->fdir_active_fltr--; ++ } else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING || ++ fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST || ++ fdir->state == IAVF_FDIR_FLTR_ACTIVE) { ++ /* Filters registered in PF, schedule their deletion */ ++ fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST; ++ del_filters = true; ++ } else if (fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) { ++ /* Request to delete filter already sent to PF, change ++ * state to DEL_PENDING to delete filter after PF's ++ * response, not set as INACTIVE ++ */ ++ fdir->state = IAVF_FDIR_FLTR_DEL_PENDING; ++ } ++ } ++ spin_unlock_bh(&adapter->fdir_fltr_lock); ++ ++ if (del_filters) { ++ adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER; ++ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); ++ } ++} ++ + #define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | \ + NETIF_F_HW_VLAN_STAG_RX | \ +@@ -4431,6 +4474,13 @@ static int iavf_set_features(struct net_device *netdev, + iavf_set_vlan_offload_features(adapter, netdev->features, + features); + ++ if ((netdev->features & NETIF_F_NTUPLE) ^ (features & NETIF_F_NTUPLE)) { ++ if (features & NETIF_F_NTUPLE) ++ adapter->flags |= IAVF_FLAG_FDIR_ENABLED; ++ else ++ iavf_disable_fdir(adapter); ++ } ++ + return 0; + } + +@@ -4726,6 +4776,9 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, + { + struct iavf_adapter *adapter = netdev_priv(netdev); + ++ if (!FDIR_FLTR_SUPPORT(adapter)) ++ features &= ~NETIF_F_NTUPLE; ++ + return iavf_fix_netdev_vlan_features(adapter, features); + } + +@@ -4843,6 +4896,12 @@ int iavf_process_config(struct iavf_adapter *adapter) + if (vfres->vf_cap_flags & 
VIRTCHNL_VF_OFFLOAD_VLAN) + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + ++ if (FDIR_FLTR_SUPPORT(adapter)) { ++ netdev->hw_features |= NETIF_F_NTUPLE; ++ netdev->features |= NETIF_F_NTUPLE; ++ adapter->flags |= IAVF_FLAG_FDIR_ENABLED; ++ } ++ + netdev->priv_flags |= IFF_UNICAST_FLT; + + /* Do not turn on offloads when they are requested to be turned off. +-- +2.43.0 + diff --git a/queue-6.6/iavf-introduce-new-state-machines-for-flow-director.patch b/queue-6.6/iavf-introduce-new-state-machines-for-flow-director.patch new file mode 100644 index 00000000000..b1f6ba6f9ef --- /dev/null +++ b/queue-6.6/iavf-introduce-new-state-machines-for-flow-director.patch @@ -0,0 +1,407 @@ +From 0c159fba190bb21cb6fb097192dd70017172d8c1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 Nov 2023 22:47:15 -0500 +Subject: iavf: Introduce new state machines for flow director + +From: Piotr Gardocki + +[ Upstream commit 3a0b5a2929fdeda63fc921c2dbed237059acf732 ] + +New states introduced: + + IAVF_FDIR_FLTR_DIS_REQUEST + IAVF_FDIR_FLTR_DIS_PENDING + IAVF_FDIR_FLTR_INACTIVE + +Current FDIR state machines (SM) are not adequate to handle a few +scenarios in the link DOWN/UP event, reset event and ntuple-feature. + +For example, when VF link goes DOWN and comes back UP administratively, +the expectation is that previously installed filters should also be +restored. But with current SM, filters are not restored. +So with new SM, during link DOWN filters are marked as INACTIVE in +the iavf list but removed from PF. After link UP, SM will transition +from INACTIVE to ADD_REQUEST to restore the filter. + +Similarly, with VF reset, filters will be removed from the PF, but +marked as INACTIVE in the iavf list. Filters will be restored after +reset completion. + +Steps to reproduce: +------------------- + +1. Create a VF. Here VF is enp8s0. + +2. Assign IP addresses to VF and link partner and ping continuously +from remote. Here remote IP is 1.1.1.1. + +3. 
Check default RX Queue of traffic. + +ethtool -S enp8s0 | grep -E "rx-[[:digit:]]+\.packets" + +4. Add filter - change default RX Queue (to 15 here) + +ethtool -U ens8s0 flow-type ip4 src-ip 1.1.1.1 action 15 loc 5 + +5. Ensure filter gets added and traffic is received on RX queue 15 now. + +Link event testing: +------------------- +6. Bring VF link down and up. If traffic flows to configured queue 15, +test is success, otherwise it is a failure. + +Reset event testing: +-------------------- +7. Reset the VF. If traffic flows to configured queue 15, test is success, +otherwise it is a failure. + +Fixes: 0dbfbabb840d ("iavf: Add framework to enable ethtool ntuple filters") +Signed-off-by: Piotr Gardocki +Reviewed-by: Larysa Zaremba +Signed-off-by: Ranganatha Rao +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/iavf/iavf.h | 1 + + .../net/ethernet/intel/iavf/iavf_ethtool.c | 27 ++++--- + drivers/net/ethernet/intel/iavf/iavf_fdir.h | 15 +++- + drivers/net/ethernet/intel/iavf/iavf_main.c | 48 ++++++++++--- + .../net/ethernet/intel/iavf/iavf_virtchnl.c | 71 +++++++++++++++++-- + 5 files changed, 139 insertions(+), 23 deletions(-) + +diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h +index d8d7b62ceb24e..431d9d62c8c66 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf.h ++++ b/drivers/net/ethernet/intel/iavf/iavf.h +@@ -303,6 +303,7 @@ struct iavf_adapter { + #define IAVF_FLAG_QUEUES_DISABLED BIT(17) + #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) + #define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20) ++#define IAVF_FLAG_FDIR_ENABLED BIT(21) + /* duplicates for common code */ + #define IAVF_FLAG_DCB_ENABLED 0 + /* flags for admin queue service task */ +diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +index 1b412754aa422..892c6a4f03bb8 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c ++++ 
b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +@@ -1063,7 +1063,7 @@ iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *rule = NULL; + int ret = 0; + +- if (!FDIR_FLTR_SUPPORT(adapter)) ++ if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) + return -EOPNOTSUPP; + + spin_lock_bh(&adapter->fdir_fltr_lock); +@@ -1205,7 +1205,7 @@ iavf_get_fdir_fltr_ids(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd, + unsigned int cnt = 0; + int val = 0; + +- if (!FDIR_FLTR_SUPPORT(adapter)) ++ if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) + return -EOPNOTSUPP; + + cmd->data = IAVF_MAX_FDIR_FILTERS; +@@ -1397,7 +1397,7 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx + int count = 50; + int err; + +- if (!FDIR_FLTR_SUPPORT(adapter)) ++ if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) + return -EOPNOTSUPP; + + if (fsp->flow_type & FLOW_MAC_EXT) +@@ -1438,12 +1438,16 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx + spin_lock_bh(&adapter->fdir_fltr_lock); + iavf_fdir_list_add_fltr(adapter, fltr); + adapter->fdir_active_fltr++; +- fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST; +- adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; ++ if (adapter->link_up) { ++ fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST; ++ adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; ++ } else { ++ fltr->state = IAVF_FDIR_FLTR_INACTIVE; ++ } + spin_unlock_bh(&adapter->fdir_fltr_lock); + +- mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); +- ++ if (adapter->link_up) ++ mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); + ret: + if (err && fltr) + kfree(fltr); +@@ -1465,7 +1469,7 @@ static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx + struct iavf_fdir_fltr *fltr = NULL; + int err = 0; + +- if (!FDIR_FLTR_SUPPORT(adapter)) ++ if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) + return -EOPNOTSUPP; + + spin_lock_bh(&adapter->fdir_fltr_lock); +@@ -1474,6 +1478,11 @@ 
static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx + if (fltr->state == IAVF_FDIR_FLTR_ACTIVE) { + fltr->state = IAVF_FDIR_FLTR_DEL_REQUEST; + adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER; ++ } else if (fltr->state == IAVF_FDIR_FLTR_INACTIVE) { ++ list_del(&fltr->list); ++ kfree(fltr); ++ adapter->fdir_active_fltr--; ++ fltr = NULL; + } else { + err = -EBUSY; + } +@@ -1782,7 +1791,7 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + ret = 0; + break; + case ETHTOOL_GRXCLSRLCNT: +- if (!FDIR_FLTR_SUPPORT(adapter)) ++ if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) + break; + spin_lock_bh(&adapter->fdir_fltr_lock); + cmd->rule_cnt = adapter->fdir_active_fltr; +diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h +index 9eb9f73f6adf3..d31bd923ba8cb 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h ++++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h +@@ -6,12 +6,25 @@ + + struct iavf_adapter; + +-/* State of Flow Director filter */ ++/* State of Flow Director filter ++ * ++ * *_REQUEST states are used to mark filter to be sent to PF driver to perform ++ * an action (either add or delete filter). *_PENDING states are an indication ++ * that request was sent to PF and the driver is waiting for response. ++ * ++ * Both DELETE and DISABLE states are being used to delete a filter in PF. ++ * The difference is that after a successful response filter in DEL_PENDING ++ * state is being deleted from VF driver as well and filter in DIS_PENDING state ++ * is being changed to INACTIVE state. 
++ */ + enum iavf_fdir_fltr_state_t { + IAVF_FDIR_FLTR_ADD_REQUEST, /* User requests to add filter */ + IAVF_FDIR_FLTR_ADD_PENDING, /* Filter pending add by the PF */ + IAVF_FDIR_FLTR_DEL_REQUEST, /* User requests to delete filter */ + IAVF_FDIR_FLTR_DEL_PENDING, /* Filter pending delete by the PF */ ++ IAVF_FDIR_FLTR_DIS_REQUEST, /* Filter scheduled to be disabled */ ++ IAVF_FDIR_FLTR_DIS_PENDING, /* Filter pending disable by the PF */ ++ IAVF_FDIR_FLTR_INACTIVE, /* Filter inactive on link down */ + IAVF_FDIR_FLTR_ACTIVE, /* Filter is active */ + }; + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index 68783a7b70962..5158addc0aa96 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -1356,18 +1356,20 @@ static void iavf_clear_cloud_filters(struct iavf_adapter *adapter) + **/ + static void iavf_clear_fdir_filters(struct iavf_adapter *adapter) + { +- struct iavf_fdir_fltr *fdir, *fdirtmp; ++ struct iavf_fdir_fltr *fdir; + + /* remove all Flow Director filters */ + spin_lock_bh(&adapter->fdir_fltr_lock); +- list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head, +- list) { ++ list_for_each_entry(fdir, &adapter->fdir_list_head, list) { + if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST) { +- list_del(&fdir->list); +- kfree(fdir); +- adapter->fdir_active_fltr--; +- } else { +- fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST; ++ /* Cancel a request, keep filter as inactive */ ++ fdir->state = IAVF_FDIR_FLTR_INACTIVE; ++ } else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING || ++ fdir->state == IAVF_FDIR_FLTR_ACTIVE) { ++ /* Disable filters which are active or have a pending ++ * request to PF to be added ++ */ ++ fdir->state = IAVF_FDIR_FLTR_DIS_REQUEST; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); +@@ -4174,6 +4176,33 @@ static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type, + } + } + ++/** ++ * iavf_restore_fdir_filters ++ 
* @adapter: board private structure ++ * ++ * Restore existing FDIR filters when VF netdev comes back up. ++ **/ ++static void iavf_restore_fdir_filters(struct iavf_adapter *adapter) ++{ ++ struct iavf_fdir_fltr *f; ++ ++ spin_lock_bh(&adapter->fdir_fltr_lock); ++ list_for_each_entry(f, &adapter->fdir_list_head, list) { ++ if (f->state == IAVF_FDIR_FLTR_DIS_REQUEST) { ++ /* Cancel a request, keep filter as active */ ++ f->state = IAVF_FDIR_FLTR_ACTIVE; ++ } else if (f->state == IAVF_FDIR_FLTR_DIS_PENDING || ++ f->state == IAVF_FDIR_FLTR_INACTIVE) { ++ /* Add filters which are inactive or have a pending ++ * request to PF to be deleted ++ */ ++ f->state = IAVF_FDIR_FLTR_ADD_REQUEST; ++ adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; ++ } ++ } ++ spin_unlock_bh(&adapter->fdir_fltr_lock); ++} ++ + /** + * iavf_open - Called when a network interface is made active + * @netdev: network interface device structure +@@ -4241,8 +4270,9 @@ static int iavf_open(struct net_device *netdev) + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + +- /* Restore VLAN filters that were removed with IFF_DOWN */ ++ /* Restore filters that were removed with IFF_DOWN */ + iavf_restore_filters(adapter); ++ iavf_restore_fdir_filters(adapter); + + iavf_configure(adapter); + +diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +index 0b97b424e487a..b95a4f903204b 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +@@ -1738,8 +1738,8 @@ void iavf_add_fdir_filter(struct iavf_adapter *adapter) + **/ + void iavf_del_fdir_filter(struct iavf_adapter *adapter) + { ++ struct virtchnl_fdir_del f = {}; + struct iavf_fdir_fltr *fdir; +- struct virtchnl_fdir_del f; + bool process_fltr = false; + int len; + +@@ -1756,11 +1756,16 @@ void iavf_del_fdir_filter(struct iavf_adapter *adapter) + list_for_each_entry(fdir, &adapter->fdir_list_head, list) { + if (fdir->state == 
IAVF_FDIR_FLTR_DEL_REQUEST) { + process_fltr = true; +- memset(&f, 0, len); + f.vsi_id = fdir->vc_add_msg.vsi_id; + f.flow_id = fdir->flow_id; + fdir->state = IAVF_FDIR_FLTR_DEL_PENDING; + break; ++ } else if (fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST) { ++ process_fltr = true; ++ f.vsi_id = fdir->vc_add_msg.vsi_id; ++ f.flow_id = fdir->flow_id; ++ fdir->state = IAVF_FDIR_FLTR_DIS_PENDING; ++ break; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); +@@ -1904,6 +1909,48 @@ static void iavf_netdev_features_vlan_strip_set(struct net_device *netdev, + netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; + } + ++/** ++ * iavf_activate_fdir_filters - Reactivate all FDIR filters after a reset ++ * @adapter: private adapter structure ++ * ++ * Called after a reset to re-add all FDIR filters and delete some of them ++ * if they were pending to be deleted. ++ */ ++static void iavf_activate_fdir_filters(struct iavf_adapter *adapter) ++{ ++ struct iavf_fdir_fltr *f, *ftmp; ++ bool add_filters = false; ++ ++ spin_lock_bh(&adapter->fdir_fltr_lock); ++ list_for_each_entry_safe(f, ftmp, &adapter->fdir_list_head, list) { ++ if (f->state == IAVF_FDIR_FLTR_ADD_REQUEST || ++ f->state == IAVF_FDIR_FLTR_ADD_PENDING || ++ f->state == IAVF_FDIR_FLTR_ACTIVE) { ++ /* All filters and requests have been removed in PF, ++ * restore them ++ */ ++ f->state = IAVF_FDIR_FLTR_ADD_REQUEST; ++ add_filters = true; ++ } else if (f->state == IAVF_FDIR_FLTR_DIS_REQUEST || ++ f->state == IAVF_FDIR_FLTR_DIS_PENDING) { ++ /* Link down state, leave filters as inactive */ ++ f->state = IAVF_FDIR_FLTR_INACTIVE; ++ } else if (f->state == IAVF_FDIR_FLTR_DEL_REQUEST || ++ f->state == IAVF_FDIR_FLTR_DEL_PENDING) { ++ /* Delete filters that were pending to be deleted, the ++ * list on PF is already cleared after a reset ++ */ ++ list_del(&f->list); ++ kfree(f); ++ adapter->fdir_active_fltr--; ++ } ++ } ++ spin_unlock_bh(&adapter->fdir_fltr_lock); ++ ++ if (add_filters) ++ adapter->aq_required |= 
IAVF_FLAG_AQ_ADD_FDIR_FILTER; ++} ++ + /** + * iavf_virtchnl_completion + * @adapter: adapter structure +@@ -2081,7 +2128,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry(fdir, &adapter->fdir_list_head, + list) { +- if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) { ++ if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING || ++ fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) { + fdir->state = IAVF_FDIR_FLTR_ACTIVE; + dev_info(&adapter->pdev->dev, "Failed to del Flow Director filter, error %s\n", + iavf_stat_str(&adapter->hw, +@@ -2217,6 +2265,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + ++ iavf_activate_fdir_filters(adapter); ++ + iavf_parse_vf_resource_msg(adapter); + + /* negotiated VIRTCHNL_VF_OFFLOAD_VLAN_V2, so wait for the +@@ -2406,7 +2456,9 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, + list_for_each_entry_safe(fdir, fdir_tmp, &adapter->fdir_list_head, + list) { + if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) { +- if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS) { ++ if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS || ++ del_fltr->status == ++ VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) { + dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is deleted\n", + fdir->loc); + list_del(&fdir->list); +@@ -2418,6 +2470,17 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, + del_fltr->status); + iavf_print_fdir_fltr(adapter, fdir); + } ++ } else if (fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) { ++ if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS || ++ del_fltr->status == ++ VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) { ++ fdir->state = IAVF_FDIR_FLTR_INACTIVE; ++ } else { ++ fdir->state = IAVF_FDIR_FLTR_ACTIVE; ++ dev_info(&adapter->pdev->dev, "Failed to disable Flow Director filter with status: %d\n", ++ del_fltr->status); ++ iavf_print_fdir_fltr(adapter, fdir); ++ } + } + } + 
spin_unlock_bh(&adapter->fdir_fltr_lock); +-- +2.43.0 + diff --git a/queue-6.6/net-atlantic-fix-double-free-in-ring-reinit-logic.patch b/queue-6.6/net-atlantic-fix-double-free-in-ring-reinit-logic.patch new file mode 100644 index 00000000000..17cb6461173 --- /dev/null +++ b/queue-6.6/net-atlantic-fix-double-free-in-ring-reinit-logic.patch @@ -0,0 +1,56 @@ +From 4f3dae3bb5f56d9b4b5633e1fcfc8a15ab421e92 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 13 Dec 2023 10:40:44 +0100 +Subject: net: atlantic: fix double free in ring reinit logic + +From: Igor Russkikh + +[ Upstream commit 7bb26ea74aa86fdf894b7dbd8c5712c5b4187da7 ] + +Driver has a logic leak in ring data allocation/free, +where double free may happen in aq_ring_free if system is under +stress and driver init/deinit is happening. + +The probability is higher to get this during suspend/resume cycle. + +Verification was done simulating same conditions with + + stress -m 2000 --vm-bytes 20M --vm-hang 10 --backoff 1000 + while true; do sudo ifconfig enp1s0 down; sudo ifconfig enp1s0 up; done + +Fixed by explicitly clearing pointers to NULL on deallocation + +Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") +Reported-by: Linus Torvalds +Closes: https://lore.kernel.org/netdev/CAHk-=wiZZi7FcvqVSUirHBjx0bBUZ4dFrMDVLc3+3HCrtq0rBA@mail.gmail.com/ +Signed-off-by: Igor Russkikh +Link: https://lore.kernel.org/r/20231213094044.22988-1-irusskikh@marvell.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +index 694daeaf3e615..e1885c1eb100a 100644 +--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c ++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +@@ -938,11 +938,14 @@ void aq_ring_free(struct aq_ring_s *self) + return; + + kfree(self->buff_ring); ++ 
self->buff_ring = NULL; + +- if (self->dx_ring) ++ if (self->dx_ring) { + dma_free_coherent(aq_nic_get_dev(self->aq_nic), + self->size * self->dx_size, self->dx_ring, + self->dx_ring_pa); ++ self->dx_ring = NULL; ++ } + } + + unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) +-- +2.43.0 + diff --git a/queue-6.6/net-ena-destroy-correct-number-of-xdp-queues-upon-fa.patch b/queue-6.6/net-ena-destroy-correct-number-of-xdp-queues-upon-fa.patch new file mode 100644 index 00000000000..d442666c675 --- /dev/null +++ b/queue-6.6/net-ena-destroy-correct-number-of-xdp-queues-upon-fa.patch @@ -0,0 +1,71 @@ +From bd11d3397b5d9cb4c4b875b63888b4694131af4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Dec 2023 06:27:58 +0000 +Subject: net: ena: Destroy correct number of xdp queues upon failure + +From: David Arinzon + +[ Upstream commit 41db6f99b5489a0d2ef26afe816ef0c6118d1d47 ] + +The ena_setup_and_create_all_xdp_queues() function freed all the +resources upon failure, after creating only xdp_num_queues queues, +instead of freeing just the created ones. + +In this patch, the only resources that are freed, are the ones +allocated right before the failure occurs. 
+ +Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") +Signed-off-by: Shahar Itzko +Signed-off-by: David Arinzon +Link: https://lore.kernel.org/r/20231211062801.27891-2-darinzon@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index f955bde10cf90..098025d292473 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -74,6 +74,8 @@ static void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info); + static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count); ++static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, ++ int first_index, int count); + + /* Increase a stat by cnt while holding syncp seqlock on 32bit machines */ + static void ena_increase_stat(u64 *statp, u64 cnt, +@@ -457,23 +459,22 @@ static void ena_init_all_xdp_queues(struct ena_adapter *adapter) + + static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) + { ++ u32 xdp_first_ring = adapter->xdp_first_ring; ++ u32 xdp_num_queues = adapter->xdp_num_queues; + int rc = 0; + +- rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring, +- adapter->xdp_num_queues); ++ rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); + if (rc) + goto setup_err; + +- rc = ena_create_io_tx_queues_in_range(adapter, +- adapter->xdp_first_ring, +- adapter->xdp_num_queues); ++ rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues); + if (rc) + goto create_err; + + return 0; + + create_err: +- ena_free_all_io_tx_resources(adapter); ++ ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); + setup_err: + return rc; + } +-- 
+2.43.0 + diff --git a/queue-6.6/net-ena-fix-dma-syncing-in-xdp-path-when-swiotlb-is-.patch b/queue-6.6/net-ena-fix-dma-syncing-in-xdp-path-when-swiotlb-is-.patch new file mode 100644 index 00000000000..b4595feb370 --- /dev/null +++ b/queue-6.6/net-ena-fix-dma-syncing-in-xdp-path-when-swiotlb-is-.patch @@ -0,0 +1,182 @@ +From 3b700c0d483a0dc614921470db3818deb43f1fca Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Dec 2023 06:28:00 +0000 +Subject: net: ena: Fix DMA syncing in XDP path when SWIOTLB is on + +From: David Arinzon + +[ Upstream commit d760117060cf2e90b5c59c5492cab179a4dbce01 ] + +This patch fixes two issues: + +Issue 1 +------- +Description +``````````` +Current code does not call dma_sync_single_for_cpu() to sync data from +the device side memory to the CPU side memory before the XDP code path +uses the CPU side data. +This causes the XDP code path to read the unset garbage data in the CPU +side memory, resulting in incorrect handling of the packet by XDP. + +Solution +```````` +1. Add a call to dma_sync_single_for_cpu() before the XDP code starts to + use the data in the CPU side memory. +2. The XDP code verdict can be XDP_PASS, in which case there is a + fallback to the non-XDP code, which also calls + dma_sync_single_for_cpu(). + To avoid calling dma_sync_single_for_cpu() twice: +2.1. Put the dma_sync_single_for_cpu() in the code in such a place where + it happens before XDP and non-XDP code. +2.2. Remove the calls to dma_sync_single_for_cpu() in the non-XDP code + for the first buffer only (rx_copybreak and non-rx_copybreak + cases), since the new call that was added covers these cases. + The call to dma_sync_single_for_cpu() for the second buffer and on + stays because only the first buffer is handled by the newly added + dma_sync_single_for_cpu(). And there is no need for special + handling of the second buffer and on for the XDP path since + currently the driver supports only single buffer packets. 
+ +Issue 2 +------- +Description +``````````` +In case the XDP code forwarded the packet (ENA_XDP_FORWARDED), +ena_unmap_rx_buff_attrs() is called with attrs set to 0. +This means that before unmapping the buffer, the internal function +dma_unmap_page_attrs() will also call dma_sync_single_for_cpu() on +the whole buffer (not only on the data part of it). +This sync is both wasteful (since a sync was already explicitly +called before) and also causes a bug, which will be explained +using the below diagram. + +The following diagram shows the flow of events causing the bug. +The order of events is (1)-(4) as shown in the diagram. + +CPU side memory area + + (3)convert_to_xdp_frame() initializes the + headroom with xdpf metadata + || + \/ + ___________________________________ + | | + 0 | V 4K + --------------------------------------------------------------------- + | xdpf->data | other xdpf | < data > | tailroom ||...| + | | fields | | GARBAGE || | + --------------------------------------------------------------------- + + /\ /\ + || || + (4)ena_unmap_rx_buff_attrs() calls (2)dma_sync_single_for_cpu() + dma_sync_single_for_cpu() on the copies data from device + whole buffer page, overwriting side to CPU side memory + the xdpf->data with GARBAGE. || + 0 4K + --------------------------------------------------------------------- + | headroom | < data > | tailroom ||...| + | GARBAGE | | GARBAGE || | + --------------------------------------------------------------------- + +Device side memory area /\ + || + (1) device writes RX packet data + +After the call to ena_unmap_rx_buff_attrs() in (4), the xdpf->data +becomes corrupted, and so when it is later accessed in +ena_clean_xdp_irq()->xdp_return_frame(), it causes a page fault, +crashing the kernel. + +Solution +```````` +Explicitly tell ena_unmap_rx_buff_attrs() not to call +dma_sync_single_for_cpu() by passing it the ENA_DMA_ATTR_SKIP_CPU_SYNC +flag. 
+ +Fixes: f7d625adeb7b ("net: ena: Add dynamic recycling mechanism for rx buffers") +Signed-off-by: Arthur Kiyanovski +Signed-off-by: David Arinzon +Link: https://lore.kernel.org/r/20231211062801.27891-4-darinzon@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 23 ++++++++------------ + 1 file changed, 9 insertions(+), 14 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index b638e1d3d151a..14e41eb57731b 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1493,11 +1493,6 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, + if (unlikely(!skb)) + return NULL; + +- /* sync this buffer for CPU use */ +- dma_sync_single_for_cpu(rx_ring->dev, +- dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, +- len, +- DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, buf_addr + buf_offset, len); + dma_sync_single_for_device(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, +@@ -1516,17 +1511,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, + + buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom); + +- pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr); +- + /* If XDP isn't loaded try to reuse part of the RX buffer */ + reuse_rx_buf_page = !is_xdp_loaded && + ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset); + +- dma_sync_single_for_cpu(rx_ring->dev, +- pre_reuse_paddr + pkt_offset, +- len, +- DMA_FROM_DEVICE); +- + if (!reuse_rx_buf_page) + ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC); + +@@ -1723,6 +1711,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + int xdp_flags = 0; + int total_len = 0; + int xdp_verdict; ++ u8 pkt_offset; + int rc = 0; + int i; + +@@ -1749,13 +1738,19 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct 
napi_struct *napi, + + /* First descriptor might have an offset set by the device */ + rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; +- rx_info->buf_offset += ena_rx_ctx.pkt_offset; ++ pkt_offset = ena_rx_ctx.pkt_offset; ++ rx_info->buf_offset += pkt_offset; + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", + rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, + ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + ++ dma_sync_single_for_cpu(rx_ring->dev, ++ dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, ++ rx_ring->ena_bufs[0].len, ++ DMA_FROM_DEVICE); ++ + if (ena_xdp_present_ring(rx_ring)) + xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs); + +@@ -1781,7 +1776,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + if (xdp_verdict & ENA_XDP_FORWARDED) { + ena_unmap_rx_buff_attrs(rx_ring, + &rx_ring->rx_buffer_info[req_id], +- 0); ++ DMA_ATTR_SKIP_CPU_SYNC); + rx_ring->rx_buffer_info[req_id].page = NULL; + } + } +-- +2.43.0 + diff --git a/queue-6.6/net-ena-fix-xdp-drops-handling-due-to-multibuf-packe.patch b/queue-6.6/net-ena-fix-xdp-drops-handling-due-to-multibuf-packe.patch new file mode 100644 index 00000000000..28d60dd6b55 --- /dev/null +++ b/queue-6.6/net-ena-fix-xdp-drops-handling-due-to-multibuf-packe.patch @@ -0,0 +1,77 @@ +From 730de72d013cdd9bdb59d5ebf56bc6d6e2b3809c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Dec 2023 06:27:59 +0000 +Subject: net: ena: Fix xdp drops handling due to multibuf packets + +From: David Arinzon + +[ Upstream commit 505b1a88d311ff6f8c44a34f94e3be21745cce6f ] + +Current xdp code drops packets larger than ENA_XDP_MAX_MTU. +This is an incorrect condition since the problem is not the +size of the packet, rather the number of buffers it contains. + +This commit: + +1. Identifies and drops XDP multi-buffer packets at the + beginning of the function. +2. 
Increases the xdp drop statistic when this drop occurs. +3. Adds a one-time print that such drops are happening to + give better indication to the user. + +Fixes: 838c93dc5449 ("net: ena: implement XDP drop support") +Signed-off-by: Arthur Kiyanovski +Signed-off-by: David Arinzon +Link: https://lore.kernel.org/r/20231211062801.27891-3-darinzon@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 17 ++++++++++------- + 1 file changed, 10 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index 098025d292473..b638e1d3d151a 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -1672,20 +1672,23 @@ static void ena_set_rx_hash(struct ena_ring *rx_ring, + } + } + +-static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) ++static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u16 num_descs) + { + struct ena_rx_buffer *rx_info; + int ret; + ++ /* XDP multi-buffer packets not supported */ ++ if (unlikely(num_descs > 1)) { ++ netdev_err_once(rx_ring->adapter->netdev, ++ "xdp: dropped unsupported multi-buffer packets\n"); ++ ena_increase_stat(&rx_ring->rx_stats.xdp_drop, 1, &rx_ring->syncp); ++ return ENA_XDP_DROP; ++ } ++ + rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; + xdp_prepare_buff(xdp, page_address(rx_info->page), + rx_info->buf_offset, + rx_ring->ena_bufs[0].len, false); +- /* If for some reason we received a bigger packet than +- * we expect, then we simply drop it +- */ +- if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) +- return ENA_XDP_DROP; + + ret = ena_xdp_execute(rx_ring, xdp); + +@@ -1754,7 +1757,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + + if (ena_xdp_present_ring(rx_ring)) +- 
xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); ++ xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs); + + /* allocate skb and fill it */ + if (xdp_verdict == ENA_XDP_PASS) +-- +2.43.0 + diff --git a/queue-6.6/net-ena-fix-xdp-redirection-error.patch b/queue-6.6/net-ena-fix-xdp-redirection-error.patch new file mode 100644 index 00000000000..354ac065cb8 --- /dev/null +++ b/queue-6.6/net-ena-fix-xdp-redirection-error.patch @@ -0,0 +1,43 @@ +From 0ea404ba02c610ccbb7f0ee1b9167a5bad3e7a9f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Dec 2023 06:28:01 +0000 +Subject: net: ena: Fix XDP redirection error + +From: David Arinzon + +[ Upstream commit 4ab138ca0a340e6d6e7a6a9bd5004bd8f83127ca ] + +When sending TX packets, the meta descriptor can be all zeroes +as no meta information is required (as in XDP). + +This patch removes the validity check, as when +`disable_meta_caching` is enabled, such TX packets will be +dropped otherwise. + +Fixes: 0e3a3f6dacf0 ("net: ena: support new LLQ acceleration mode") +Signed-off-by: Shay Agroskin +Signed-off-by: David Arinzon +Link: https://lore.kernel.org/r/20231211062801.27891-5-darinzon@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_eth_com.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c +index 3d6f0a466a9ed..f9f886289b970 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c ++++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c +@@ -328,9 +328,6 @@ static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, + * compare it to the stored version, just create the meta + */ + if (io_sq->disable_meta_caching) { +- if (unlikely(!ena_tx_ctx->meta_valid)) +- return -EINVAL; +- + *have_meta = true; + return ena_com_create_meta(io_sq, ena_meta); + } +-- +2.43.0 + diff --git a/queue-6.6/net-fec-correct-queue-selection.patch 
b/queue-6.6/net-fec-correct-queue-selection.patch new file mode 100644 index 00000000000..9cb226d4a78 --- /dev/null +++ b/queue-6.6/net-fec-correct-queue-selection.patch @@ -0,0 +1,81 @@ +From 582e358abf71b6163037847c9ab5e765b85d8971 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Dec 2023 16:38:01 +0800 +Subject: net: fec: correct queue selection + +From: Radu Bulie + +[ Upstream commit 9fc95fe95c3e2a63ced8eeca4b256518ab204b63 ] + +The old implementation extracted VLAN TCI info from the payload +before the VLAN tag has been pushed in the payload. + +Another problem was that the VLAN TCI was extracted even if the +packet did not have VLAN protocol header. + +This resulted in invalid VLAN TCI and as a consequence a random +queue was computed. + +This patch fixes the above issues and use the VLAN TCI from the +skb if it is present or VLAN TCI from payload if present. If no +VLAN header is present queue 0 is selected. + +Fixes: 52c4a1a85f4b ("net: fec: add ndo_select_queue to fix TX bandwidth fluctuations") +Signed-off-by: Radu Bulie +Signed-off-by: Wei Fang +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/fec_main.c | 27 +++++++++-------------- + 1 file changed, 11 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c +index 77c8e9cfb4456..35c95f07fd6d7 100644 +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -3710,31 +3710,26 @@ static int fec_set_features(struct net_device *netdev, + return 0; + } + +-static u16 fec_enet_get_raw_vlan_tci(struct sk_buff *skb) +-{ +- struct vlan_ethhdr *vhdr; +- unsigned short vlan_TCI = 0; +- +- if (skb->protocol == htons(ETH_P_ALL)) { +- vhdr = (struct vlan_ethhdr *)(skb->data); +- vlan_TCI = ntohs(vhdr->h_vlan_TCI); +- } +- +- return vlan_TCI; +-} +- + static u16 fec_enet_select_queue(struct net_device *ndev, struct sk_buff *skb, + struct net_device *sb_dev) + { + struct fec_enet_private *fep = netdev_priv(ndev); +- u16 vlan_tag; ++ u16 vlan_tag = 0; + + if (!(fep->quirks & FEC_QUIRK_HAS_AVB)) + return netdev_pick_tx(ndev, skb, NULL); + +- vlan_tag = fec_enet_get_raw_vlan_tci(skb); +- if (!vlan_tag) ++ /* VLAN is present in the payload.*/ ++ if (eth_type_vlan(skb->protocol)) { ++ struct vlan_ethhdr *vhdr = skb_vlan_eth_hdr(skb); ++ ++ vlan_tag = ntohs(vhdr->h_vlan_TCI); ++ /* VLAN is present in the skb but not yet pushed in the payload.*/ ++ } else if (skb_vlan_tag_present(skb)) { ++ vlan_tag = skb->vlan_tci; ++ } else { + return vlan_tag; ++ } + + return fec_enet_vlan_pri_to_queue[vlan_tag >> 13]; + } +-- +2.43.0 + diff --git a/queue-6.6/net-ipv6-support-reporting-otherwise-unknown-prefix-.patch b/queue-6.6/net-ipv6-support-reporting-otherwise-unknown-prefix-.patch new file mode 100644 index 00000000000..8e8fc127e29 --- /dev/null +++ b/queue-6.6/net-ipv6-support-reporting-otherwise-unknown-prefix-.patch @@ -0,0 +1,114 @@ +From 224c00961ad5e46c6de114ac0a8a267608c0d875 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Dec 
2023 09:36:12 -0800 +Subject: net: ipv6: support reporting otherwise unknown prefix flags in + RTM_NEWPREFIX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Maciej Żenczykowski + +[ Upstream commit bd4a816752bab609dd6d65ae021387beb9e2ddbd ] + +Lorenzo points out that we effectively clear all unknown +flags from PIO when copying them to userspace in the netlink +RTM_NEWPREFIX notification. + +We could fix this one at a time as new flags are defined, +or in one fell swoop - I choose the latter. + +We could either define 6 new reserved flags (reserved1..6) and handle +them individually (and rename them as new flags are defined), or we +could simply copy the entire unmodified byte over - I choose the latter. + +This unfortunately requires some anonymous union/struct magic, +so we add a static assert on the struct size for a little extra safety. + +Cc: David Ahern +Cc: Lorenzo Colitti +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Maciej Żenczykowski +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + include/net/addrconf.h | 12 ++++++++++-- + include/net/if_inet6.h | 4 ---- + net/ipv6/addrconf.c | 6 +----- + 3 files changed, 11 insertions(+), 11 deletions(-) + +diff --git a/include/net/addrconf.h b/include/net/addrconf.h +index 82da55101b5a3..61ebe723ee4d5 100644 +--- a/include/net/addrconf.h ++++ b/include/net/addrconf.h +@@ -31,17 +31,22 @@ struct prefix_info { + __u8 length; + __u8 prefix_len; + ++ union __packed { ++ __u8 flags; ++ struct __packed { + #if defined(__BIG_ENDIAN_BITFIELD) +- __u8 onlink : 1, ++ __u8 onlink : 1, + autoconf : 1, + reserved : 6; + #elif defined(__LITTLE_ENDIAN_BITFIELD) +- __u8 reserved : 6, ++ __u8 reserved : 6, + autoconf : 1, + onlink : 1; + #else + #error "Please fix " + #endif ++ }; ++ }; + __be32 valid; + __be32 prefered; + __be32 reserved2; +@@ -49,6 +54,9 @@ struct prefix_info { + struct in6_addr prefix; + }; + ++/* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */ ++static_assert(sizeof(struct prefix_info) == 32); ++ + #include + #include + #include +diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h +index c8490729b4aea..31bf475eca762 100644 +--- a/include/net/if_inet6.h ++++ b/include/net/if_inet6.h +@@ -22,10 +22,6 @@ + #define IF_RS_SENT 0x10 + #define IF_READY 0x80000000 + +-/* prefix flags */ +-#define IF_PREFIX_ONLINK 0x01 +-#define IF_PREFIX_AUTOCONF 0x02 +- + enum { + INET6_IFADDR_STATE_PREDAD, + INET6_IFADDR_STATE_DAD, +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index 0b6ee962c84e2..b007d098ffe2e 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -6137,11 +6137,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, + pmsg->prefix_len = pinfo->prefix_len; + pmsg->prefix_type = pinfo->type; + pmsg->prefix_pad3 = 0; +- pmsg->prefix_flags = 0; +- if (pinfo->onlink) +- pmsg->prefix_flags |= IF_PREFIX_ONLINK; +- if (pinfo->autoconf) +- pmsg->prefix_flags |= IF_PREFIX_AUTOCONF; ++ pmsg->prefix_flags = pinfo->flags; + + 
if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix)) + goto nla_put_failure; +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5-fix-a-null-vs-is_err-check.patch b/queue-6.6/net-mlx5-fix-a-null-vs-is_err-check.patch new file mode 100644 index 00000000000..0de6e387dc1 --- /dev/null +++ b/queue-6.6/net-mlx5-fix-a-null-vs-is_err-check.patch @@ -0,0 +1,37 @@ +From dfd0684487af5bfaa34f573a541c82daec834b17 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Nov 2023 09:36:20 +0300 +Subject: net/mlx5: Fix a NULL vs IS_ERR() check + +From: Dan Carpenter + +[ Upstream commit ca4ef28d0ad831d2521fa2b16952f37fd9324ca3 ] + +The mlx5_esw_offloads_devlink_port() function returns error pointers, not +NULL. + +Fixes: 7bef147a6ab6 ("net/mlx5: Don't skip vport check") +Signed-off-by: Dan Carpenter +Reviewed-by: Wojciech Drewek +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +index 825f9c687633f..007cb167cabc9 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +@@ -1503,7 +1503,7 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, + rpriv->rep->vport); +- if (dl_port) { ++ if (!IS_ERR(dl_port)) { + SET_NETDEV_DEVLINK_PORT(netdev, dl_port); + mlx5e_rep_vnic_reporter_create(priv, dl_port); + } +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5-nack-sync-reset-request-when-hotplug-is-ena.patch b/queue-6.6/net-mlx5-nack-sync-reset-request-when-hotplug-is-ena.patch new file mode 100644 index 00000000000..acf0852cf4b --- /dev/null +++ b/queue-6.6/net-mlx5-nack-sync-reset-request-when-hotplug-is-ena.patch @@ -0,0 +1,73 @@ +From fabdb2abbb4c7d9a92c80d62340aa1aa66b3425f Mon Sep 17 
00:00:00 2001 +From: Sasha Levin +Date: Mon, 7 Aug 2023 13:11:32 +0300 +Subject: net/mlx5: Nack sync reset request when HotPlug is enabled + +From: Moshe Shemesh + +[ Upstream commit 3d7a3f2612d75de5f371a681038b089ded6667eb ] + +Current sync reset flow is not supported when PCIe bridge connected +directly to mlx5 device has HotPlug interrupt enabled and can be +triggered on link state change event. Return nack on reset request in +such case. + +Fixes: 92501fa6e421 ("net/mlx5: Ack on sync_reset_request only if PF can do reset_now") +Signed-off-by: Moshe Shemesh +Reviewed-by: Shay Drory +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../ethernet/mellanox/mlx5/core/fw_reset.c | 29 +++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +index b568988e92e3e..c4e19d627da21 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +@@ -325,6 +325,29 @@ static void mlx5_fw_live_patch_event(struct work_struct *work) + mlx5_core_err(dev, "Failed to reload FW tracer\n"); + } + ++#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) ++static int mlx5_check_hotplug_interrupt(struct mlx5_core_dev *dev) ++{ ++ struct pci_dev *bridge = dev->pdev->bus->self; ++ u16 reg16; ++ int err; ++ ++ if (!bridge) ++ return -EOPNOTSUPP; ++ ++ err = pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, &reg16); ++ if (err) ++ return err; ++ ++ if ((reg16 & PCI_EXP_SLTCTL_HPIE) && (reg16 & PCI_EXP_SLTCTL_DLLSCE)) { ++ mlx5_core_warn(dev, "FW reset is not supported as HotPlug is enabled\n"); ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} ++#endif ++ + static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) + { + struct pci_bus *bridge_bus = dev->pdev->bus; +@@ -357,6 +380,12 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev) + return false; + } + ++#if 
IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) ++ err = mlx5_check_hotplug_interrupt(dev); ++ if (err) ++ return false; ++#endif ++ + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); + if (err) + return false; +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-check-netdev-pointer-before-checking-its-n.patch b/queue-6.6/net-mlx5e-check-netdev-pointer-before-checking-its-n.patch new file mode 100644 index 00000000000..680b1a004ce --- /dev/null +++ b/queue-6.6/net-mlx5e-check-netdev-pointer-before-checking-its-n.patch @@ -0,0 +1,59 @@ +From 2312b6ae40d5f24501806cad35104b9ed9a8ce17 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 31 Aug 2023 05:47:09 +0300 +Subject: net/mlx5e: Check netdev pointer before checking its net ns + +From: Gavin Li + +[ Upstream commit 7aaf975238c47b710fcc4eca0da1e7902a53abe2 ] + +Previously, when comparing the net namespaces, the case where the netdev +doesn't exist wasn't taken into account, and therefore can cause a crash. +In such a case, the comparing function should return false, as there is no +netdev->net to compare the devlink->net to. + +Furthermore, this will result in an attempt to enter switchdev mode +without a netdev to fail, and which is the desired result as there is no +meaning in switchdev mode without a net device. 
+ +Fixes: 662404b24a4c ("net/mlx5e: Block entering switchdev mode with ns inconsistency") +Signed-off-by: Gavin Li +Reviewed-by: Gavi Teitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/eswitch_offloads.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +index bf78eeca401be..bb8bcb448ae90 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +@@ -3653,14 +3653,18 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) + + static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) + { ++ struct mlx5_core_dev *dev = devlink_priv(devlink); + struct net *devl_net, *netdev_net; +- struct mlx5_eswitch *esw; +- +- esw = mlx5_devlink_eswitch_nocheck_get(devlink); +- netdev_net = dev_net(esw->dev->mlx5e_res.uplink_netdev); +- devl_net = devlink_net(devlink); ++ bool ret = false; + +- return net_eq(devl_net, netdev_net); ++ mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); ++ if (dev->mlx5e_res.uplink_netdev) { ++ netdev_net = dev_net(dev->mlx5e_res.uplink_netdev); ++ devl_net = devlink_net(devlink); ++ ret = net_eq(devl_net, netdev_net); ++ } ++ mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); ++ return ret; + } + + int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-check-the-number-of-elements-before-walk-t.patch b/queue-6.6/net-mlx5e-check-the-number-of-elements-before-walk-t.patch new file mode 100644 index 00000000000..3516564a54e --- /dev/null +++ b/queue-6.6/net-mlx5e-check-the-number-of-elements-before-walk-t.patch @@ -0,0 +1,43 @@ +From 54a4bf1895dd8835e3563dd06381c3e54887ef4f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Oct 2023 02:00:44 +0000 +Subject: net/mlx5e: Check the number of elements 
before walk TC rhashtable + +From: Jianbo Liu + +[ Upstream commit 4e25b661f484df54b6751b65f9ea2434a3b67539 ] + +After IPSec TX tables are destroyed, the flow rules in TC rhashtable, +which have the destination to IPSec, are restored to the original +one, the uplink. + +However, when the device is in switchdev mode and unload driver with +IPSec rules configured, TC rhashtable cleanup is done before IPSec +cleanup, which means tc_ht->tbl is already freed when walking TC +rhashtable, in order to restore the destination. So add the checking +before walking to avoid unexpected behavior. + +Fixes: d1569537a837 ("net/mlx5e: Modify and restore TC rules for IPSec TX rules") +Signed-off-by: Jianbo Liu +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +index 13b5916b64e22..d5d33c3b3aa2a 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +@@ -152,7 +152,7 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) + + xa_for_each(&esw->offloads.vport_reps, i, rep) { + rpriv = rep->rep_data[REP_ETH].priv; +- if (!rpriv || !rpriv->netdev) ++ if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems)) + continue; + + rhashtable_walk_enter(&rpriv->tc_ht, &iter); +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-disable-ipsec-offload-support-if-not-fw-st.patch b/queue-6.6/net-mlx5e-disable-ipsec-offload-support-if-not-fw-st.patch new file mode 100644 index 00000000000..203b4ac764c --- /dev/null +++ b/queue-6.6/net-mlx5e-disable-ipsec-offload-support-if-not-fw-st.patch @@ -0,0 +1,120 @@ +From 74a6e56e6d64f1bafa2c717ba3fb16030bb0201c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Oct 2023 15:44:47 +0200 +Subject: net/mlx5e: 
Disable IPsec offload support if not FW steering + +From: Chris Mi + +[ Upstream commit 762a55a54eec4217e4cec9265ab6e5d4c11b61bd ] + +IPsec FDB offload can only work with FW steering as of now, +disable the cap upon non FW steering. + +And since the IPSec cap is dynamic now based on steering mode. +Cleanup the resources if they exist instead of checking the +IPsec cap again. + +Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'") +Signed-off-by: Chris Mi +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/en_accel/ipsec.c | 26 ++++++++----------- + .../mlx5/core/en_accel/ipsec_offload.c | 8 +++++- + 2 files changed, 18 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +index 0d4b8aef6adda..5834e47e72d82 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +@@ -929,9 +929,11 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) + return; + + mlx5e_accel_ipsec_fs_cleanup(ipsec); +- if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) ++ if (ipsec->netevent_nb.notifier_call) { + unregister_netevent_notifier(&ipsec->netevent_nb); +- if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) ++ ipsec->netevent_nb.notifier_call = NULL; ++ } ++ if (ipsec->aso) + mlx5e_ipsec_aso_cleanup(ipsec); + destroy_workqueue(ipsec->wq); + kfree(ipsec); +@@ -1040,6 +1042,12 @@ static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, + } + } + ++ if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET && ++ !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) { ++ NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported"); ++ return -EINVAL; ++ } ++ + return 0; + } + +@@ -1135,14 +1143,6 @@ static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { + .xdo_dev_state_free = 
mlx5e_xfrm_free_state, + .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, + .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, +-}; +- +-static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = { +- .xdo_dev_state_add = mlx5e_xfrm_add_state, +- .xdo_dev_state_delete = mlx5e_xfrm_del_state, +- .xdo_dev_state_free = mlx5e_xfrm_free_state, +- .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, +- .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, + + .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft, + .xdo_dev_policy_add = mlx5e_xfrm_add_policy, +@@ -1160,11 +1160,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) + + mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); + +- if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) +- netdev->xfrmdev_ops = &mlx5e_ipsec_packet_xfrmdev_ops; +- else +- netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; +- ++ netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; + netdev->features |= NETIF_F_HW_ESP; + netdev->hw_enc_features |= NETIF_F_HW_ESP; + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +index 55b11d8cba532..ce29e31721208 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +@@ -5,6 +5,8 @@ + #include "en.h" + #include "ipsec.h" + #include "lib/crypto.h" ++#include "fs_core.h" ++#include "eswitch.h" + + enum { + MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, +@@ -37,7 +39,10 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) + MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp)) + caps |= MLX5_IPSEC_CAP_CRYPTO; + +- if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload)) { ++ if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) && ++ (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS || ++ (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS && ++ 
is_mdev_legacy_mode(mdev)))) { + if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, + reformat_add_esp_trasport) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, +@@ -558,6 +563,7 @@ void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec) + dma_unmap_single(pdev, aso->dma_addr, sizeof(aso->ctx), + DMA_BIDIRECTIONAL); + kfree(aso); ++ ipsec->aso = NULL; + } + + static void mlx5e_ipsec_aso_copy(struct mlx5_wqe_aso_ctrl_seg *ctrl, +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-ensure-that-ipsec-sequence-packet-number-s.patch b/queue-6.6/net-mlx5e-ensure-that-ipsec-sequence-packet-number-s.patch new file mode 100644 index 00000000000..9b39e13b163 --- /dev/null +++ b/queue-6.6/net-mlx5e-ensure-that-ipsec-sequence-packet-number-s.patch @@ -0,0 +1,49 @@ +From be504b9847c84e1e28fddeda96043f5831198060 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Sep 2023 10:07:13 +0300 +Subject: net/mlx5e: Ensure that IPsec sequence packet number starts from 1 + +From: Leon Romanovsky + +[ Upstream commit 3d42c8cc67a8fcbff0181f9ed6d03d353edcee07 ] + +According to RFC4303, section "3.3.3. Sequence Number Generation", +the first packet sent using a given SA will contain a sequence +number of 1. + +However if user didn't set seq/oseq, the HW used zero as first sequence +packet number. Such misconfiguration causes to drop of first packet +if replay window protection was enabled in SA. + +To fix it, set sequence number to be at least 1. 
+ +Fixes: 7db21ef4566e ("net/mlx5e: Set IPsec replay sequence numbers") +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +index 65678e89aea62..0d4b8aef6adda 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +@@ -121,7 +121,14 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) + if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) + esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); + +- sa_entry->esn_state.esn = esn; ++ if (sa_entry->esn_state.esn_msb) ++ sa_entry->esn_state.esn = esn; ++ else ++ /* According to RFC4303, section "3.3.3. Sequence Number Generation", ++ * the first packet sent using a given SA will contain a sequence ++ * number of 1. 
++ */ ++ sa_entry->esn_state.esn = max_t(u32, esn, 1); + sa_entry->esn_state.esn_msb = esn_msb; + + if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-fix-possible-deadlock-on-mlx5e_tx_timeout_.patch b/queue-6.6/net-mlx5e-fix-possible-deadlock-on-mlx5e_tx_timeout_.patch new file mode 100644 index 00000000000..8eeb97740b2 --- /dev/null +++ b/queue-6.6/net-mlx5e-fix-possible-deadlock-on-mlx5e_tx_timeout_.patch @@ -0,0 +1,243 @@ +From d511e13f216a152d7726e44e4a1a7f40baf85ca5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Sep 2022 18:45:11 +0300 +Subject: net/mlx5e: Fix possible deadlock on mlx5e_tx_timeout_work + +From: Moshe Shemesh + +[ Upstream commit eab0da38912ebdad922ed0388209f7eb0a5163cd ] + +Due to the cited patch, devlink health commands take devlink lock and +this may result in deadlock for mlx5e_tx_reporter as it takes local +state_lock before calling devlink health report and on the other hand +devlink health commands such as diagnose for same reporter take local +state_lock after taking devlink lock (see kernel log below). + +To fix it, remove local state_lock from mlx5e_tx_timeout_work() before +calling devlink_health_report() and take care to cancel the work before +any call to close channels, which may free the SQs that should be +handled by the work. Before cancel_work_sync(), use current_work() to +check we are not calling it from within the work, as +mlx5e_tx_timeout_work() itself may close the channels and reopen as part +of recovery flow. + +While removing state_lock from mlx5e_tx_timeout_work() keep rtnl_lock to +ensure no change in netdev->real_num_tx_queues, but use rtnl_trylock() +and a flag to avoid deadlock by calling cancel_work_sync() before +closing the channels while holding rtnl_lock too. 
+ +Kernel log: +====================================================== +WARNING: possible circular locking dependency detected +6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Not tainted +------------------------------------------------------ +kworker/u16:2/65 is trying to acquire lock: +ffff888122f6c2f8 (&devlink->lock_key#2){+.+.}-{3:3}, at: devlink_health_report+0x2f1/0x7e0 + +but task is already holding lock: +ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #1 (&priv->state_lock){+.+.}-{3:3}: + __mutex_lock+0x12c/0x14b0 + mlx5e_rx_reporter_diagnose+0x71/0x700 [mlx5_core] + devlink_nl_cmd_health_reporter_diagnose_doit+0x212/0xa50 + genl_family_rcv_msg_doit+0x1e9/0x2f0 + genl_rcv_msg+0x2e9/0x530 + netlink_rcv_skb+0x11d/0x340 + genl_rcv+0x24/0x40 + netlink_unicast+0x438/0x710 + netlink_sendmsg+0x788/0xc40 + sock_sendmsg+0xb0/0xe0 + __sys_sendto+0x1c1/0x290 + __x64_sys_sendto+0xdd/0x1b0 + do_syscall_64+0x3d/0x90 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +-> #0 (&devlink->lock_key#2){+.+.}-{3:3}: + __lock_acquire+0x2c8a/0x6200 + lock_acquire+0x1c1/0x550 + __mutex_lock+0x12c/0x14b0 + devlink_health_report+0x2f1/0x7e0 + mlx5e_health_report+0xc9/0xd7 [mlx5_core] + mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core] + mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core] + process_one_work+0x7c2/0x1340 + worker_thread+0x59d/0xec0 + kthread+0x28f/0x330 + ret_from_fork+0x1f/0x30 + +other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(&priv->state_lock); + lock(&devlink->lock_key#2); + lock(&priv->state_lock); + lock(&devlink->lock_key#2); + + *** DEADLOCK *** + +4 locks held by kworker/u16:2/65: + #0: ffff88811a55b138 ((wq_completion)mlx5e#2){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340 + #1: ffff888101de7db8 
((work_completion)(&priv->tx_timeout_work)){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340 + #2: ffffffff84ce8328 (rtnl_mutex){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x53/0x280 [mlx5_core] + #3: ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] + +stack backtrace: +CPU: 1 PID: 65 Comm: kworker/u16:2 Not tainted 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] +Call Trace: + + dump_stack_lvl+0x57/0x7d + check_noncircular+0x278/0x300 + ? print_circular_bug+0x460/0x460 + ? find_held_lock+0x2d/0x110 + ? __stack_depot_save+0x24c/0x520 + ? alloc_chain_hlocks+0x228/0x700 + __lock_acquire+0x2c8a/0x6200 + ? register_lock_class+0x1860/0x1860 + ? kasan_save_stack+0x1e/0x40 + ? kasan_set_free_info+0x20/0x30 + ? ____kasan_slab_free+0x11d/0x1b0 + ? kfree+0x1ba/0x520 + ? devlink_health_do_dump.part.0+0x171/0x3a0 + ? devlink_health_report+0x3d5/0x7e0 + lock_acquire+0x1c1/0x550 + ? devlink_health_report+0x2f1/0x7e0 + ? lockdep_hardirqs_on_prepare+0x400/0x400 + ? find_held_lock+0x2d/0x110 + __mutex_lock+0x12c/0x14b0 + ? devlink_health_report+0x2f1/0x7e0 + ? devlink_health_report+0x2f1/0x7e0 + ? mutex_lock_io_nested+0x1320/0x1320 + ? trace_hardirqs_on+0x2d/0x100 + ? bit_wait_io_timeout+0x170/0x170 + ? devlink_health_do_dump.part.0+0x171/0x3a0 + ? kfree+0x1ba/0x520 + ? devlink_health_do_dump.part.0+0x171/0x3a0 + devlink_health_report+0x2f1/0x7e0 + mlx5e_health_report+0xc9/0xd7 [mlx5_core] + mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core] + ? lockdep_hardirqs_on_prepare+0x400/0x400 + ? mlx5e_reporter_tx_err_cqe+0x1b0/0x1b0 [mlx5_core] + ? mlx5e_tx_reporter_timeout_dump+0x70/0x70 [mlx5_core] + ? mlx5e_tx_reporter_dump_sq+0x320/0x320 [mlx5_core] + ? mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] + ? mutex_lock_io_nested+0x1320/0x1320 + ? process_one_work+0x70f/0x1340 + ? 
lockdep_hardirqs_on_prepare+0x400/0x400 + ? lock_downgrade+0x6e0/0x6e0 + mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core] + process_one_work+0x7c2/0x1340 + ? lockdep_hardirqs_on_prepare+0x400/0x400 + ? pwq_dec_nr_in_flight+0x230/0x230 + ? rwlock_bug.part.0+0x90/0x90 + worker_thread+0x59d/0xec0 + ? process_one_work+0x1340/0x1340 + kthread+0x28f/0x330 + ? kthread_complete_and_exit+0x20/0x20 + ret_from_fork+0x1f/0x30 + + +Fixes: c90005b5f75c ("devlink: Hold the instance lock in health callbacks") +Signed-off-by: Moshe Shemesh +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + + .../net/ethernet/mellanox/mlx5/core/en_main.c | 27 ++++++++++++++++--- + 2 files changed, 25 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h +index 86f2690c5e015..20a6bc1a234f4 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -818,6 +818,7 @@ enum { + MLX5E_STATE_DESTROYING, + MLX5E_STATE_XDP_TX_ENABLED, + MLX5E_STATE_XDP_ACTIVE, ++ MLX5E_STATE_CHANNELS_ACTIVE, + }; + + struct mlx5e_modify_sq_param { +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index acb40770cf0cf..c3961c2bbc57c 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -2668,6 +2668,7 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) + { + int i; + ++ ASSERT_RTNL(); + if (chs->ptp) { + mlx5e_ptp_close(chs->ptp); + chs->ptp = NULL; +@@ -2945,17 +2946,29 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) + if (mlx5e_is_vport_rep(priv)) + mlx5e_rep_activate_channels(priv); + ++ set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); ++ + mlx5e_wait_channels_min_rx_wqes(&priv->channels); + + if (priv->rx_res) + 
mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); + } + ++static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv) ++{ ++ WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)); ++ if (current_work() != &priv->tx_timeout_work) ++ cancel_work_sync(&priv->tx_timeout_work); ++} ++ + void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) + { + if (priv->rx_res) + mlx5e_rx_res_channels_deactivate(priv->rx_res); + ++ clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); ++ mlx5e_cancel_tx_timeout_work(priv); ++ + if (mlx5e_is_vport_rep(priv)) + mlx5e_rep_deactivate_channels(priv); + +@@ -4734,8 +4747,17 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) + struct net_device *netdev = priv->netdev; + int i; + +- rtnl_lock(); +- mutex_lock(&priv->state_lock); ++ /* Take rtnl_lock to ensure no change in netdev->real_num_tx_queues ++ * through this flow. However, channel closing flows have to wait for ++ * this work to finish while holding rtnl lock too. So either get the ++ * lock or find that channels are being closed for other reason and ++ * this work is not relevant anymore. 
++ */ ++ while (!rtnl_trylock()) { ++ if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) ++ return; ++ msleep(20); ++ } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; +@@ -4754,7 +4776,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) + } + + unlock: +- mutex_unlock(&priv->state_lock); + rtnl_unlock(); + } + +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-honor-user-choice-of-ipsec-replay-window-s.patch b/queue-6.6/net-mlx5e-honor-user-choice-of-ipsec-replay-window-s.patch new file mode 100644 index 00000000000..104eedde7ef --- /dev/null +++ b/queue-6.6/net-mlx5e-honor-user-choice-of-ipsec-replay-window-s.patch @@ -0,0 +1,89 @@ +From c93e2a63f590732fff03704c9588e975b17a5255 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 20 Aug 2023 20:58:56 +0300 +Subject: net/mlx5e: Honor user choice of IPsec replay window size + +From: Leon Romanovsky + +[ Upstream commit a5e400a985df8041ed4659ed1462aa9134318130 ] + +Users can configure the IPsec replay window size, but the mlx5 driver +didn't honor their choice and always set it to 32 bits. Fix the assignment +logic to configure the right size from the beginning.
+ +Fixes: 7db21ef4566e ("net/mlx5e: Set IPsec replay sequence numbers") +Reviewed-by: Patrisious Haddad +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/en_accel/ipsec.c | 21 +++++++++++++++++++ + .../mlx5/core/en_accel/ipsec_offload.c | 2 +- + include/linux/mlx5/mlx5_ifc.h | 7 +++++++ + 3 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +index 7d4ceb9b9c16f..65678e89aea62 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +@@ -335,6 +335,27 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, + attrs->replay_esn.esn = sa_entry->esn_state.esn; + attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; + attrs->replay_esn.overlap = sa_entry->esn_state.overlap; ++ switch (x->replay_esn->replay_window) { ++ case 32: ++ attrs->replay_esn.replay_window = ++ MLX5_IPSEC_ASO_REPLAY_WIN_32BIT; ++ break; ++ case 64: ++ attrs->replay_esn.replay_window = ++ MLX5_IPSEC_ASO_REPLAY_WIN_64BIT; ++ break; ++ case 128: ++ attrs->replay_esn.replay_window = ++ MLX5_IPSEC_ASO_REPLAY_WIN_128BIT; ++ break; ++ case 256: ++ attrs->replay_esn.replay_window = ++ MLX5_IPSEC_ASO_REPLAY_WIN_256BIT; ++ break; ++ default: ++ WARN_ON(true); ++ return; ++ } + } + + attrs->dir = x->xso.dir; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +index 3245d1c9d5392..55b11d8cba532 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +@@ -94,7 +94,7 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, + + if (attrs->dir == XFRM_DEV_OFFLOAD_IN) { + MLX5_SET(ipsec_aso, aso_ctx, window_sz, +- attrs->replay_esn.replay_window / 64); ++ 
attrs->replay_esn.replay_window); + MLX5_SET(ipsec_aso, aso_ctx, mode, + MLX5_IPSEC_ASO_REPLAY_PROTECTION); + } +diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h +index fc3db401f8a28..f08cd13031458 100644 +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -11936,6 +11936,13 @@ enum { + MLX5_IPSEC_ASO_INC_SN = 0x2, + }; + ++enum { ++ MLX5_IPSEC_ASO_REPLAY_WIN_32BIT = 0x0, ++ MLX5_IPSEC_ASO_REPLAY_WIN_64BIT = 0x1, ++ MLX5_IPSEC_ASO_REPLAY_WIN_128BIT = 0x2, ++ MLX5_IPSEC_ASO_REPLAY_WIN_256BIT = 0x3, ++}; ++ + struct mlx5_ifc_ipsec_aso_bits { + u8 valid[0x1]; + u8 reserved_at_201[0x1]; +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-reduce-eswitch-mode_lock-protection-contex.patch b/queue-6.6/net-mlx5e-reduce-eswitch-mode_lock-protection-contex.patch new file mode 100644 index 00000000000..1438e89d58d --- /dev/null +++ b/queue-6.6/net-mlx5e-reduce-eswitch-mode_lock-protection-contex.patch @@ -0,0 +1,305 @@ +From 1fe0f5c3938ea965d4f1cdf2de0f223a13e63099 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Oct 2023 03:38:29 +0000 +Subject: net/mlx5e: Reduce eswitch mode_lock protection context + +From: Jianbo Liu + +[ Upstream commit baac8351f74c543896b8fd40138b7ad9365587a3 ] + +Currently eswitch mode_lock is so heavy, for example, it's locked +during the whole process of the mode change, which may need to hold +other locks. As the mode_lock is also used by IPSec to block mode and +encap change now, it is easy to cause lock dependency. + +Since some of protections are also done by devlink lock, the eswitch +mode_lock is not needed at those places, and thus the possibility of +lockdep issue is reduced. 
+ +Fixes: c8e350e62fc5 ("net/mlx5e: Make TC and IPsec offloads mutually exclusive on a netdev") +Signed-off-by: Jianbo Liu +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 9 +++-- + .../net/ethernet/mellanox/mlx5/core/eswitch.c | 35 ++++++++++------- + .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 + + .../mellanox/mlx5/core/eswitch_offloads.c | 38 +++++++++++-------- + 4 files changed, 52 insertions(+), 32 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +index 6dc60be2a697c..03f69c485a006 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +@@ -1834,8 +1834,11 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int err = 0; + +- if (esw) +- down_write(&esw->mode_lock); ++ if (esw) { ++ err = mlx5_esw_lock(esw); ++ if (err) ++ return err; ++ } + + if (mdev->num_block_ipsec) { + err = -EBUSY; +@@ -1846,7 +1849,7 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) + + unlock: + if (esw) +- up_write(&esw->mode_lock); ++ mlx5_esw_unlock(esw); + + return err; + } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +index 8d0b915a31214..3047d7015c525 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +@@ -1463,7 +1463,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) + { + int err; + +- lockdep_assert_held(&esw->mode_lock); ++ devl_assert_locked(priv_to_devlink(esw->dev)); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); +@@ -1531,7 +1531,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int 
num_vfs) + if (toggle_lag) + mlx5_lag_disable_change(esw->dev); + +- down_write(&esw->mode_lock); + if (!mlx5_esw_is_fdb_created(esw)) { + ret = mlx5_eswitch_enable_locked(esw, num_vfs); + } else { +@@ -1554,8 +1553,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) + } + } + +- up_write(&esw->mode_lock); +- + if (toggle_lag) + mlx5_lag_enable_change(esw->dev); + +@@ -1569,12 +1566,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) + return; + + devl_assert_locked(priv_to_devlink(esw->dev)); +- down_write(&esw->mode_lock); + /* If driver is unloaded, this function is called twice by remove_one() + * and mlx5_unload(). Prevent the second call. + */ + if (!esw->esw_funcs.num_vfs && !esw->esw_funcs.num_ec_vfs && !clear_vf) +- goto unlock; ++ return; + + esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", +@@ -1603,9 +1599,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) + esw->esw_funcs.num_vfs = 0; + else + esw->esw_funcs.num_ec_vfs = 0; +- +-unlock: +- up_write(&esw->mode_lock); + } + + /* Free resources for corresponding eswitch mode. 
It is called by devlink +@@ -1647,10 +1640,8 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) + + devl_assert_locked(priv_to_devlink(esw->dev)); + mlx5_lag_disable_change(esw->dev); +- down_write(&esw->mode_lock); + mlx5_eswitch_disable_locked(esw); + esw->mode = MLX5_ESWITCH_LEGACY; +- up_write(&esw->mode_lock); + mlx5_lag_enable_change(esw->dev); + } + +@@ -2254,8 +2245,13 @@ bool mlx5_esw_hold(struct mlx5_core_dev *mdev) + if (!mlx5_esw_allowed(esw)) + return true; + +- if (down_read_trylock(&esw->mode_lock) != 0) ++ if (down_read_trylock(&esw->mode_lock) != 0) { ++ if (esw->eswitch_operation_in_progress) { ++ up_read(&esw->mode_lock); ++ return false; ++ } + return true; ++ } + + return false; + } +@@ -2312,7 +2308,8 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) + if (down_write_trylock(&esw->mode_lock) == 0) + return -EINVAL; + +- if (atomic64_read(&esw->user_count) > 0) { ++ if (esw->eswitch_operation_in_progress || ++ atomic64_read(&esw->user_count) > 0) { + up_write(&esw->mode_lock); + return -EBUSY; + } +@@ -2320,6 +2317,18 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) + return esw->mode; + } + ++int mlx5_esw_lock(struct mlx5_eswitch *esw) ++{ ++ down_write(&esw->mode_lock); ++ ++ if (esw->eswitch_operation_in_progress) { ++ up_write(&esw->mode_lock); ++ return -EBUSY; ++ } ++ ++ return 0; ++} ++ + /** + * mlx5_esw_unlock() - Release write lock on esw mode lock + * @esw: eswitch device. 
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +index 37ab66e7b403f..b674b57d05aad 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +@@ -383,6 +383,7 @@ struct mlx5_eswitch { + struct xarray paired; + struct mlx5_devcom_comp_dev *devcom; + u16 enabled_ipsec_vf_count; ++ bool eswitch_operation_in_progress; + }; + + void esw_offloads_disable(struct mlx5_eswitch *esw); +@@ -827,6 +828,7 @@ void mlx5_esw_release(struct mlx5_core_dev *dev); + void mlx5_esw_get(struct mlx5_core_dev *dev); + void mlx5_esw_put(struct mlx5_core_dev *dev); + int mlx5_esw_try_lock(struct mlx5_eswitch *esw); ++int mlx5_esw_lock(struct mlx5_eswitch *esw); + void mlx5_esw_unlock(struct mlx5_eswitch *esw); + + void esw_vport_change_handle_locked(struct mlx5_vport *vport); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +index 88236e75fd901..bf78eeca401be 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +@@ -3733,13 +3733,16 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + goto unlock; + } + ++ esw->eswitch_operation_in_progress = true; ++ up_write(&esw->mode_lock); ++ + mlx5_eswitch_disable_locked(esw); + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { + if (mlx5_devlink_trap_get_num_active(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change mode while devlink traps are active"); + err = -EOPNOTSUPP; +- goto unlock; ++ goto skip; + } + err = esw_offloads_start(esw, extack); + } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { +@@ -3749,6 +3752,9 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + err = -EINVAL; + } + ++skip: ++ down_write(&esw->mode_lock); ++ esw->eswitch_operation_in_progress = false; + unlock: + mlx5_esw_unlock(esw); 
+ enable_lag: +@@ -3759,16 +3765,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) + { + struct mlx5_eswitch *esw; +- int err; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + +- down_read(&esw->mode_lock); +- err = esw_mode_to_devlink(esw->mode, mode); +- up_read(&esw->mode_lock); +- return err; ++ return esw_mode_to_devlink(esw->mode, mode); + } + + static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, +@@ -3862,11 +3864,15 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, + if (err) + goto out; + ++ esw->eswitch_operation_in_progress = true; ++ up_write(&esw->mode_lock); ++ + err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack); +- if (err) +- goto out; ++ if (!err) ++ esw->offloads.inline_mode = mlx5_mode; + +- esw->offloads.inline_mode = mlx5_mode; ++ down_write(&esw->mode_lock); ++ esw->eswitch_operation_in_progress = false; + up_write(&esw->mode_lock); + return 0; + +@@ -3878,16 +3884,12 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, + int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) + { + struct mlx5_eswitch *esw; +- int err; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + +- down_read(&esw->mode_lock); +- err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); +- up_read(&esw->mode_lock); +- return err; ++ return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); + } + + bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) +@@ -3969,6 +3971,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + goto unlock; + } + ++ esw->eswitch_operation_in_progress = true; ++ up_write(&esw->mode_lock); ++ + esw_destroy_offloads_fdb_tables(esw); + + esw->offloads.encap = encap; +@@ -3982,6 +3987,9 @@ int 
mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + (void)esw_create_offloads_fdb_tables(esw); + } + ++ down_write(&esw->mode_lock); ++ esw->eswitch_operation_in_progress = false; ++ + unlock: + up_write(&esw->mode_lock); + return err; +@@ -3996,9 +4004,7 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + if (IS_ERR(esw)) + return PTR_ERR(esw); + +- down_read(&esw->mode_lock); + *encap = esw->offloads.encap; +- up_read(&esw->mode_lock); + return 0; + } + +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-tc-don-t-offload-post-action-rule-if-not-s.patch b/queue-6.6/net-mlx5e-tc-don-t-offload-post-action-rule-if-not-s.patch new file mode 100644 index 00000000000..5252e0d5280 --- /dev/null +++ b/queue-6.6/net-mlx5e-tc-don-t-offload-post-action-rule-if-not-s.patch @@ -0,0 +1,167 @@ +From f1f2a993678b755dadf802c9beb90aa5f7b88ebb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Sep 2023 13:28:10 +0300 +Subject: net/mlx5e: TC, Don't offload post action rule if not supported + +From: Chris Mi + +[ Upstream commit ccbe33003b109f14c4dde2a4fca9c2a50c423601 ] + +If post action is not supported, e.g. ignore_flow_level is not +supported, don't offload the post action rule. Otherwise, we will hit +a panic [1]. + +Fix it by checking whether the post action table is valid.
+ +[1] +[445537.863880] BUG: unable to handle page fault for address: ffffffffffffffb1 +[445537.864617] #PF: supervisor read access in kernel mode +[445537.865244] #PF: error_code(0x0000) - not-present page +[445537.865860] PGD 70683a067 P4D 70683a067 PUD 70683c067 PMD 0 +[445537.866497] Oops: 0000 [#1] PREEMPT SMP NOPTI +[445537.867077] CPU: 19 PID: 248742 Comm: tc Kdump: loaded Tainted: G O 6.5.0+ #1 +[445537.867888] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +[445537.868834] RIP: 0010:mlx5e_tc_post_act_add+0x51/0x130 [mlx5_core] +[445537.869635] Code: c0 0d 00 00 e8 20 96 c6 d3 48 85 c0 0f 84 e5 00 00 00 c7 83 b0 01 00 00 00 00 00 00 49 89 c5 31 c0 31 d2 66 89 83 b4 01 00 00 <49> 8b 44 24 10 83 23 df 83 8b d8 01 00 00 04 48 89 83 c0 01 00 00 +[445537.871318] RSP: 0018:ffffb98741cef428 EFLAGS: 00010246 +[445537.871962] RAX: 0000000000000000 RBX: ffff8df341167000 RCX: 0000000000000001 +[445537.872704] RDX: 0000000000000000 RSI: ffffffff954844e1 RDI: ffffffff9546e9cb +[445537.873430] RBP: ffffb98741cef448 R08: 0000000000000020 R09: 0000000000000246 +[445537.874160] R10: 0000000000000000 R11: ffffffff943f73ff R12: ffffffffffffffa1 +[445537.874893] R13: ffff8df36d336c20 R14: ffffffffffffffa1 R15: ffff8df341167000 +[445537.875628] FS: 00007fcd6564f800(0000) GS:ffff8dfa9ea00000(0000) knlGS:0000000000000000 +[445537.876425] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[445537.877090] CR2: ffffffffffffffb1 CR3: 00000003b5884001 CR4: 0000000000770ee0 +[445537.877832] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[445537.878564] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[445537.879300] PKRU: 55555554 +[445537.879797] Call Trace: +[445537.880263] +[445537.880713] ? show_regs+0x6e/0x80 +[445537.881232] ? __die+0x29/0x70 +[445537.881731] ? page_fault_oops+0x85/0x160 +[445537.882276] ? search_exception_tables+0x65/0x70 +[445537.882852] ? 
kernelmode_fixup_or_oops+0xa2/0x120 +[445537.883432] ? __bad_area_nosemaphore+0x18b/0x250 +[445537.884019] ? bad_area_nosemaphore+0x16/0x20 +[445537.884566] ? do_kern_addr_fault+0x8b/0xa0 +[445537.885105] ? exc_page_fault+0xf5/0x1c0 +[445537.885623] ? asm_exc_page_fault+0x2b/0x30 +[445537.886149] ? __kmem_cache_alloc_node+0x1df/0x2a0 +[445537.886717] ? mlx5e_tc_post_act_add+0x51/0x130 [mlx5_core] +[445537.887431] ? mlx5e_tc_post_act_add+0x30/0x130 [mlx5_core] +[445537.888172] alloc_flow_post_acts+0xfb/0x1c0 [mlx5_core] +[445537.888849] parse_tc_actions+0x582/0x5c0 [mlx5_core] +[445537.889505] parse_tc_fdb_actions+0xd7/0x1f0 [mlx5_core] +[445537.890175] __mlx5e_add_fdb_flow+0x1ab/0x2b0 [mlx5_core] +[445537.890843] mlx5e_add_fdb_flow+0x56/0x120 [mlx5_core] +[445537.891491] ? debug_smp_processor_id+0x1b/0x30 +[445537.892037] mlx5e_tc_add_flow+0x79/0x90 [mlx5_core] +[445537.892676] mlx5e_configure_flower+0x305/0x450 [mlx5_core] +[445537.893341] mlx5e_rep_setup_tc_cls_flower+0x3d/0x80 [mlx5_core] +[445537.894037] mlx5e_rep_setup_tc_cb+0x5c/0xa0 [mlx5_core] +[445537.894693] tc_setup_cb_add+0xdc/0x220 +[445537.895177] fl_hw_replace_filter+0x15f/0x220 [cls_flower] +[445537.895767] fl_change+0xe87/0x1190 [cls_flower] +[445537.896302] tc_new_tfilter+0x484/0xa50 + +Fixes: f0da4daa3413 ("net/mlx5e: Refactor ct to use post action infrastructure") +Signed-off-by: Chris Mi +Reviewed-by: Jianbo Liu +Signed-off-by: Saeed Mahameed +Reviewed-by: Automatic Verification +Reviewed-by: Maher Sanalla +Reviewed-by: Shay Drory +Reviewed-by: Moshe Shemesh +Reviewed-by: Shachar Kagan +Reviewed-by: Tariq Toukan +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/en/tc/post_act.c | 6 +++++ + .../net/ethernet/mellanox/mlx5/core/en_tc.c | 25 ++++++++++++++++--- + 2 files changed, 27 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +index 4e923a2874aef..86bf007fd05b7 100644 +--- 
a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +@@ -83,6 +83,9 @@ mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5_flow_spec *spec; + int err; + ++ if (IS_ERR(post_act)) ++ return PTR_ERR(post_act); ++ + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; +@@ -111,6 +114,9 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *po + struct mlx5e_post_act_handle *handle; + int err; + ++ if (IS_ERR(post_act)) ++ return ERR_CAST(post_act); ++ + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return ERR_PTR(-ENOMEM); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +index b62fd37493410..1bead98f73bf5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -444,6 +444,9 @@ mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv, + struct mlx5e_flow_meter_handle *meter; + enum mlx5e_post_meter_type type; + ++ if (IS_ERR(post_act)) ++ return PTR_ERR(post_act); ++ + meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params); + if (IS_ERR(meter)) { + mlx5_core_err(priv->mdev, "Failed to get flow meter\n"); +@@ -3736,6 +3739,20 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) + return err; + } + ++static int ++set_branch_dest_ft(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) ++{ ++ struct mlx5e_post_act *post_act = get_post_action(priv); ++ ++ if (IS_ERR(post_act)) ++ return PTR_ERR(post_act); ++ ++ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; ++ attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act); ++ ++ return 0; ++} ++ + static int + alloc_branch_attr(struct mlx5e_tc_flow *flow, + struct mlx5e_tc_act_branch_ctrl *cond, +@@ -3759,8 +3776,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, + break; + case FLOW_ACTION_ACCEPT: + case 
FLOW_ACTION_PIPE: +- attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; +- attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); ++ if (set_branch_dest_ft(flow->priv, attr)) ++ goto out_err; + break; + case FLOW_ACTION_JUMP: + if (*jump_count) { +@@ -3769,8 +3786,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, + goto out_err; + } + *jump_count = cond->extval; +- attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; +- attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); ++ if (set_branch_dest_ft(flow->priv, attr)) ++ goto out_err; + break; + default: + err = -EOPNOTSUPP; +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-tidy-up-ipsec-nat-t-sa-discovery.patch b/queue-6.6/net-mlx5e-tidy-up-ipsec-nat-t-sa-discovery.patch new file mode 100644 index 00000000000..f8cf573d63f --- /dev/null +++ b/queue-6.6/net-mlx5e-tidy-up-ipsec-nat-t-sa-discovery.patch @@ -0,0 +1,92 @@ +From a35fab7a2c0f6ff983e80dd38255f2699c25632f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 12 Nov 2023 13:50:00 +0200 +Subject: net/mlx5e: Tidy up IPsec NAT-T SA discovery + +From: Leon Romanovsky + +[ Upstream commit c2bf84f1d1a1595dcc45fe867f0e02b331993fee ] + +IPsec NAT-T packets are UDP encapsulated packets over ESP normal ones. +When they arrive at RX, the SPI and ESP are located in the inner header, +while the check was performed on the outer header instead. + +That wrong check led to a situation where a received rekeying request +was missed and caused a rekey timeout, which "compensated" for this failure +by completing the rekeying.
+ +Fixes: d65954934937 ("net/mlx5e: Support IPsec NAT-T functionality") +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 22 ++++++++++++++----- + include/linux/mlx5/mlx5_ifc.h | 2 +- + 2 files changed, 17 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +index fc6aca7c05a48..6dc60be2a697c 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +@@ -974,13 +974,22 @@ static void setup_fte_esp(struct mlx5_flow_spec *spec) + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP); + } + +-static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi) ++static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi, bool encap) + { + /* SPI number */ + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + +- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi); +- MLX5_SET(fte_match_param, spec->match_value, misc_parameters.outer_esp_spi, spi); ++ if (encap) { ++ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, ++ misc_parameters.inner_esp_spi); ++ MLX5_SET(fte_match_param, spec->match_value, ++ misc_parameters.inner_esp_spi, spi); ++ } else { ++ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, ++ misc_parameters.outer_esp_spi); ++ MLX5_SET(fte_match_param, spec->match_value, ++ misc_parameters.outer_esp_spi, spi); ++ } + } + + static void setup_fte_no_frags(struct mlx5_flow_spec *spec) +@@ -1339,8 +1348,9 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) + else + setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + +- setup_fte_spi(spec, attrs->spi); +- setup_fte_esp(spec); ++ setup_fte_spi(spec, attrs->spi, attrs->encap); ++ if (!attrs->encap) ++ setup_fte_esp(spec); + setup_fte_no_frags(spec); + 
setup_fte_upper_proto_match(spec, &attrs->upspec); + +@@ -1443,7 +1453,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) + + switch (attrs->type) { + case XFRM_DEV_OFFLOAD_CRYPTO: +- setup_fte_spi(spec, attrs->spi); ++ setup_fte_spi(spec, attrs->spi, false); + setup_fte_esp(spec); + setup_fte_reg_a(spec); + break; +diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h +index f08cd13031458..8ac6ae79e0835 100644 +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -620,7 +620,7 @@ struct mlx5_ifc_fte_match_set_misc_bits { + + u8 reserved_at_140[0x8]; + u8 bth_dst_qp[0x18]; +- u8 reserved_at_160[0x20]; ++ u8 inner_esp_spi[0x20]; + u8 outer_esp_spi[0x20]; + u8 reserved_at_1a0[0x60]; + }; +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-unify-esw-and-normal-ipsec-status-table-cr.patch b/queue-6.6/net-mlx5e-unify-esw-and-normal-ipsec-status-table-cr.patch new file mode 100644 index 00000000000..d69f2468bcb --- /dev/null +++ b/queue-6.6/net-mlx5e-unify-esw-and-normal-ipsec-status-table-cr.patch @@ -0,0 +1,461 @@ +From 9a5f6e2baf4ae34be44de27e1ea70124945d2d32 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Sep 2023 14:06:18 +0300 +Subject: net/mlx5e: Unify esw and normal IPsec status table + creation/destruction + +From: Patrisious Haddad + +[ Upstream commit 94af50c0a9bb961fe93cf0fdd14eb0883da86721 ] + +Change normal IPsec flow to use the same creation/destruction functions +for status flow table as that of ESW, which first of all refines the +code to have less code duplication. + +And more importantly, the ESW status table handles IPsec syndrome +checks at steering by HW, which is more efficient than the previous +behaviour we had where it was copied to WQE meta data and checked +by the driver. 
+ +Fixes: 1762f132d542 ("net/mlx5e: Support IPsec packet offload for RX in switchdev mode") +Signed-off-by: Patrisious Haddad +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 187 +++++++++++++----- + .../mellanox/mlx5/core/esw/ipsec_fs.c | 152 -------------- + .../mellanox/mlx5/core/esw/ipsec_fs.h | 15 -- + 3 files changed, 141 insertions(+), 213 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +index 7dba4221993f0..fc6aca7c05a48 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +@@ -128,63 +128,166 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, + return mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + } + +-static int ipsec_status_rule(struct mlx5_core_dev *mdev, +- struct mlx5e_ipsec_rx *rx, +- struct mlx5_flow_destination *dest) ++static void ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, ++ struct mlx5e_ipsec_rx *rx) + { +- u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; ++ mlx5_del_flow_rules(rx->status_drop.rule); ++ mlx5_destroy_flow_group(rx->status_drop.group); ++ mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); ++} ++ ++static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, ++ struct mlx5e_ipsec_rx *rx) ++{ ++ mlx5_del_flow_rules(rx->status.rule); ++ ++ if (rx != ipsec->rx_esw) ++ return; ++ ++#ifdef CONFIG_MLX5_ESWITCH ++ mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); ++#endif ++} ++ ++static int ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, ++ struct mlx5e_ipsec_rx *rx) ++{ ++ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); ++ struct mlx5_flow_table *ft = rx->ft.status; ++ struct mlx5_core_dev *mdev = ipsec->mdev; ++ struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = 
{}; +- struct mlx5_modify_hdr *modify_hdr; +- struct mlx5_flow_handle *fte; ++ struct mlx5_flow_handle *rule; ++ struct mlx5_fc *flow_counter; + struct mlx5_flow_spec *spec; +- int err; ++ struct mlx5_flow_group *g; ++ u32 *flow_group_in; ++ int err = 0; + ++ flow_group_in = kvzalloc(inlen, GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); +- if (!spec) +- return -ENOMEM; ++ if (!flow_group_in || !spec) { ++ err = -ENOMEM; ++ goto err_out; ++ } + +- /* Action to copy 7 bit ipsec_syndrome to regB[24:30] */ +- MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY); +- MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME); +- MLX5_SET(copy_action_in, action, src_offset, 0); +- MLX5_SET(copy_action_in, action, length, 7); +- MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); +- MLX5_SET(copy_action_in, action, dst_offset, 24); ++ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); ++ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); ++ g = mlx5_create_flow_group(ft, flow_group_in); ++ if (IS_ERR(g)) { ++ err = PTR_ERR(g); ++ mlx5_core_err(mdev, ++ "Failed to add ipsec rx status drop flow group, err=%d\n", err); ++ goto err_out; ++ } + +- modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL, +- 1, action); ++ flow_counter = mlx5_fc_create(mdev, false); ++ if (IS_ERR(flow_counter)) { ++ err = PTR_ERR(flow_counter); ++ mlx5_core_err(mdev, ++ "Failed to add ipsec rx status drop rule counter, err=%d\n", err); ++ goto err_cnt; ++ } + +- if (IS_ERR(modify_hdr)) { +- err = PTR_ERR(modify_hdr); ++ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; ++ dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; ++ dest.counter_id = mlx5_fc_id(flow_counter); ++ if (rx == ipsec->rx_esw) ++ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; ++ rule = mlx5_add_flow_rules(ft, spec, 
&flow_act, &dest, 1); ++ if (IS_ERR(rule)) { ++ err = PTR_ERR(rule); + mlx5_core_err(mdev, +- "fail to alloc ipsec copy modify_header_id err=%d\n", err); +- goto out_spec; ++ "Failed to add ipsec rx status drop rule, err=%d\n", err); ++ goto err_rule; + } + +- /* create fte */ +- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | +- MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | ++ rx->status_drop.group = g; ++ rx->status_drop.rule = rule; ++ rx->status_drop_cnt = flow_counter; ++ ++ kvfree(flow_group_in); ++ kvfree(spec); ++ return 0; ++ ++err_rule: ++ mlx5_fc_destroy(mdev, flow_counter); ++err_cnt: ++ mlx5_destroy_flow_group(g); ++err_out: ++ kvfree(flow_group_in); ++ kvfree(spec); ++ return err; ++} ++ ++static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, ++ struct mlx5e_ipsec_rx *rx, ++ struct mlx5_flow_destination *dest) ++{ ++ struct mlx5_flow_act flow_act = {}; ++ struct mlx5_flow_handle *rule; ++ struct mlx5_flow_spec *spec; ++ int err; ++ ++ spec = kvzalloc(sizeof(*spec), GFP_KERNEL); ++ if (!spec) ++ return -ENOMEM; ++ ++ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, ++ misc_parameters_2.ipsec_syndrome); ++ MLX5_SET(fte_match_param, spec->match_value, ++ misc_parameters_2.ipsec_syndrome, 0); ++ if (rx == ipsec->rx_esw) ++ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; ++ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; ++ flow_act.flags = FLOW_ACT_NO_APPEND; ++ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; +- flow_act.modify_hdr = modify_hdr; +- fte = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); +- if (IS_ERR(fte)) { +- err = PTR_ERR(fte); +- mlx5_core_err(mdev, "fail to add ipsec rx err copy rule err=%d\n", err); +- goto out; ++ rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); ++ if (IS_ERR(rule)) { ++ err = PTR_ERR(rule); ++ mlx5_core_warn(ipsec->mdev, ++ "Failed to add ipsec rx status pass rule, err=%d\n", err); ++ goto 
err_rule; + } + ++ rx->status.rule = rule; + kvfree(spec); +- rx->status.rule = fte; +- rx->status.modify_hdr = modify_hdr; + return 0; + +-out: +- mlx5_modify_header_dealloc(mdev, modify_hdr); +-out_spec: ++err_rule: + kvfree(spec); + return err; + } + ++static void mlx5_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, ++ struct mlx5e_ipsec_rx *rx) ++{ ++ ipsec_rx_status_pass_destroy(ipsec, rx); ++ ipsec_rx_status_drop_destroy(ipsec, rx); ++} ++ ++static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, ++ struct mlx5e_ipsec_rx *rx, ++ struct mlx5_flow_destination *dest) ++{ ++ int err; ++ ++ err = ipsec_rx_status_drop_create(ipsec, rx); ++ if (err) ++ return err; ++ ++ err = ipsec_rx_status_pass_create(ipsec, rx, dest); ++ if (err) ++ goto err_pass_create; ++ ++ return 0; ++ ++err_pass_create: ++ ipsec_rx_status_drop_destroy(ipsec, rx); ++ return err; ++} ++ + static int ipsec_miss_create(struct mlx5_core_dev *mdev, + struct mlx5_flow_table *ft, + struct mlx5e_ipsec_miss *miss, +@@ -256,12 +359,7 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + mlx5_destroy_flow_table(rx->ft.sa); + if (rx->allow_tunnel_mode) + mlx5_eswitch_unblock_encap(mdev); +- if (rx == ipsec->rx_esw) { +- mlx5_esw_ipsec_rx_status_destroy(ipsec, rx); +- } else { +- mlx5_del_flow_rules(rx->status.rule); +- mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); +- } ++ mlx5_ipsec_rx_status_destroy(ipsec, rx); + mlx5_destroy_flow_table(rx->ft.status); + + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); +@@ -351,10 +449,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(rx->fc->cnt); +- if (rx == ipsec->rx_esw) +- err = mlx5_esw_ipsec_rx_status_create(ipsec, rx, dest); +- else +- err = ipsec_status_rule(mdev, rx, dest); ++ err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); + if (err) + goto err_add; + +diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +index 095f31f380fa3..13b5916b64e22 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +@@ -21,158 +21,6 @@ enum { + MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL, + }; + +-static void esw_ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, +- struct mlx5e_ipsec_rx *rx) +-{ +- mlx5_del_flow_rules(rx->status_drop.rule); +- mlx5_destroy_flow_group(rx->status_drop.group); +- mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); +-} +- +-static void esw_ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, +- struct mlx5e_ipsec_rx *rx) +-{ +- mlx5_del_flow_rules(rx->status.rule); +- mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); +-} +- +-static int esw_ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, +- struct mlx5e_ipsec_rx *rx) +-{ +- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); +- struct mlx5_flow_table *ft = rx->ft.status; +- struct mlx5_core_dev *mdev = ipsec->mdev; +- struct mlx5_flow_destination dest = {}; +- struct mlx5_flow_act flow_act = {}; +- struct mlx5_flow_handle *rule; +- struct mlx5_fc *flow_counter; +- struct mlx5_flow_spec *spec; +- struct mlx5_flow_group *g; +- u32 *flow_group_in; +- int err = 0; +- +- flow_group_in = kvzalloc(inlen, GFP_KERNEL); +- spec = kvzalloc(sizeof(*spec), GFP_KERNEL); +- if (!flow_group_in || !spec) { +- err = -ENOMEM; +- goto err_out; +- } +- +- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); +- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); +- g = mlx5_create_flow_group(ft, flow_group_in); +- if (IS_ERR(g)) { +- err = PTR_ERR(g); +- mlx5_core_err(mdev, +- "Failed to add ipsec rx status drop flow group, err=%d\n", err); +- goto err_out; +- } +- +- flow_counter = mlx5_fc_create(mdev, false); +- if (IS_ERR(flow_counter)) { +- err = 
PTR_ERR(flow_counter); +- mlx5_core_err(mdev, +- "Failed to add ipsec rx status drop rule counter, err=%d\n", err); +- goto err_cnt; +- } +- +- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; +- dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; +- dest.counter_id = mlx5_fc_id(flow_counter); +- spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +- rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); +- if (IS_ERR(rule)) { +- err = PTR_ERR(rule); +- mlx5_core_err(mdev, +- "Failed to add ipsec rx status drop rule, err=%d\n", err); +- goto err_rule; +- } +- +- rx->status_drop.group = g; +- rx->status_drop.rule = rule; +- rx->status_drop_cnt = flow_counter; +- +- kvfree(flow_group_in); +- kvfree(spec); +- return 0; +- +-err_rule: +- mlx5_fc_destroy(mdev, flow_counter); +-err_cnt: +- mlx5_destroy_flow_group(g); +-err_out: +- kvfree(flow_group_in); +- kvfree(spec); +- return err; +-} +- +-static int esw_ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, +- struct mlx5e_ipsec_rx *rx, +- struct mlx5_flow_destination *dest) +-{ +- struct mlx5_flow_act flow_act = {}; +- struct mlx5_flow_handle *rule; +- struct mlx5_flow_spec *spec; +- int err; +- +- spec = kvzalloc(sizeof(*spec), GFP_KERNEL); +- if (!spec) +- return -ENOMEM; +- +- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, +- misc_parameters_2.ipsec_syndrome); +- MLX5_SET(fte_match_param, spec->match_value, +- misc_parameters_2.ipsec_syndrome, 0); +- spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; +- flow_act.flags = FLOW_ACT_NO_APPEND; +- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | +- MLX5_FLOW_CONTEXT_ACTION_COUNT; +- rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); +- if (IS_ERR(rule)) { +- err = PTR_ERR(rule); +- mlx5_core_warn(ipsec->mdev, +- "Failed to add ipsec rx status pass rule, err=%d\n", err); +- goto err_rule; +- } +- +- 
rx->status.rule = rule; +- kvfree(spec); +- return 0; +- +-err_rule: +- kvfree(spec); +- return err; +-} +- +-void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, +- struct mlx5e_ipsec_rx *rx) +-{ +- esw_ipsec_rx_status_pass_destroy(ipsec, rx); +- esw_ipsec_rx_status_drop_destroy(ipsec, rx); +-} +- +-int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, +- struct mlx5e_ipsec_rx *rx, +- struct mlx5_flow_destination *dest) +-{ +- int err; +- +- err = esw_ipsec_rx_status_drop_create(ipsec, rx); +- if (err) +- return err; +- +- err = esw_ipsec_rx_status_pass_create(ipsec, rx, dest); +- if (err) +- goto err_pass_create; +- +- return 0; +- +-err_pass_create: +- esw_ipsec_rx_status_drop_destroy(ipsec, rx); +- return err; +-} +- + void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx_create_attr *attr) + { +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h +index 0c90f7a8b0d32..ac9c65b89166e 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h +@@ -8,11 +8,6 @@ struct mlx5e_ipsec; + struct mlx5e_ipsec_sa_entry; + + #ifdef CONFIG_MLX5_ESWITCH +-void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, +- struct mlx5e_ipsec_rx *rx); +-int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, +- struct mlx5e_ipsec_rx *rx, +- struct mlx5_flow_destination *dest); + void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx_create_attr *attr); + int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, +@@ -26,16 +21,6 @@ void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx_create_attr *attr); + void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev); + #else +-static inline void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, +- struct mlx5e_ipsec_rx *rx) {} 
+- +-static inline int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, +- struct mlx5e_ipsec_rx *rx, +- struct mlx5_flow_destination *dest) +-{ +- return -EINVAL; +-} +- + static inline void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx_create_attr *attr) {} + +-- +2.43.0 + diff --git a/queue-6.6/net-remove-acked-syn-flag-from-packet-in-the-transmi.patch b/queue-6.6/net-remove-acked-syn-flag-from-packet-in-the-transmi.patch new file mode 100644 index 00000000000..20cb1aa68ee --- /dev/null +++ b/queue-6.6/net-remove-acked-syn-flag-from-packet-in-the-transmi.patch @@ -0,0 +1,111 @@ +From f3f5b17e5fc1ea851d3abb63dc7b50dcfd548561 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 10 Dec 2023 10:02:00 +0800 +Subject: net: Remove acked SYN flag from packet in the transmit queue + correctly + +From: Dong Chenchen + +[ Upstream commit f99cd56230f56c8b6b33713c5be4da5d6766be1f ] + +syzkaller report: + + kernel BUG at net/core/skbuff.c:3452! + invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI + CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.7.0-rc4-00009-gbee0e7762ad2-dirty #135 + RIP: 0010:skb_copy_and_csum_bits (net/core/skbuff.c:3452) + Call Trace: + icmp_glue_bits (net/ipv4/icmp.c:357) + __ip_append_data.isra.0 (net/ipv4/ip_output.c:1165) + ip_append_data (net/ipv4/ip_output.c:1362 net/ipv4/ip_output.c:1341) + icmp_push_reply (net/ipv4/icmp.c:370) + __icmp_send (./include/net/route.h:252 net/ipv4/icmp.c:772) + ip_fragment.constprop.0 (./include/linux/skbuff.h:1234 net/ipv4/ip_output.c:592 net/ipv4/ip_output.c:577) + __ip_finish_output (net/ipv4/ip_output.c:311 net/ipv4/ip_output.c:295) + ip_output (net/ipv4/ip_output.c:427) + __ip_queue_xmit (net/ipv4/ip_output.c:535) + __tcp_transmit_skb (net/ipv4/tcp_output.c:1462) + __tcp_retransmit_skb (net/ipv4/tcp_output.c:3387) + tcp_retransmit_skb (net/ipv4/tcp_output.c:3404) + tcp_retransmit_timer (net/ipv4/tcp_timer.c:604) + tcp_write_timer (./include/linux/spinlock.h:391 
net/ipv4/tcp_timer.c:716) + +The panic issue was triggered by tcp simultaneous initiation. +The initiation process is as follows: + + TCP A TCP B + + 1. CLOSED CLOSED + + 2. SYN-SENT --> ... + + 3. SYN-RECEIVED <-- <-- SYN-SENT + + 4. ... --> SYN-RECEIVED + + 5. SYN-RECEIVED --> ... + + // TCP B: not send challenge ack for ack limit or packet loss + // TCP A: close + tcp_close + tcp_send_fin + if (!tskb && tcp_under_memory_pressure(sk)) + tskb = skb_rb_last(&sk->tcp_rtx_queue); //pick SYN_ACK packet + TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; // set FIN flag + + 6. FIN_WAIT_1 --> ... + + // TCP B: send challenge ack to SYN_FIN_ACK + + 7. ... <-- SYN-RECEIVED //challenge ack + + // TCP A: + + 8. FIN_WAIT_1 --> ... // retransmit panic + + __tcp_retransmit_skb //skb->len=0 + tcp_trim_head + len = tp->snd_una - TCP_SKB_CB(skb)->seq // len=101-100 + __pskb_trim_head + skb->data_len -= len // skb->len=-1, wrap around + ... ... + ip_fragment + icmp_glue_bits //BUG_ON + +If we use tcp_trim_head() to remove acked SYN from packet that contains data +or other flags, skb->len will be incorrectly decremented. We can remove SYN +flag that has been acked from rtx_queue earlier than tcp_trim_head(), which +can fix the problem mentioned above. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Co-developed-by: Eric Dumazet +Signed-off-by: Eric Dumazet +Signed-off-by: Dong Chenchen +Link: https://lore.kernel.org/r/20231210020200.1539875-1-dongchenchen2@huawei.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_output.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 9ccfdc825004d..cab3c1162c3a6 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -3263,7 +3263,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) + if (skb_still_in_host_queue(sk, skb)) + return -EBUSY; + ++start: + if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { ++ if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { ++ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; ++ TCP_SKB_CB(skb)->seq++; ++ goto start; ++ } + if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { + WARN_ON_ONCE(1); + return -EINVAL; +-- +2.43.0 + diff --git a/queue-6.6/net-rose-fix-use-after-free-in-rose_ioctl.patch b/queue-6.6/net-rose-fix-use-after-free-in-rose_ioctl.patch new file mode 100644 index 00000000000..008ec37282b --- /dev/null +++ b/queue-6.6/net-rose-fix-use-after-free-in-rose_ioctl.patch @@ -0,0 +1,48 @@ +From dc1d64471279235b3dffed40de35ec35b2a5ecb8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 9 Dec 2023 05:05:38 -0500 +Subject: net/rose: Fix Use-After-Free in rose_ioctl + +From: Hyunwoo Kim + +[ Upstream commit 810c38a369a0a0ce625b5c12169abce1dd9ccd53 ] + +Because rose_ioctl() accesses sk->sk_receive_queue +without holding a sk->sk_receive_queue.lock, it can +cause a race with rose_accept(). +A use-after-free for skb occurs with the following flow. +``` +rose_ioctl() -> skb_peek() +rose_accept() -> skb_dequeue() -> kfree_skb() +``` +Add sk->sk_receive_queue.lock to rose_ioctl() to fix this issue. 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Hyunwoo Kim +Link: https://lore.kernel.org/r/20231209100538.GA407321@v4bel-B760M-AORUS-ELITE-AX +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/rose/af_rose.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c +index 49dafe9ac72f0..4a5c2dc8dd7a9 100644 +--- a/net/rose/af_rose.c ++++ b/net/rose/af_rose.c +@@ -1315,9 +1315,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) + case TIOCINQ: { + struct sk_buff *skb; + long amount = 0L; +- /* These two are safe on a single CPU system as only user tasks fiddle here */ ++ ++ spin_lock_irq(&sk->sk_receive_queue.lock); + if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) + amount = skb->len; ++ spin_unlock_irq(&sk->sk_receive_queue.lock); + return put_user(amount, (unsigned int __user *) argp); + } + +-- +2.43.0 + diff --git a/queue-6.6/net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch b/queue-6.6/net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch new file mode 100644 index 00000000000..c5de32bce61 --- /dev/null +++ b/queue-6.6/net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch @@ -0,0 +1,195 @@ +From f52927f6cdb3fd65812d55e619bc4c53eb46aba2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 5 Dec 2023 18:25:54 +0100 +Subject: net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table + +From: Vlad Buslov + +[ Upstream commit 125f1c7f26ffcdbf96177abe75b70c1a6ceb17bc ] + +The referenced change added custom cleanup code to act_ct to delete any +callbacks registered on the parent block when deleting the +tcf_ct_flow_table instance. 
However, the underlying issue is that the +drivers don't obtain the reference to the tcf_ct_flow_table instance when +registering callbacks which means that not only driver callbacks may still +be on the table when deleting it but also that the driver can still have +pointers to its internal nf_flowtable and can use it concurrently which +results either warning in netfilter[0] or use-after-free. + +Fix the issue by taking a reference to the underlying struct +tcf_ct_flow_table instance when registering the callback and release the +reference when unregistering. Expose new API required for such reference +counting by adding two new callbacks to nf_flowtable_type and implementing +them for act_ct flowtable_ct type. This fixes the issue by extending the +lifetime of nf_flowtable until all users have unregistered. + +[0]: +[106170.938634] ------------[ cut here ]------------ +[106170.939111] WARNING: CPU: 21 PID: 3688 at include/net/netfilter/nf_flow_table.h:262 mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] +[106170.940108] Modules linked in: act_ct nf_flow_table act_mirred act_skbedit act_tunnel_key vxlan cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa bonding openvswitch nsh rpcrdma rdma_ucm +ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_regis +try overlay mlx5_core +[106170.943496] CPU: 21 PID: 3688 Comm: kworker/u48:0 Not tainted 6.6.0-rc7_for_upstream_min_debug_2023_11_01_13_02 #1 +[106170.944361] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +[106170.945292] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core] +[106170.945846] RIP: 0010:mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] +[106170.946413] Code: 89 ef 48 83 05 71 a4 14 00 01 e8 f4 06 04 e1 48 83 05 6c a4 
14 00 01 48 83 c4 28 5b 5d 41 5c 41 5d c3 48 83 05 d1 8b 14 00 01 <0f> 0b 48 83 05 d7 8b 14 00 01 e9 96 fe ff ff 48 83 05 a2 90 14 00 +[106170.947924] RSP: 0018:ffff88813ff0fcb8 EFLAGS: 00010202 +[106170.948397] RAX: 0000000000000000 RBX: ffff88811eabac40 RCX: ffff88811eabad48 +[106170.949040] RDX: ffff88811eab8000 RSI: ffffffffa02cd560 RDI: 0000000000000000 +[106170.949679] RBP: ffff88811eab8000 R08: 0000000000000001 R09: ffffffffa0229700 +[106170.950317] R10: ffff888103538fc0 R11: 0000000000000001 R12: ffff88811eabad58 +[106170.950969] R13: ffff888110c01c00 R14: ffff888106b40000 R15: 0000000000000000 +[106170.951616] FS: 0000000000000000(0000) GS:ffff88885fd40000(0000) knlGS:0000000000000000 +[106170.952329] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[106170.952834] CR2: 00007f1cefd28cb0 CR3: 000000012181b006 CR4: 0000000000370ea0 +[106170.953482] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[106170.954121] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[106170.954766] Call Trace: +[106170.955057] +[106170.955315] ? __warn+0x79/0x120 +[106170.955648] ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] +[106170.956172] ? report_bug+0x17c/0x190 +[106170.956537] ? handle_bug+0x3c/0x60 +[106170.956891] ? exc_invalid_op+0x14/0x70 +[106170.957264] ? asm_exc_invalid_op+0x16/0x20 +[106170.957666] ? mlx5_del_flow_rules+0x10/0x310 [mlx5_core] +[106170.958172] ? mlx5_tc_ct_block_flow_offload_add+0x1240/0x1240 [mlx5_core] +[106170.958788] ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] +[106170.959339] ? mlx5_tc_ct_del_ft_cb+0xc6/0x2b0 [mlx5_core] +[106170.959854] ? mapping_remove+0x154/0x1d0 [mlx5_core] +[106170.960342] ? 
mlx5e_tc_action_miss_mapping_put+0x4f/0x80 [mlx5_core] +[106170.960927] mlx5_tc_ct_delete_flow+0x76/0xc0 [mlx5_core] +[106170.961441] mlx5_free_flow_attr_actions+0x13b/0x220 [mlx5_core] +[106170.962001] mlx5e_tc_del_fdb_flow+0x22c/0x3b0 [mlx5_core] +[106170.962524] mlx5e_tc_del_flow+0x95/0x3c0 [mlx5_core] +[106170.963034] mlx5e_flow_put+0x73/0xe0 [mlx5_core] +[106170.963506] mlx5e_put_flow_list+0x38/0x70 [mlx5_core] +[106170.964002] mlx5e_rep_update_flows+0xec/0x290 [mlx5_core] +[106170.964525] mlx5e_rep_neigh_update+0x1da/0x310 [mlx5_core] +[106170.965056] process_one_work+0x13a/0x2c0 +[106170.965443] worker_thread+0x2e5/0x3f0 +[106170.965808] ? rescuer_thread+0x410/0x410 +[106170.966192] kthread+0xc6/0xf0 +[106170.966515] ? kthread_complete_and_exit+0x20/0x20 +[106170.966970] ret_from_fork+0x2d/0x50 +[106170.967332] ? kthread_complete_and_exit+0x20/0x20 +[106170.967774] ret_from_fork_asm+0x11/0x20 +[106170.970466] +[106170.970726] ---[ end trace 0000000000000000 ]--- + +Fixes: 77ac5e40c44e ("net/sched: act_ct: remove and free nf_table callbacks") +Signed-off-by: Vlad Buslov +Reviewed-by: Paul Blakey +Acked-by: Pablo Neira Ayuso +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_flow_table.h | 10 ++++++++ + net/sched/act_ct.c | 34 ++++++++++++++++++++++----- + 2 files changed, 38 insertions(+), 6 deletions(-) + +diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h +index fe1507c1db828..692d5955911c7 100644 +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -62,6 +62,8 @@ struct nf_flowtable_type { + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule); + void (*free)(struct nf_flowtable *ft); ++ void (*get)(struct nf_flowtable *ft); ++ void (*put)(struct nf_flowtable *ft); + nf_hookfn *hook; + struct module *owner; + }; +@@ -240,6 +242,11 @@ nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, + } + + list_add_tail(&block_cb->list, &block->cb_list); ++ up_write(&flow_table->flow_block_lock); ++ ++ if (flow_table->type->get) ++ flow_table->type->get(flow_table); ++ return 0; + + unlock: + up_write(&flow_table->flow_block_lock); +@@ -262,6 +269,9 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, + WARN_ON(true); + } + up_write(&flow_table->flow_block_lock); ++ ++ if (flow_table->type->put) ++ flow_table->type->put(flow_table); + } + + void flow_offload_route_init(struct flow_offload *flow, +diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c +index 6dcc4585576e8..dd710fb9f4905 100644 +--- a/net/sched/act_ct.c ++++ b/net/sched/act_ct.c +@@ -286,9 +286,31 @@ static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow) + !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); + } + ++static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft); ++ ++static void tcf_ct_nf_get(struct nf_flowtable *ft) ++{ ++ struct tcf_ct_flow_table *ct_ft = ++ container_of(ft, struct tcf_ct_flow_table, nf_ft); ++ ++ tcf_ct_flow_table_get_ref(ct_ft); ++} ++ ++static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft); ++ ++static void tcf_ct_nf_put(struct 
nf_flowtable *ft) ++{ ++ struct tcf_ct_flow_table *ct_ft = ++ container_of(ft, struct tcf_ct_flow_table, nf_ft); ++ ++ tcf_ct_flow_table_put(ct_ft); ++} ++ + static struct nf_flowtable_type flowtable_ct = { + .gc = tcf_ct_flow_is_outdated, + .action = tcf_ct_flow_table_fill_actions, ++ .get = tcf_ct_nf_get, ++ .put = tcf_ct_nf_put, + .owner = THIS_MODULE, + }; + +@@ -337,9 +359,13 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) + return err; + } + ++static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft) ++{ ++ refcount_inc(&ct_ft->ref); ++} ++ + static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) + { +- struct flow_block_cb *block_cb, *tmp_cb; + struct tcf_ct_flow_table *ct_ft; + struct flow_block *block; + +@@ -347,13 +373,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) + rwork); + nf_flow_table_free(&ct_ft->nf_ft); + +- /* Remove any remaining callbacks before cleanup */ + block = &ct_ft->nf_ft.flow_block; + down_write(&ct_ft->nf_ft.flow_block_lock); +- list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) { +- list_del(&block_cb->list); +- flow_block_cb_free(block_cb); +- } ++ WARN_ON(!list_empty(&block->cb_list)); + up_write(&ct_ft->nf_ft.flow_block_lock); + kfree(ct_ft); + +-- +2.43.0 + diff --git a/queue-6.6/net-stmmac-dwmac-qcom-ethqos-fix-drops-in-10m-sgmii-.patch b/queue-6.6/net-stmmac-dwmac-qcom-ethqos-fix-drops-in-10m-sgmii-.patch new file mode 100644 index 00000000000..841e395efdd --- /dev/null +++ b/queue-6.6/net-stmmac-dwmac-qcom-ethqos-fix-drops-in-10m-sgmii-.patch @@ -0,0 +1,69 @@ +From 06aba8a79aed14f486bf2bb12c9983603d206373 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Dec 2023 14:52:08 +0530 +Subject: net: stmmac: dwmac-qcom-ethqos: Fix drops in 10M SGMII RX + +From: Sneh Shah + +[ Upstream commit 981d947bcd382c3950a593690e0e13d194d65b1c ] + +In 10M SGMII mode all the packets are being dropped due to wrong Rx clock. 
+SGMII 10MBPS mode needs RX clock divider programmed to avoid drops in Rx. +Update configure SGMII function with Rx clk divider programming. + +Fixes: 463120c31c58 ("net: stmmac: dwmac-qcom-ethqos: add support for SGMII") +Tested-by: Andrew Halaney +Signed-off-by: Sneh Shah +Reviewed-by: Bjorn Andersson +Link: https://lore.kernel.org/r/20231212092208.22393-1-quic_snehshah@quicinc.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +index d3bf42d0fceb6..31631e3f89d0a 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +@@ -34,6 +34,7 @@ + #define RGMII_CONFIG_LOOPBACK_EN BIT(2) + #define RGMII_CONFIG_PROG_SWAP BIT(1) + #define RGMII_CONFIG_DDR_MODE BIT(0) ++#define RGMII_CONFIG_SGMII_CLK_DVDR GENMASK(18, 10) + + /* SDCC_HC_REG_DLL_CONFIG fields */ + #define SDCC_DLL_CONFIG_DLL_RST BIT(30) +@@ -78,6 +79,8 @@ + #define ETHQOS_MAC_CTRL_SPEED_MODE BIT(14) + #define ETHQOS_MAC_CTRL_PORT_SEL BIT(15) + ++#define SGMII_10M_RX_CLK_DVDR 0x31 ++ + struct ethqos_emac_por { + unsigned int offset; + unsigned int value; +@@ -598,6 +601,9 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) + return 0; + } + ++/* On interface toggle MAC registers gets reset. 
++ * Configure MAC block for SGMII on ethernet phy link up ++ */ + static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) + { + int val; +@@ -617,6 +623,10 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) + case SPEED_10: + val |= ETHQOS_MAC_CTRL_PORT_SEL; + val &= ~ETHQOS_MAC_CTRL_SPEED_MODE; ++ rgmii_updatel(ethqos, RGMII_CONFIG_SGMII_CLK_DVDR, ++ FIELD_PREP(RGMII_CONFIG_SGMII_CLK_DVDR, ++ SGMII_10M_RX_CLK_DVDR), ++ RGMII_IO_MACRO_CONFIG); + break; + } + +-- +2.43.0 + diff --git a/queue-6.6/net-stmmac-handle-disabled-mdio-busses-from-devicetr.patch b/queue-6.6/net-stmmac-handle-disabled-mdio-busses-from-devicetr.patch new file mode 100644 index 00000000000..42102f66eda --- /dev/null +++ b/queue-6.6/net-stmmac-handle-disabled-mdio-busses-from-devicetr.patch @@ -0,0 +1,45 @@ +From 6edd84c6c95981d99e9951fb755012e5584288d4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Dec 2023 16:18:33 -0600 +Subject: net: stmmac: Handle disabled MDIO busses from devicetree + +From: Andrew Halaney + +[ Upstream commit e23c0d21ce9234fbc31ece35663ababbb83f9347 ] + +Many hardware configurations have the MDIO bus disabled, and are instead +using some other MDIO bus to talk to the MAC's phy. + +of_mdiobus_register() returns -ENODEV in this case. Let's handle it +gracefully instead of failing to probe the MAC. 
+ +Fixes: 47dd7a540b8a ("net: add support for STMicroelectronics Ethernet controllers.") +Signed-off-by: Andrew Halaney +Reviewed-by: Serge Semin +Link: https://lore.kernel.org/r/20231212-b4-stmmac-handle-mdio-enodev-v2-1-600171acf79f@redhat.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +index fa9e7e7040b94..0542cfd1817e6 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +@@ -591,7 +591,11 @@ int stmmac_mdio_register(struct net_device *ndev) + new_bus->parent = priv->device; + + err = of_mdiobus_register(new_bus, mdio_node); +- if (err != 0) { ++ if (err == -ENODEV) { ++ err = 0; ++ dev_info(dev, "MDIO bus is disabled\n"); ++ goto bus_register_fail; ++ } else if (err) { + dev_err_probe(dev, err, "Cannot register the MDIO bus\n"); + goto bus_register_fail; + } +-- +2.43.0 + diff --git a/queue-6.6/octeon_ep-explicitly-test-for-firmware-ready-value.patch b/queue-6.6/octeon_ep-explicitly-test-for-firmware-ready-value.patch new file mode 100644 index 00000000000..9bdf2da2b11 --- /dev/null +++ b/queue-6.6/octeon_ep-explicitly-test-for-firmware-ready-value.patch @@ -0,0 +1,41 @@ +From 1457b6f886ac66a5f6e601d50aaeb9bd14b64387 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Dec 2023 21:56:46 -0800 +Subject: octeon_ep: explicitly test for firmware ready value + +From: Shinas Rasheed + +[ Upstream commit 284f717622417cb267e344a9174f8e5698d1e3c1 ] + +The firmware ready value is 1, and get firmware ready status +function should explicitly test for that value. 
The firmware +ready value read will be 2 after driver load, and on unbind +till firmware rewrites the firmware ready back to 0, the value +seen by driver will be 2, which should be regarded as not ready. + +Fixes: 10c073e40469 ("octeon_ep: defer probe if firmware not ready") +Signed-off-by: Shinas Rasheed +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +index 5b46ca47c8e59..2ee1374db4c06 100644 +--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c ++++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +@@ -1076,7 +1076,8 @@ static bool get_fw_ready_status(struct pci_dev *pdev) + + pci_read_config_byte(pdev, (pos + 8), &status); + dev_info(&pdev->dev, "Firmware ready status = %u\n", status); +- return status; ++#define FW_STATUS_READY 1ULL ++ return status == FW_STATUS_READY; + } + return false; + } +-- +2.43.0 + diff --git a/queue-6.6/octeontx2-af-fix-a-use-after-free-in-rvu_nix_registe.patch b/queue-6.6/octeontx2-af-fix-a-use-after-free-in-rvu_nix_registe.patch new file mode 100644 index 00000000000..7dce9b5dcc8 --- /dev/null +++ b/queue-6.6/octeontx2-af-fix-a-use-after-free-in-rvu_nix_registe.patch @@ -0,0 +1,63 @@ +From 501566ee9ce3eab124281068846257dc464950b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Dec 2023 17:49:16 +0800 +Subject: octeontx2-af: fix a use-after-free in rvu_nix_register_reporters + +From: Zhipeng Lu + +[ Upstream commit 28a7cb045ab700de5554193a1642917602787784 ] + +The rvu_dl will be freed in rvu_nix_health_reporters_destroy(rvu_dl) +after the create_workqueue fails, and after that free, the rvu_dl will +be translate back through the following call chain: + +rvu_nix_health_reporters_destroy + |-> rvu_nix_health_reporters_create + |-> 
rvu_health_reporters_create + |-> rvu_register_dl (label err_dl_health) + +Finally, in the err_dl_health label, rvu_dl is freed again in +rvu_health_reporters_destroy(rvu) by rvu_nix_health_reporters_destroy. +In the second call of rvu_nix_health_reporters_destroy, however, +it uses rvu_dl->rvu_nix_health_reporter, which is already freed at +the end of rvu_nix_health_reporters_destroy in the first call. + +So this patch prevents the first destroy by instantly returning -ENOMEM +when create_workqueue fails. In addition, since the failure of +create_workqueue is the only entrance of label err, it has been +integrated into the error-handling path of create_workqueue. + +Fixes: 5ed66306eab6 ("octeontx2-af: Add devlink health reporters for NIX") +Signed-off-by: Zhipeng Lu +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +index 058f75dc4c8a5..bffe04e6d0254 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +@@ -642,7 +642,7 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) + + rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq"); + if (!rvu_dl->devlink_wq) +- goto err; ++ return -ENOMEM; + + INIT_WORK(&rvu_reporters->intr_work, rvu_nix_intr_work); + INIT_WORK(&rvu_reporters->gen_work, rvu_nix_gen_work); +@@ -650,9 +650,6 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) + INIT_WORK(&rvu_reporters->ras_work, rvu_nix_ras_work); + + return 0; +-err: +- rvu_nix_health_reporters_destroy(rvu_dl); +- return -ENOMEM; + } + + static int rvu_nix_health_reporters_create(struct rvu_devlink *rvu_dl) +-- +2.43.0 + diff --git a/queue-6.6/octeontx2-af-fix-pause-frame-configuration.patch
b/queue-6.6/octeontx2-af-fix-pause-frame-configuration.patch new file mode 100644 index 00000000000..0a92ac6d134 --- /dev/null +++ b/queue-6.6/octeontx2-af-fix-pause-frame-configuration.patch @@ -0,0 +1,56 @@ +From a59de66c19f8ea84e21207d5524c28ecb6976545 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Dec 2023 14:57:54 +0530 +Subject: octeontx2-af: Fix pause frame configuration + +From: Hariprasad Kelam + +[ Upstream commit e307b5a845c5951dabafc48d00b6424ee64716c4 ] + +The current implementation's default Pause Forward setting is causing +unnecessary network traffic. This patch disables Pause Forward to +address this issue. + +Fixes: 1121f6b02e7a ("octeontx2-af: Priority flow control configuration support") +Signed-off-by: Hariprasad Kelam +Signed-off-by: Sunil Kovvuri Goutham +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +index af21e2030cff2..4728ba34b0e34 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +@@ -373,6 +373,11 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + ++ /* Disable forward pause to driver */ ++ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); ++ cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD; ++ rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); ++ + /* Enable channel mask for all LMACS */ + if (is_dev_rpm2(rpm)) + rpm_write(rpm, lmac_id, RPM2_CMR_CHAN_MSK_OR, 0xffff); +@@ -616,12 +621,10 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p + + if (rx_pause) { + cfg &= ~(RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | +- 
RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | +- RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); ++ RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE); + } else { + cfg |= (RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | +- RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | +- RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); ++ RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE); + } + + if (tx_pause) { +-- +2.43.0 + diff --git a/queue-6.6/octeontx2-af-update-rss-algorithm-index.patch b/queue-6.6/octeontx2-af-update-rss-algorithm-index.patch new file mode 100644 index 00000000000..0b556cb960a --- /dev/null +++ b/queue-6.6/octeontx2-af-update-rss-algorithm-index.patch @@ -0,0 +1,147 @@ +From c10d3bce4c637e39d51cb51c14a73ace77b2fd4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Dec 2023 12:26:10 +0530 +Subject: octeontx2-af: Update RSS algorithm index + +From: Hariprasad Kelam + +[ Upstream commit 570ba37898ecd9069beb58bf0b6cf84daba6e0fe ] + +The RSS flow algorithm is not set up correctly for promiscuous or all +multi MCAM entries. This has an impact on flow distribution. + +This patch fixes the issue by updating flow algorithm index in above +mentioned MCAM entries. + +Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature") +Signed-off-by: Hariprasad Kelam +Signed-off-by: Sunil Kovvuri Goutham +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + .../ethernet/marvell/octeontx2/af/rvu_npc.c | 55 +++++++++++++++---- + 1 file changed, 44 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +index f65805860c8d4..0bcf3e5592806 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +@@ -671,6 +671,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, + int blkaddr, ucast_idx, index; + struct nix_rx_action action = { 0 }; + u64 relaxed_mask; ++ u8 flow_key_alg; + + if (!hw->cap.nix_rx_multicast && is_cgx_vf(rvu, pcifunc)) + return; +@@ -701,6 +702,8 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, + action.op = NIX_RX_ACTIONOP_UCAST; + } + ++ flow_key_alg = action.flow_key_alg; ++ + /* RX_ACTION set to MCAST for CGX PF's */ + if (hw->cap.nix_rx_multicast && pfvf->use_mce_list && + is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) { +@@ -740,7 +743,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, + req.vf = pcifunc; + req.index = action.index; + req.match_id = action.match_id; +- req.flow_key_alg = action.flow_key_alg; ++ req.flow_key_alg = flow_key_alg; + + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); + } +@@ -854,6 +857,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, + u8 mac_addr[ETH_ALEN] = { 0 }; + struct nix_rx_action action = { 0 }; + struct rvu_pfvf *pfvf; ++ u8 flow_key_alg; + u16 vf_func; + + /* Only CGX PF/VF can add allmulticast entry */ +@@ -888,6 +892,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, + *(u64 *)&action = npc_get_mcam_action(rvu, mcam, + blkaddr, ucast_idx); + ++ flow_key_alg = action.flow_key_alg; + if (action.op != NIX_RX_ACTIONOP_RSS) { + *(u64 *)&action = 0; + action.op = NIX_RX_ACTIONOP_UCAST; +@@ -924,7 +929,7 @@ void 
rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, + req.vf = pcifunc | vf_func; + req.index = action.index; + req.match_id = action.match_id; +- req.flow_key_alg = action.flow_key_alg; ++ req.flow_key_alg = flow_key_alg; + + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); + } +@@ -990,11 +995,38 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam, + mutex_unlock(&mcam->lock); + } + ++static void npc_update_rx_action_with_alg_idx(struct rvu *rvu, struct nix_rx_action action, ++ struct rvu_pfvf *pfvf, int mcam_index, int blkaddr, ++ int alg_idx) ++ ++{ ++ struct npc_mcam *mcam = &rvu->hw->mcam; ++ struct rvu_hwinfo *hw = rvu->hw; ++ int bank, op_rss; ++ ++ if (!is_mcam_entry_enabled(rvu, mcam, blkaddr, mcam_index)) ++ return; ++ ++ op_rss = (!hw->cap.nix_rx_multicast || !pfvf->use_mce_list); ++ ++ bank = npc_get_bank(mcam, mcam_index); ++ mcam_index &= (mcam->banksize - 1); ++ ++ /* If Rx action is MCAST update only RSS algorithm index */ ++ if (!op_rss) { ++ *(u64 *)&action = rvu_read64(rvu, blkaddr, ++ NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank)); ++ ++ action.flow_key_alg = alg_idx; ++ } ++ rvu_write64(rvu, blkaddr, ++ NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank), *(u64 *)&action); ++} ++ + void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, + int group, int alg_idx, int mcam_index) + { + struct npc_mcam *mcam = &rvu->hw->mcam; +- struct rvu_hwinfo *hw = rvu->hw; + struct nix_rx_action action; + int blkaddr, index, bank; + struct rvu_pfvf *pfvf; +@@ -1050,15 +1082,16 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, + /* If PF's promiscuous entry is enabled, + * Set RSS action for that entry as well + */ +- if ((!hw->cap.nix_rx_multicast || !pfvf->use_mce_list) && +- is_mcam_entry_enabled(rvu, mcam, blkaddr, index)) { +- bank = npc_get_bank(mcam, index); +- index &= (mcam->banksize - 1); ++ npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, 
blkaddr, ++ alg_idx); + +- rvu_write64(rvu, blkaddr, +- NPC_AF_MCAMEX_BANKX_ACTION(index, bank), +- *(u64 *)&action); +- } ++ index = npc_get_nixlf_mcam_index(mcam, pcifunc, ++ nixlf, NIXLF_ALLMULTI_ENTRY); ++ /* If PF's allmulti entry is enabled, ++ * Set RSS action for that entry as well ++ */ ++ npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr, ++ alg_idx); + } + + void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc, +-- +2.43.0 + diff --git a/queue-6.6/octeontx2-pf-fix-promisc-mcam-entry-action.patch b/queue-6.6/octeontx2-pf-fix-promisc-mcam-entry-action.patch new file mode 100644 index 00000000000..89e2daafa5d --- /dev/null +++ b/queue-6.6/octeontx2-pf-fix-promisc-mcam-entry-action.patch @@ -0,0 +1,83 @@ +From 8953c7f988432e23b003197992009e0c8542f4d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Dec 2023 12:26:09 +0530 +Subject: octeontx2-pf: Fix promisc mcam entry action + +From: Hariprasad Kelam + +[ Upstream commit dbda436824ded8ef6a05bb82cd9baa8d42377a49 ] + +Current implementation is such that, promisc mcam entry action +is set as multicast even when there are no trusted VFs. multicast +action causes the hardware to copy packet data, which reduces +the performance. + +This patch fixes this issue by setting the promisc mcam entry action to +unicast instead of multicast when there are no trusted VFs. The same +change is made for the 'allmulti' mcam entry action. + +Fixes: ffd2f89ad05c ("octeontx2-pf: Enable promisc/allmulti match MCAM entries.") +Signed-off-by: Hariprasad Kelam +Signed-off-by: Sunil Kovvuri Goutham +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 25 ++++++++++++++++--- + 1 file changed, 22 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +index 0c17ebdda1487..a57455aebff6f 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +@@ -1650,6 +1650,21 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) + mutex_unlock(&mbox->lock); + } + ++static bool otx2_promisc_use_mce_list(struct otx2_nic *pfvf) ++{ ++ int vf; ++ ++ /* The AF driver will determine whether to allow the VF netdev or not */ ++ if (is_otx2_vf(pfvf->pcifunc)) ++ return true; ++ ++ /* check if there are any trusted VFs associated with the PF netdev */ ++ for (vf = 0; vf < pci_num_vf(pfvf->pdev); vf++) ++ if (pfvf->vf_configs[vf].trusted) ++ return true; ++ return false; ++} ++ + static void otx2_do_set_rx_mode(struct otx2_nic *pf) + { + struct net_device *netdev = pf->netdev; +@@ -1682,7 +1697,8 @@ static void otx2_do_set_rx_mode(struct otx2_nic *pf) + if (netdev->flags & (IFF_ALLMULTI | IFF_MULTICAST)) + req->mode |= NIX_RX_MODE_ALLMULTI; + +- req->mode |= NIX_RX_MODE_USE_MCE; ++ if (otx2_promisc_use_mce_list(pf)) ++ req->mode |= NIX_RX_MODE_USE_MCE; + + otx2_sync_mbox_msg(&pf->mbox); + mutex_unlock(&pf->mbox.lock); +@@ -2691,11 +2707,14 @@ static int otx2_ndo_set_vf_trust(struct net_device *netdev, int vf, + pf->vf_configs[vf].trusted = enable; + rc = otx2_set_vf_permissions(pf, vf, OTX2_TRUSTED_VF); + +- if (rc) ++ if (rc) { + pf->vf_configs[vf].trusted = !enable; +- else ++ } else { + netdev_info(pf->netdev, "VF %d is %strusted\n", + vf, enable ? 
"" : "not "); ++ otx2_set_rx_mode(netdev); ++ } ++ + return rc; + } + +-- +2.43.0 + diff --git a/queue-6.6/qca_debug-fix-ethtool-g-iface-tx-behavior.patch b/queue-6.6/qca_debug-fix-ethtool-g-iface-tx-behavior.patch new file mode 100644 index 00000000000..b70f7ffb95f --- /dev/null +++ b/queue-6.6/qca_debug-fix-ethtool-g-iface-tx-behavior.patch @@ -0,0 +1,80 @@ +From 88f9f0ef15f2ff257c658563f4b877447c75c38e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Dec 2023 15:12:21 +0100 +Subject: qca_debug: Fix ethtool -G iface tx behavior + +From: Stefan Wahren + +[ Upstream commit 96a7e861d9e04d07febd3011c30cd84cd141d81f ] + +After calling ethtool -g it was not possible to adjust the TX ring +size again: + + # ethtool -g eth1 + Ring parameters for eth1: + Pre-set maximums: + RX: 4 + RX Mini: n/a + RX Jumbo: n/a + TX: 10 + Current hardware settings: + RX: 4 + RX Mini: n/a + RX Jumbo: n/a + TX: 10 + # ethtool -G eth1 tx 8 + netlink error: Invalid argument + +The reason for this is that the readonly setting rx_pending get +initialized and after that the range check in qcaspi_set_ringparam() +fails regardless of the provided parameter. So fix this by accepting +the exposed RX defaults. Instead of adding another magic number +better use a new define here. 
+ +Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") +Suggested-by: Paolo Abeni +Signed-off-by: Stefan Wahren +Link: https://lore.kernel.org/r/20231206141222.52029-3-wahrenst@gmx.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qualcomm/qca_debug.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c +index a5445252b0c4d..1822f2ad8f0dd 100644 +--- a/drivers/net/ethernet/qualcomm/qca_debug.c ++++ b/drivers/net/ethernet/qualcomm/qca_debug.c +@@ -30,6 +30,8 @@ + + #define QCASPI_MAX_REGS 0x20 + ++#define QCASPI_RX_MAX_FRAMES 4 ++ + static const u16 qcaspi_spi_regs[] = { + SPI_REG_BFR_SIZE, + SPI_REG_WRBUF_SPC_AVA, +@@ -252,9 +254,9 @@ qcaspi_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, + { + struct qcaspi *qca = netdev_priv(dev); + +- ring->rx_max_pending = 4; ++ ring->rx_max_pending = QCASPI_RX_MAX_FRAMES; + ring->tx_max_pending = TX_RING_MAX_LEN; +- ring->rx_pending = 4; ++ ring->rx_pending = QCASPI_RX_MAX_FRAMES; + ring->tx_pending = qca->txr.count; + } + +@@ -265,7 +267,7 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, + { + struct qcaspi *qca = netdev_priv(dev); + +- if ((ring->rx_pending) || ++ if (ring->rx_pending != QCASPI_RX_MAX_FRAMES || + (ring->rx_mini_pending) || + (ring->rx_jumbo_pending)) + return -EINVAL; +-- +2.43.0 + diff --git a/queue-6.6/qca_debug-prevent-crash-on-tx-ring-changes.patch b/queue-6.6/qca_debug-prevent-crash-on-tx-ring-changes.patch new file mode 100644 index 00000000000..f4af99e7813 --- /dev/null +++ b/queue-6.6/qca_debug-prevent-crash-on-tx-ring-changes.patch @@ -0,0 +1,86 @@ +From 92e0ef9f0b2a85c10a5af05de91c543d4b4e4592 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Dec 2023 15:12:20 +0100 +Subject: qca_debug: Prevent crash on TX ring changes + +From: Stefan Wahren + +[ Upstream 
commit f4e6064c97c050bd9904925ff7d53d0c9954fc7b ] + +The qca_spi driver stops and restarts the SPI kernel thread +(via ndo_stop & ndo_open) in case of TX ring changes. This is +a big issue because it allows userspace to prevent restart of +the SPI kernel thread (via signals). A subsequent change of +TX ring wrongly assumes a valid spi_thread pointer which results +in a crash. + +So prevent this by stopping the network traffic handling and +temporarily parking the SPI thread. + +Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") +Signed-off-by: Stefan Wahren +Link: https://lore.kernel.org/r/20231206141222.52029-2-wahrenst@gmx.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qualcomm/qca_debug.c | 9 ++++----- + drivers/net/ethernet/qualcomm/qca_spi.c | 12 ++++++++++++ + 2 files changed, 16 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c +index 6f2fa2a42770a..a5445252b0c4d 100644 +--- a/drivers/net/ethernet/qualcomm/qca_debug.c ++++ b/drivers/net/ethernet/qualcomm/qca_debug.c +@@ -263,7 +263,6 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) + { +- const struct net_device_ops *ops = dev->netdev_ops; + struct qcaspi *qca = netdev_priv(dev); + + if ((ring->rx_pending) || +@@ -271,14 +270,14 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, + (ring->rx_jumbo_pending)) + return -EINVAL; + +- if (netif_running(dev)) +- ops->ndo_stop(dev); ++ if (qca->spi_thread) ++ kthread_park(qca->spi_thread); + + qca->txr.count = max_t(u32, ring->tx_pending, TX_RING_MIN_LEN); + qca->txr.count = min_t(u16, qca->txr.count, TX_RING_MAX_LEN); + +- if (netif_running(dev)) +- ops->ndo_open(dev); ++ if (qca->spi_thread) ++ kthread_unpark(qca->spi_thread); + + return 0; + } +diff --git
a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c +index bec723028e96c..b0fad69bb755f 100644 +--- a/drivers/net/ethernet/qualcomm/qca_spi.c ++++ b/drivers/net/ethernet/qualcomm/qca_spi.c +@@ -580,6 +580,18 @@ qcaspi_spi_thread(void *data) + netdev_info(qca->net_dev, "SPI thread created\n"); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); ++ if (kthread_should_park()) { ++ netif_tx_disable(qca->net_dev); ++ netif_carrier_off(qca->net_dev); ++ qcaspi_flush_tx_ring(qca); ++ kthread_parkme(); ++ if (qca->sync == QCASPI_SYNC_READY) { ++ netif_carrier_on(qca->net_dev); ++ netif_wake_queue(qca->net_dev); ++ } ++ continue; ++ } ++ + if ((qca->intr_req == qca->intr_svc) && + !qca->txr.skb[qca->txr.head]) + schedule(); +-- +2.43.0 + diff --git a/queue-6.6/qca_spi-fix-reset-behavior.patch b/queue-6.6/qca_spi-fix-reset-behavior.patch new file mode 100644 index 00000000000..f20487797c5 --- /dev/null +++ b/queue-6.6/qca_spi-fix-reset-behavior.patch @@ -0,0 +1,51 @@ +From e84d2f0f90c9eee06d0ff9f599411b46b7189521 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Dec 2023 15:12:22 +0100 +Subject: qca_spi: Fix reset behavior + +From: Stefan Wahren + +[ Upstream commit 1057812d146dd658c9a9a96d869c2551150207b5 ] + +In case of a reset triggered by the QCA7000 itself, the behavior of the +qca_spi driver was not quite correct: +- in case of a pending RX frame decoding the drop counter must be + incremented and the decoding state machine reset +- also the reset counter must always be incremented regardless of sync + state + +Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") +Signed-off-by: Stefan Wahren +Link: https://lore.kernel.org/r/20231206141222.52029-4-wahrenst@gmx.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qualcomm/qca_spi.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git
a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c +index b0fad69bb755f..5f3c11fb3fa27 100644 +--- a/drivers/net/ethernet/qualcomm/qca_spi.c ++++ b/drivers/net/ethernet/qualcomm/qca_spi.c +@@ -620,11 +620,17 @@ qcaspi_spi_thread(void *data) + if (intr_cause & SPI_INT_CPU_ON) { + qcaspi_qca7k_sync(qca, QCASPI_EVENT_CPUON); + ++ /* Frame decoding in progress */ ++ if (qca->frm_handle.state != qca->frm_handle.init) ++ qca->net_dev->stats.rx_dropped++; ++ ++ qcafrm_fsm_init_spi(&qca->frm_handle); ++ qca->stats.device_reset++; ++ + /* not synced. */ + if (qca->sync != QCASPI_SYNC_READY) + continue; + +- qca->stats.device_reset++; + netif_wake_queue(qca->net_dev); + netif_carrier_on(qca->net_dev); + } +-- +2.43.0 + diff --git a/queue-6.6/qed-fix-a-potential-use-after-free-in-qed_cxt_tables.patch b/queue-6.6/qed-fix-a-potential-use-after-free-in-qed_cxt_tables.patch new file mode 100644 index 00000000000..a4ae1b43e8b --- /dev/null +++ b/queue-6.6/qed-fix-a-potential-use-after-free-in-qed_cxt_tables.patch @@ -0,0 +1,41 @@ +From 7380016b6103ad007edc7f1190392f85e7c43a6f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 10 Dec 2023 12:52:55 +0800 +Subject: qed: Fix a potential use-after-free in qed_cxt_tables_alloc + +From: Dinghao Liu + +[ Upstream commit b65d52ac9c085c0c52dee012a210d4e2f352611b ] + +qed_ilt_shadow_alloc() will call qed_ilt_shadow_free() to +free p_hwfn->p_cxt_mngr->ilt_shadow on error. However, +qed_cxt_tables_alloc() accesses the freed pointer on failure +of qed_ilt_shadow_alloc() through calling qed_cxt_mngr_free(), +which may lead to use-after-free. Fix this issue by setting +p_mngr->ilt_shadow to NULL in qed_ilt_shadow_free(). 
+ +Fixes: fe56b9e6a8d9 ("qed: Add module with basic common support") +Reviewed-by: Przemek Kitszel +Signed-off-by: Dinghao Liu +Link: https://lore.kernel.org/r/20231210045255.21383-1-dinghao.liu@zju.edu.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qlogic/qed/qed_cxt.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c +index 65e20693c549e..33f4f58ee51c6 100644 +--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c +@@ -933,6 +933,7 @@ static void qed_ilt_shadow_free(struct qed_hwfn *p_hwfn) + p_dma->virt_addr = NULL; + } + kfree(p_mngr->ilt_shadow); ++ p_mngr->ilt_shadow = NULL; + } + + static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn, +-- +2.43.0 + diff --git a/queue-6.6/rdma-mlx5-send-events-from-ib-driver-about-device-af.patch b/queue-6.6/rdma-mlx5-send-events-from-ib-driver-about-device-af.patch new file mode 100644 index 00000000000..f835dff0792 --- /dev/null +++ b/queue-6.6/rdma-mlx5-send-events-from-ib-driver-about-device-af.patch @@ -0,0 +1,123 @@ +From 25d4896facd82be6ab7458ba05cf40d0fbc4721d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Sep 2023 15:10:27 +0300 +Subject: RDMA/mlx5: Send events from IB driver about device affiliation state + +From: Patrisious Haddad + +[ Upstream commit 0d293714ac32650bfb669ceadf7cc2fad8161401 ] + +Send blocking events from IB driver whenever the device is done being +affiliated or if it is removed from an affiliation. + +This is useful since now the EN driver can register to those event and +know when a device is affiliated or not. 
+ +Signed-off-by: Patrisious Haddad +Reviewed-by: Mark Bloch +Link: https://lore.kernel.org/r/a7491c3e483cfd8d962f5f75b9a25f253043384a.1695296682.git.leon@kernel.org +Signed-off-by: Leon Romanovsky +Stable-dep-of: 762a55a54eec ("net/mlx5e: Disable IPsec offload support if not FW steering") +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/main.c | 17 +++++++++++++++++ + drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++++ + include/linux/mlx5/device.h | 2 ++ + include/linux/mlx5/driver.h | 2 ++ + 4 files changed, 27 insertions(+) + +diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c +index 5d963abb7e609..4c4233b9c8b08 100644 +--- a/drivers/infiniband/hw/mlx5/main.c ++++ b/drivers/infiniband/hw/mlx5/main.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -3175,6 +3176,13 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, + + lockdep_assert_held(&mlx5_ib_multiport_mutex); + ++ mlx5_core_mp_event_replay(ibdev->mdev, ++ MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, ++ NULL); ++ mlx5_core_mp_event_replay(mpi->mdev, ++ MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, ++ NULL); ++ + mlx5_ib_cleanup_cong_debugfs(ibdev, port_num); + + spin_lock(&port->mp.mpi_lock); +@@ -3226,6 +3234,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, + struct mlx5_ib_multiport_info *mpi) + { + u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; ++ u64 key; + int err; + + lockdep_assert_held(&mlx5_ib_multiport_mutex); +@@ -3254,6 +3263,14 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, + + mlx5_ib_init_cong_debugfs(ibdev, port_num); + ++ key = ibdev->ib_dev.index; ++ mlx5_core_mp_event_replay(mpi->mdev, ++ MLX5_DRIVER_EVENT_AFFILIATION_DONE, ++ &key); ++ mlx5_core_mp_event_replay(ibdev->mdev, ++ MLX5_DRIVER_EVENT_AFFILIATION_DONE, ++ &key); ++ + return true; + + unbind: +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index 15561965d2afa..6ca91c0e8a6a5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -361,6 +361,12 @@ void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev) + } + EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay); + ++void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data) ++{ ++ mlx5_blocking_notifier_call_chain(dev, event, data); ++} ++EXPORT_SYMBOL(mlx5_core_mp_event_replay); ++ + int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) + { +diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h +index 4d5be378fa8cc..26333d602a505 100644 +--- a/include/linux/mlx5/device.h ++++ b/include/linux/mlx5/device.h +@@ -366,6 +366,8 @@ enum mlx5_driver_event { + MLX5_DRIVER_EVENT_UPLINK_NETDEV, + MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, + MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, ++ MLX5_DRIVER_EVENT_AFFILIATION_DONE, ++ MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, + }; + + enum { +diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h +index 3033bbaeac81c..5ca4e085d8133 100644 +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -1027,6 +1027,8 @@ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); + void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); + void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); + ++void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data); ++ + void mlx5_health_cleanup(struct mlx5_core_dev *dev); + int mlx5_health_init(struct mlx5_core_dev *dev); + void mlx5_start_health_poll(struct mlx5_core_dev *dev); +-- +2.43.0 + diff --git a/queue-6.6/series b/queue-6.6/series index ab7f0d064ac..aaeaa930968 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -4,3 +4,53 @@ ksmbd-fix-memory-leak-in-smb2_lock.patch 
efi-x86-avoid-physical-kaslr-on-older-dell-systems.patch afs-fix-refcount-underflow-from-error-handling-race.patch hid-lenovo-restrict-detection-of-patched-firmware-on.patch +net-mlx5e-honor-user-choice-of-ipsec-replay-window-s.patch +net-mlx5e-ensure-that-ipsec-sequence-packet-number-s.patch +net-mlx5e-unify-esw-and-normal-ipsec-status-table-cr.patch +net-mlx5e-tidy-up-ipsec-nat-t-sa-discovery.patch +net-mlx5e-reduce-eswitch-mode_lock-protection-contex.patch +net-mlx5e-check-the-number-of-elements-before-walk-t.patch +rdma-mlx5-send-events-from-ib-driver-about-device-af.patch +net-mlx5e-disable-ipsec-offload-support-if-not-fw-st.patch +net-mlx5e-fix-possible-deadlock-on-mlx5e_tx_timeout_.patch +net-mlx5e-tc-don-t-offload-post-action-rule-if-not-s.patch +net-mlx5-nack-sync-reset-request-when-hotplug-is-ena.patch +net-mlx5e-check-netdev-pointer-before-checking-its-n.patch +net-mlx5-fix-a-null-vs-is_err-check.patch +net-ipv6-support-reporting-otherwise-unknown-prefix-.patch +qca_debug-prevent-crash-on-tx-ring-changes.patch +qca_debug-fix-ethtool-g-iface-tx-behavior.patch +qca_spi-fix-reset-behavior.patch +bnxt_en-clear-resource-reservation-during-resume.patch +bnxt_en-fix-skb-recycling-logic-in-bnxt_deliver_skb.patch +bnxt_en-fix-wrong-return-value-check-in-bnxt_close_n.patch +bnxt_en-fix-hwtstamp_filter_all-packet-timestamp-log.patch +atm-solos-pci-fix-potential-deadlock-on-cli_queue_lo.patch +atm-solos-pci-fix-potential-deadlock-on-tx_queue_loc.patch +net-fec-correct-queue-selection.patch +octeontx2-af-fix-a-use-after-free-in-rvu_nix_registe.patch +net-sched-act_ct-take-per-cb-reference-to-tcf_ct_flo.patch +octeon_ep-explicitly-test-for-firmware-ready-value.patch +octeontx2-pf-fix-promisc-mcam-entry-action.patch +octeontx2-af-update-rss-algorithm-index.patch +octeontx2-af-fix-pause-frame-configuration.patch +atm-fix-use-after-free-in-do_vcc_ioctl.patch +net-rose-fix-use-after-free-in-rose_ioctl.patch +iavf-introduce-new-state-machines-for-flow-director.patch 
+iavf-handle-ntuple-on-off-based-on-new-state-machine.patch +iavf-fix-iavf_shutdown-to-call-iavf_remove-instead-i.patch +qed-fix-a-potential-use-after-free-in-qed_cxt_tables.patch +net-remove-acked-syn-flag-from-packet-in-the-transmi.patch +net-ena-destroy-correct-number-of-xdp-queues-upon-fa.patch +net-ena-fix-xdp-drops-handling-due-to-multibuf-packe.patch +net-ena-fix-dma-syncing-in-xdp-path-when-swiotlb-is-.patch +net-ena-fix-xdp-redirection-error.patch +stmmac-dwmac-loongson-make-sure-mdio-is-initialized-.patch +sign-file-fix-incorrect-return-values-check.patch +vsock-virtio-fix-unsigned-integer-wrap-around-in-vir.patch +dpaa2-switch-fix-size-of-the-dma_unmap.patch +dpaa2-switch-do-not-ask-for-mdb-vlan-and-fdb-replay.patch +net-stmmac-dwmac-qcom-ethqos-fix-drops-in-10m-sgmii-.patch +net-stmmac-handle-disabled-mdio-busses-from-devicetr.patch +appletalk-fix-use-after-free-in-atalk_ioctl.patch +net-atlantic-fix-double-free-in-ring-reinit-logic.patch diff --git a/queue-6.6/sign-file-fix-incorrect-return-values-check.patch b/queue-6.6/sign-file-fix-incorrect-return-values-check.patch new file mode 100644 index 00000000000..b667e9bd684 --- /dev/null +++ b/queue-6.6/sign-file-fix-incorrect-return-values-check.patch @@ -0,0 +1,79 @@ +From 2a479247e48da3c1bc77f8c28f0771e7f78a2730 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 13 Dec 2023 10:31:10 +0000 +Subject: sign-file: Fix incorrect return values check + +From: Yusong Gao + +[ Upstream commit 829649443e78d85db0cff0c37cadb28fbb1a5f6f ] + +There are some wrong return values check in sign-file when call OpenSSL +API. The ERR() check cond is wrong because of the program only check the +return value is < 0 which ignored the return val is 0. For example: +1. CMS_final() return 1 for success or 0 for failure. +2. i2d_CMS_bio_stream() returns 1 for success or 0 for failure. +3. i2d_TYPEbio() return 1 for success and 0 for failure. +4. BIO_free() return 1 for success and 0 for failure. 
+ +Link: https://www.openssl.org/docs/manmaster/man3/ +Fixes: e5a2e3c84782 ("scripts/sign-file.c: Add support for signing with a raw signature") +Signed-off-by: Yusong Gao +Reviewed-by: Juerg Haefliger +Signed-off-by: David Howells +Link: https://lore.kernel.org/r/20231213024405.624692-1-a869920004@gmail.com/ # v5 +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + scripts/sign-file.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/scripts/sign-file.c b/scripts/sign-file.c +index 598ef5465f825..3edb156ae52c3 100644 +--- a/scripts/sign-file.c ++++ b/scripts/sign-file.c +@@ -322,7 +322,7 @@ int main(int argc, char **argv) + CMS_NOSMIMECAP | use_keyid | + use_signed_attrs), + "CMS_add1_signer"); +- ERR(CMS_final(cms, bm, NULL, CMS_NOCERTS | CMS_BINARY) < 0, ++ ERR(CMS_final(cms, bm, NULL, CMS_NOCERTS | CMS_BINARY) != 1, + "CMS_final"); + + #else +@@ -341,10 +341,10 @@ int main(int argc, char **argv) + b = BIO_new_file(sig_file_name, "wb"); + ERR(!b, "%s", sig_file_name); + #ifndef USE_PKCS7 +- ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) < 0, ++ ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) != 1, + "%s", sig_file_name); + #else +- ERR(i2d_PKCS7_bio(b, pkcs7) < 0, ++ ERR(i2d_PKCS7_bio(b, pkcs7) != 1, + "%s", sig_file_name); + #endif + BIO_free(b); +@@ -374,9 +374,9 @@ int main(int argc, char **argv) + + if (!raw_sig) { + #ifndef USE_PKCS7 +- ERR(i2d_CMS_bio_stream(bd, cms, NULL, 0) < 0, "%s", dest_name); ++ ERR(i2d_CMS_bio_stream(bd, cms, NULL, 0) != 1, "%s", dest_name); + #else +- ERR(i2d_PKCS7_bio(bd, pkcs7) < 0, "%s", dest_name); ++ ERR(i2d_PKCS7_bio(bd, pkcs7) != 1, "%s", dest_name); + #endif + } else { + BIO *b; +@@ -396,7 +396,7 @@ int main(int argc, char **argv) + ERR(BIO_write(bd, &sig_info, sizeof(sig_info)) < 0, "%s", dest_name); + ERR(BIO_write(bd, magic_number, sizeof(magic_number) - 1) < 0, "%s", dest_name); + +- ERR(BIO_free(bd) < 0, "%s", dest_name); ++ ERR(BIO_free(bd) != 1, "%s", dest_name); + + /* Finally, if 
we're signing in place, replace the original. */ + if (replace_orig) +-- +2.43.0 + diff --git a/queue-6.6/stmmac-dwmac-loongson-make-sure-mdio-is-initialized-.patch b/queue-6.6/stmmac-dwmac-loongson-make-sure-mdio-is-initialized-.patch new file mode 100644 index 00000000000..abc0dd61f5b --- /dev/null +++ b/queue-6.6/stmmac-dwmac-loongson-make-sure-mdio-is-initialized-.patch @@ -0,0 +1,53 @@ +From 68228ca82a5926205fb815722dc7799ccdf35b44 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Dec 2023 18:33:11 +0800 +Subject: stmmac: dwmac-loongson: Make sure MDIO is initialized before use + +From: Yanteng Si + +[ Upstream commit e87d3a1370ce9f04770d789bcf7cce44865d2e8d ] + +Generic code will use mdio. If it is not initialized before use, +the kernel will Oops. + +Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") +Signed-off-by: Yanteng Si +Signed-off-by: Feiyang Chen +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../net/ethernet/stmicro/stmmac/dwmac-loongson.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +index 2cd6fce5c9934..e7701326adc6a 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +@@ -68,17 +68,15 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id + if (!plat) + return -ENOMEM; + ++ plat->mdio_bus_data = devm_kzalloc(&pdev->dev, ++ sizeof(*plat->mdio_bus_data), ++ GFP_KERNEL); ++ if (!plat->mdio_bus_data) ++ return -ENOMEM; ++ + plat->mdio_node = of_get_child_by_name(np, "mdio"); + if (plat->mdio_node) { + dev_info(&pdev->dev, "Found MDIO subnode\n"); +- +- plat->mdio_bus_data = devm_kzalloc(&pdev->dev, +- sizeof(*plat->mdio_bus_data), +- GFP_KERNEL); +- if (!plat->mdio_bus_data) { +- ret = -ENOMEM; +- goto err_put_node; +- } + 
plat->mdio_bus_data->needs_reset = true; + } + +-- +2.43.0 + diff --git a/queue-6.6/vsock-virtio-fix-unsigned-integer-wrap-around-in-vir.patch b/queue-6.6/vsock-virtio-fix-unsigned-integer-wrap-around-in-vir.patch new file mode 100644 index 00000000000..5a4de4bc76d --- /dev/null +++ b/queue-6.6/vsock-virtio-fix-unsigned-integer-wrap-around-in-vir.patch @@ -0,0 +1,41 @@ +From 8d3c8fcaa6274c63657440f6b42281d6d7364201 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Dec 2023 19:23:17 +0300 +Subject: vsock/virtio: Fix unsigned integer wrap around in + virtio_transport_has_space() + +From: Nikolay Kuratov + +[ Upstream commit 60316d7f10b17a7ebb1ead0642fee8710e1560e0 ] + +We need to do signed arithmetic if we expect condition +`if (bytes < 0)` to be possible + +Found by Linux Verification Center (linuxtesting.org) with SVACE + +Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") +Signed-off-by: Nikolay Kuratov +Reviewed-by: Stefano Garzarella +Link: https://lore.kernel.org/r/20231211162317.4116625-1-kniv@yandex-team.ru +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/vmw_vsock/virtio_transport_common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c +index 8bc272b6003bb..4084578b0b911 100644 +--- a/net/vmw_vsock/virtio_transport_common.c ++++ b/net/vmw_vsock/virtio_transport_common.c +@@ -679,7 +679,7 @@ static s64 virtio_transport_has_space(struct vsock_sock *vsk) + struct virtio_vsock_sock *vvs = vsk->trans; + s64 bytes; + +- bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); ++ bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); + if (bytes < 0) + bytes = 0; + +-- +2.43.0 +