From: Sasha Levin Date: Sat, 24 May 2025 10:22:29 +0000 (-0400) Subject: Fixes for 6.1 X-Git-Tag: v6.12.31~74 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=92be37d2e0aee34a22a2e967c59c94b5c8575915;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/bluetooth-l2cap-fix-not-checking-l2cap_chan-security.patch b/queue-6.1/bluetooth-l2cap-fix-not-checking-l2cap_chan-security.patch new file mode 100644 index 0000000000..7c3da26ed2 --- /dev/null +++ b/queue-6.1/bluetooth-l2cap-fix-not-checking-l2cap_chan-security.patch @@ -0,0 +1,92 @@ +From 1094e07ac8cf907f3e873d790c503a880d953edd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 May 2025 15:00:30 -0400 +Subject: Bluetooth: L2CAP: Fix not checking l2cap_chan security level + +From: Luiz Augusto von Dentz + +[ Upstream commit 7af8479d9eb4319b4ba7b47a8c4d2c55af1c31e1 ] + +l2cap_check_enc_key_size shall check the security level of the +l2cap_chan rather than the hci_conn since for incoming connection +request that may be different as hci_conn may already been +encrypted using a different security level. + +Fixes: 522e9ed157e3 ("Bluetooth: l2cap: Check encryption key size on incoming connection") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/l2cap_core.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index 222105e24d2d8..cb9b1edfcea2a 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -1561,7 +1561,8 @@ static void l2cap_request_info(struct l2cap_conn *conn) + sizeof(req), &req); + } + +-static bool l2cap_check_enc_key_size(struct hci_conn *hcon) ++static bool l2cap_check_enc_key_size(struct hci_conn *hcon, ++ struct l2cap_chan *chan) + { + /* The minimum encryption key size needs to be enforced by the + * host stack before establishing any L2CAP connections. 
The +@@ -1575,7 +1576,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon) + int min_key_size = hcon->hdev->min_enc_key_size; + + /* On FIPS security level, key size must be 16 bytes */ +- if (hcon->sec_level == BT_SECURITY_FIPS) ++ if (chan->sec_level == BT_SECURITY_FIPS) + min_key_size = 16; + + return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || +@@ -1603,7 +1604,7 @@ static void l2cap_do_start(struct l2cap_chan *chan) + !__l2cap_no_conn_pending(chan)) + return; + +- if (l2cap_check_enc_key_size(conn->hcon)) ++ if (l2cap_check_enc_key_size(conn->hcon, chan)) + l2cap_start_connection(chan); + else + __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); +@@ -1685,7 +1686,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) + continue; + } + +- if (l2cap_check_enc_key_size(conn->hcon)) ++ if (l2cap_check_enc_key_size(conn->hcon, chan)) + l2cap_start_connection(chan); + else + l2cap_chan_close(chan, ECONNREFUSED); +@@ -4187,7 +4188,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, + /* Check if the ACL is secure enough (if not SDP) */ + if (psm != cpu_to_le16(L2CAP_PSM_SDP) && + (!hci_conn_check_link_mode(conn->hcon) || +- !l2cap_check_enc_key_size(conn->hcon))) { ++ !l2cap_check_enc_key_size(conn->hcon, pchan))) { + conn->disc_reason = HCI_ERROR_AUTH_FAILURE; + result = L2CAP_CR_SEC_BLOCK; + goto response; +@@ -8418,7 +8419,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) + } + + if (chan->state == BT_CONNECT) { +- if (!status && l2cap_check_enc_key_size(hcon)) ++ if (!status && l2cap_check_enc_key_size(hcon, chan)) + l2cap_start_connection(chan); + else + __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); +@@ -8428,7 +8429,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) + struct l2cap_conn_rsp rsp; + __u16 res, stat; + +- if (!status && l2cap_check_enc_key_size(hcon)) { ++ if (!status && l2cap_check_enc_key_size(hcon, chan)) { + if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { + res = L2CAP_CR_PEND; + stat = L2CAP_CS_AUTHOR_PEND; +-- +2.39.5 + diff --git a/queue-6.1/bridge-netfilter-fix-forwarding-of-fragmented-packet.patch b/queue-6.1/bridge-netfilter-fix-forwarding-of-fragmented-packet.patch new file mode 100644 index 0000000000..f1ea670854 --- /dev/null +++ b/queue-6.1/bridge-netfilter-fix-forwarding-of-fragmented-packet.patch @@ -0,0 +1,95 @@ +From 0bd57d44af1c9d6b3beb7172dd370a58d08d23e9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 May 2025 11:48:48 +0300 +Subject: bridge: netfilter: Fix forwarding of fragmented packets + +From: Ido Schimmel + +[ Upstream commit 91b6dbced0ef1d680afdd69b14fc83d50ebafaf3 ] + +When netfilter defrag hooks are loaded (due to the presence of conntrack +rules, for example), fragmented packets entering the bridge will be +defragged by the bridge's pre-routing hook (br_nf_pre_routing() -> +ipv4_conntrack_defrag()). + +Later on, in the bridge's post-routing hook, the defragged packet will +be fragmented again. If the size of the largest fragment is larger than +what the kernel has determined as the destination MTU (using +ip_skb_dst_mtu()), the defragged packet will be dropped. + +Before commit ac6627a28dbf ("net: ipv4: Consolidate ipv4_mtu and +ip_dst_mtu_maybe_forward"), ip_skb_dst_mtu() would return dst_mtu() as +the destination MTU. Assuming the dst entry attached to the packet is +the bridge's fake rtable one, this would simply be the bridge's MTU (see +fake_mtu()). 
+ +However, after above mentioned commit, ip_skb_dst_mtu() ends up +returning the route's MTU stored in the dst entry's metrics. Ideally, in +case the dst entry is the bridge's fake rtable one, this should be the +bridge's MTU as the bridge takes care of updating this metric when its +MTU changes (see br_change_mtu()). + +Unfortunately, the last operation is a no-op given the metrics attached +to the fake rtable entry are marked as read-only. Therefore, +ip_skb_dst_mtu() ends up returning 1500 (the initial MTU value) and +defragged packets are dropped during fragmentation when dealing with +large fragments and high MTU (e.g., 9k). + +Fix by moving the fake rtable entry's metrics to be per-bridge (in a +similar fashion to the fake rtable entry itself) and marking them as +writable, thereby allowing MTU changes to be reflected. + +Fixes: 62fa8a846d7d ("net: Implement read-only protection and COW'ing of metrics.") +Fixes: 33eb9873a283 ("bridge: initialize fake_rtable metrics") +Reported-by: Venkat Venkatsubra +Closes: https://lore.kernel.org/netdev/PH0PR10MB4504888284FF4CBA648197D0ACB82@PH0PR10MB4504.namprd10.prod.outlook.com/ +Tested-by: Venkat Venkatsubra +Signed-off-by: Ido Schimmel +Acked-by: Nikolay Aleksandrov +Link: https://patch.msgid.link/20250515084848.727706-1-idosch@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/bridge/br_nf_core.c | 7 ++----- + net/bridge/br_private.h | 1 + + 2 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c +index 8c69f0c95a8ed..b8c8deb87407d 100644 +--- a/net/bridge/br_nf_core.c ++++ b/net/bridge/br_nf_core.c +@@ -65,17 +65,14 @@ static struct dst_ops fake_dst_ops = { + * ipt_REJECT needs it. Future netfilter modules might + * require us to fill additional fields. + */ +-static const u32 br_dst_default_metrics[RTAX_MAX] = { +- [RTAX_MTU - 1] = 1500, +-}; +- + void br_netfilter_rtable_init(struct net_bridge *br) + { + struct rtable *rt = &br->fake_rtable; + + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.dev = br->dev; +- dst_init_metrics(&rt->dst, br_dst_default_metrics, true); ++ dst_init_metrics(&rt->dst, br->metrics, false); ++ dst_metric_set(&rt->dst, RTAX_MTU, br->dev->mtu); + rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; + rt->dst.ops = &fake_dst_ops; + } +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h +index 940de95167689..19fb505492521 100644 +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -478,6 +478,7 @@ struct net_bridge { + struct rtable fake_rtable; + struct rt6_info fake_rt6_info; + }; ++ u32 metrics[RTAX_MAX]; + #endif + u16 group_fwd_mask; + u16 group_fwd_mask_required; +-- +2.39.5 + diff --git a/queue-6.1/clk-sunxi-ng-d1-add-missing-divider-for-mmc-mod-cloc.patch b/queue-6.1/clk-sunxi-ng-d1-add-missing-divider-for-mmc-mod-cloc.patch new file mode 100644 index 0000000000..fdc6f5dfa5 --- /dev/null +++ b/queue-6.1/clk-sunxi-ng-d1-add-missing-divider-for-mmc-mod-cloc.patch @@ -0,0 +1,131 @@ +From 1f4b2d4271c974e118c72b3fdcc68fc8991215a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 May 2025 13:06:31 +0100 +Subject: clk: sunxi-ng: d1: Add missing divider for MMC mod clocks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Andre Przywara + +[ Upstream commit 98e6da673cc6dd46ca9a599802bd2c8f83606710 ] + +The D1/R528/T113 SoCs have a hidden divider of 2 in the MMC mod clocks, +just as other recent SoCs. 
So far we did not describe that, which led +to the resulting MMC clock rate to be only half of its intended value. + +Use a macro that allows to describe a fixed post-divider, to compensate +for that divisor. + +This brings the MMC performance on those SoCs to its expected level, +so about 23 MB/s for SD cards, instead of the 11 MB/s measured so far. + +Fixes: 35b97bb94111 ("clk: sunxi-ng: Add support for the D1 SoC clocks") +Reported-by: Kuba Szczodrzyński +Signed-off-by: Andre Przywara +Link: https://patch.msgid.link/20250501120631.837186-1-andre.przywara@arm.com +Signed-off-by: Chen-Yu Tsai +Signed-off-by: Sasha Levin +--- + drivers/clk/sunxi-ng/ccu-sun20i-d1.c | 44 ++++++++++++++++------------ + drivers/clk/sunxi-ng/ccu_mp.h | 22 ++++++++++++++ + 2 files changed, 47 insertions(+), 19 deletions(-) + +diff --git a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c +index cb4bf038e17f5..89d8bf4a30a26 100644 +--- a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c ++++ b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c +@@ -412,19 +412,23 @@ static const struct clk_parent_data mmc0_mmc1_parents[] = { + { .hw = &pll_periph0_2x_clk.common.hw }, + { .hw = &pll_audio1_div2_clk.common.hw }, + }; +-static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc0_clk, "mmc0", mmc0_mmc1_parents, 0x830, +- 0, 4, /* M */ +- 8, 2, /* P */ +- 24, 3, /* mux */ +- BIT(31), /* gate */ +- 0); +- +-static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc1_clk, "mmc1", mmc0_mmc1_parents, 0x834, +- 0, 4, /* M */ +- 8, 2, /* P */ +- 24, 3, /* mux */ +- BIT(31), /* gate */ +- 0); ++static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc0_clk, "mmc0", ++ mmc0_mmc1_parents, 0x830, ++ 0, 4, /* M */ ++ 8, 2, /* P */ ++ 24, 3, /* mux */ ++ BIT(31), /* gate */ ++ 2, /* post-div */ ++ 0); ++ ++static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc1_clk, "mmc1", ++ mmc0_mmc1_parents, 0x834, ++ 0, 4, /* M */ ++ 8, 2, /* P */ ++ 24, 3, /* mux */ ++ BIT(31), /* gate */ ++ 2, /* post-div */ ++ 0); + + static const struct clk_parent_data mmc2_parents[] = { + { .fw_name = "hosc" }, +@@ -433,12 +437,14 @@ static const struct clk_parent_data mmc2_parents[] = { + { .hw = &pll_periph0_800M_clk.common.hw }, + { .hw = &pll_audio1_div2_clk.common.hw }, + }; +-static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc2_clk, "mmc2", mmc2_parents, 0x838, +- 0, 4, /* M */ +- 8, 2, /* P */ +- 24, 3, /* mux */ +- BIT(31), /* gate */ +- 0); ++static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc2_clk, "mmc2", mmc2_parents, ++ 0x838, ++ 0, 4, /* M */ ++ 8, 2, /* P */ ++ 24, 3, /* mux */ ++ BIT(31), /* gate */ ++ 2, /* post-div */ ++ 0); + + static SUNXI_CCU_GATE_HWS(bus_mmc0_clk, "bus-mmc0", psi_ahb_hws, + 0x84c, BIT(0), 0); +diff --git a/drivers/clk/sunxi-ng/ccu_mp.h b/drivers/clk/sunxi-ng/ccu_mp.h +index 6e50f3728fb5f..7d836a9fb3db3 100644 +--- a/drivers/clk/sunxi-ng/ccu_mp.h ++++ b/drivers/clk/sunxi-ng/ccu_mp.h +@@ -52,6 +52,28 @@ struct ccu_mp { + } \ + } + ++#define SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(_struct, _name, _parents, \ ++ _reg, \ ++ _mshift, _mwidth, \ ++ _pshift, _pwidth, \ ++ _muxshift, _muxwidth, \ ++ _gate, _postdiv, _flags)\ ++ struct ccu_mp _struct = { \ ++ .enable = _gate, \ ++ .m = _SUNXI_CCU_DIV(_mshift, _mwidth), \ ++ .p = _SUNXI_CCU_DIV(_pshift, _pwidth), \ ++ .mux = _SUNXI_CCU_MUX(_muxshift, _muxwidth), \ ++ .fixed_post_div = _postdiv, \ ++ .common = { \ ++ .reg = _reg, \ ++ .features = CCU_FEATURE_FIXED_POSTDIV, \ ++ .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, \ ++ _parents, \ ++ &ccu_mp_ops, \ ++ _flags), \ ++ } \ ++ } ++ + #define SUNXI_CCU_MP_WITH_MUX_GATE(_struct, 
_name, _parents, _reg, \ + _mshift, _mwidth, \ + _pshift, _pwidth, \ +-- +2.39.5 + diff --git a/queue-6.1/dmaengine-idxd-add-idxd_copy_cr-to-copy-user-complet.patch b/queue-6.1/dmaengine-idxd-add-idxd_copy_cr-to-copy-user-complet.patch new file mode 100644 index 0000000000..6dc04e6570 --- /dev/null +++ b/queue-6.1/dmaengine-idxd-add-idxd_copy_cr-to-copy-user-complet.patch @@ -0,0 +1,263 @@ +From b80fed3040c7aeee82e2c2fc1fa204ba5347ba11 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Apr 2023 13:31:35 -0700 +Subject: dmaengine: idxd: add idxd_copy_cr() to copy user completion record + during page fault handling + +From: Fenghua Yu + +[ Upstream commit b022f59725f0ae846191abbd6d2e611d7f60f826 ] + +Define idxd_copy_cr() to copy completion record to fault address in +user address that is found by work queue (wq) and PASID. + +It will be used to write the user's completion record that the hardware +device is not able to write due to user completion record page fault. + +An xarray is added to associate the PASID and mm with the +struct idxd_user_context so mm can be found by PASID and wq. + +It is called when handling the completion record fault in a kernel thread +context. Switch to the mm using kthread_use_vm() and copy the +completion record to the mm via copy_to_user(). Once the copy is +completed, switch back to the current mm using kthread_unuse_mm(). + +Suggested-by: Christoph Hellwig +Suggested-by: Jason Gunthorpe +Suggested-by: Tony Luck +Tested-by: Tony Zhu +Signed-off-by: Fenghua Yu +Reviewed-by: Dave Jiang +Link: https://lore.kernel.org/r/20230407203143.2189681-9-fenghua.yu@intel.com +Signed-off-by: Vinod Koul +Stable-dep-of: 8dfa57aabff6 ("dmaengine: idxd: Fix allowing write() from different address spaces") +Signed-off-by: Sasha Levin +--- + drivers/dma/idxd/cdev.c | 107 +++++++++++++++++++++++++++++++++++++-- + drivers/dma/idxd/idxd.h | 6 +++ + drivers/dma/idxd/init.c | 2 + + drivers/dma/idxd/sysfs.c | 1 + + 4 files changed, 111 insertions(+), 5 deletions(-) + +diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c +index e2a89873c6e1a..c7aa47f01df02 100644 +--- a/drivers/dma/idxd/cdev.c ++++ b/drivers/dma/idxd/cdev.c +@@ -12,7 +12,9 @@ + #include + #include + #include ++#include + #include ++#include + #include "registers.h" + #include "idxd.h" + +@@ -35,6 +37,7 @@ struct idxd_user_context { + struct idxd_wq *wq; + struct task_struct *task; + unsigned int pasid; ++ struct mm_struct *mm; + unsigned int flags; + struct iommu_sva *sva; + }; +@@ -69,6 +72,19 @@ static inline struct idxd_wq *inode_wq(struct inode *inode) + return idxd_cdev->wq; + } + ++static void idxd_xa_pasid_remove(struct idxd_user_context *ctx) ++{ ++ struct idxd_wq *wq = ctx->wq; ++ void *ptr; ++ ++ mutex_lock(&wq->uc_lock); ++ ptr = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL); ++ if (ptr != (void *)ctx) ++ dev_warn(&wq->idxd->pdev->dev, "xarray cmpxchg failed for pasid %u\n", ++ ctx->pasid); ++ mutex_unlock(&wq->uc_lock); ++} ++ + static int idxd_cdev_open(struct inode *inode, struct file *filp) + { + struct idxd_user_context *ctx; +@@ -109,20 +125,26 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) + + pasid = iommu_sva_get_pasid(sva); + if (pasid == IOMMU_PASID_INVALID) { +- iommu_sva_unbind_device(sva); + rc = -EINVAL; +- goto failed; ++ goto failed_get_pasid; + } + + ctx->sva = sva; + ctx->pasid = pasid; ++ ctx->mm = current->mm; ++ ++ mutex_lock(&wq->uc_lock); ++ rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL); ++ mutex_unlock(&wq->uc_lock); ++ 
if (rc < 0) ++ dev_warn(dev, "PASID entry already exist in xarray.\n"); + + if (wq_dedicated(wq)) { + rc = idxd_wq_set_pasid(wq, pasid); + if (rc < 0) { + iommu_sva_unbind_device(sva); + dev_err(dev, "wq set pasid failed: %d\n", rc); +- goto failed; ++ goto failed_set_pasid; + } + } + } +@@ -131,7 +153,13 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) + mutex_unlock(&wq->wq_lock); + return 0; + +- failed: ++failed_set_pasid: ++ if (device_user_pasid_enabled(idxd)) ++ idxd_xa_pasid_remove(ctx); ++failed_get_pasid: ++ if (device_user_pasid_enabled(idxd)) ++ iommu_sva_unbind_device(sva); ++failed: + mutex_unlock(&wq->wq_lock); + kfree(ctx); + return rc; +@@ -162,8 +190,10 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) + } + } + +- if (ctx->sva) ++ if (ctx->sva) { + iommu_sva_unbind_device(ctx->sva); ++ idxd_xa_pasid_remove(ctx); ++ } + kfree(ctx); + mutex_lock(&wq->wq_lock); + idxd_wq_put(wq); +@@ -496,3 +526,70 @@ void idxd_cdev_remove(void) + ida_destroy(&ictx[i].minor_ida); + } + } ++ ++/** ++ * idxd_copy_cr - copy completion record to user address space found by wq and ++ * PASID ++ * @wq: work queue ++ * @pasid: PASID ++ * @addr: user fault address to write ++ * @cr: completion record ++ * @len: number of bytes to copy ++ * ++ * This is called by a work that handles completion record fault. ++ * ++ * Return: number of bytes copied. ++ */ ++int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, ++ void *cr, int len) ++{ ++ struct device *dev = &wq->idxd->pdev->dev; ++ int left = len, status_size = 1; ++ struct idxd_user_context *ctx; ++ struct mm_struct *mm; ++ ++ mutex_lock(&wq->uc_lock); ++ ++ ctx = xa_load(&wq->upasid_xa, pasid); ++ if (!ctx) { ++ dev_warn(dev, "No user context\n"); ++ goto out; ++ } ++ ++ mm = ctx->mm; ++ /* ++ * The completion record fault handling work is running in kernel ++ * thread context. It temporarily switches to the mm to copy cr ++ * to addr in the mm. ++ */ ++ kthread_use_mm(mm); ++ left = copy_to_user((void __user *)addr + status_size, cr + status_size, ++ len - status_size); ++ /* ++ * Copy status only after the rest of completion record is copied ++ * successfully so that the user gets the complete completion record ++ * when a non-zero status is polled. ++ */ ++ if (!left) { ++ u8 status; ++ ++ /* ++ * Ensure that the completion record's status field is written ++ * after the rest of the completion record has been written. ++ * This ensures that the user receives the correct completion ++ * record information once polling for a non-zero status. ++ */ ++ wmb(); ++ status = *(u8 *)cr; ++ if (put_user(status, (u8 __user *)addr)) ++ left += status_size; ++ } else { ++ left += status_size; ++ } ++ kthread_unuse_mm(mm); ++ ++out: ++ mutex_unlock(&wq->uc_lock); ++ ++ return len - left; ++} +diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h +index 5dbb67ff1c0cb..c3ace4aed0fc5 100644 +--- a/drivers/dma/idxd/idxd.h ++++ b/drivers/dma/idxd/idxd.h +@@ -215,6 +215,10 @@ struct idxd_wq { + char name[WQ_NAME_SIZE + 1]; + u64 max_xfer_bytes; + u32 max_batch_size; ++ ++ /* Lock to protect upasid_xa access. 
*/ ++ struct mutex uc_lock; ++ struct xarray upasid_xa; + }; + + struct idxd_engine { +@@ -666,6 +670,8 @@ void idxd_cdev_remove(void); + int idxd_cdev_get_major(struct idxd_device *idxd); + int idxd_wq_add_cdev(struct idxd_wq *wq); + void idxd_wq_del_cdev(struct idxd_wq *wq); ++int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, ++ void *buf, int len); + + /* perfmon */ + #if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON) +diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c +index 7cb76db5ad600..ea651d5cf332d 100644 +--- a/drivers/dma/idxd/init.c ++++ b/drivers/dma/idxd/init.c +@@ -218,6 +218,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) + } + bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); + } ++ mutex_init(&wq->uc_lock); ++ xa_init(&wq->upasid_xa); + idxd->wqs[i] = wq; + } + +diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c +index c811757d0f97f..0689464c4816a 100644 +--- a/drivers/dma/idxd/sysfs.c ++++ b/drivers/dma/idxd/sysfs.c +@@ -1315,6 +1315,7 @@ static void idxd_conf_wq_release(struct device *dev) + + bitmap_free(wq->opcap_bmap); + kfree(wq->wqcfg); ++ xa_destroy(&wq->upasid_xa); + kfree(wq); + } + +-- +2.39.5 + diff --git a/queue-6.1/dmaengine-idxd-add-per-dsa-wq-workqueue-for-processi.patch b/queue-6.1/dmaengine-idxd-add-per-dsa-wq-workqueue-for-processi.patch new file mode 100644 index 0000000000..4f51dfd3a4 --- /dev/null +++ b/queue-6.1/dmaengine-idxd-add-per-dsa-wq-workqueue-for-processi.patch @@ -0,0 +1,78 @@ +From c3f6037a3b216750c6848bdee862a31168554b3c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Apr 2023 13:31:33 -0700 +Subject: dmaengine: idxd: add per DSA wq workqueue for processing cr faults + +From: Dave Jiang + +[ Upstream commit 2f30decd2f23a376d2ed73dfe4c601421edf501a ] + +Add a workqueue for user submitted completion record fault processing. +The workqueue creation and destruction lifetime will be tied to the user +sub-driver since it will only be used when the wq is a user type. 
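+
+As a simplified sketch of that lifetime tie-in (error unwinding
+omitted; see the diff below for the full version), the workqueue only
+exists while the user sub-driver is bound:
+
+    /* user sub-driver probe */
+    wq->wq = create_workqueue(dev_name(wq_confdev(wq)));
+
+    /* user sub-driver remove */
+    destroy_workqueue(wq->wq);
+    wq->wq = NULL;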
+ +Tested-by: Tony Zhu +Signed-off-by: Dave Jiang +Co-developed-by: Fenghua Yu +Signed-off-by: Fenghua Yu +Link: https://lore.kernel.org/r/20230407203143.2189681-7-fenghua.yu@intel.com +Signed-off-by: Vinod Koul +Stable-dep-of: 8dfa57aabff6 ("dmaengine: idxd: Fix allowing write() from different address spaces") +Signed-off-by: Sasha Levin +--- + drivers/dma/idxd/cdev.c | 11 +++++++++++ + drivers/dma/idxd/idxd.h | 1 + + 2 files changed, 12 insertions(+) + +diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c +index 9f8adb7013eba..e2a89873c6e1a 100644 +--- a/drivers/dma/idxd/cdev.c ++++ b/drivers/dma/idxd/cdev.c +@@ -408,6 +408,13 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) + } + + mutex_lock(&wq->wq_lock); ++ ++ wq->wq = create_workqueue(dev_name(wq_confdev(wq))); ++ if (!wq->wq) { ++ rc = -ENOMEM; ++ goto wq_err; ++ } ++ + wq->type = IDXD_WQT_USER; + rc = drv_enable_wq(wq); + if (rc < 0) +@@ -426,7 +433,9 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) + err_cdev: + drv_disable_wq(wq); + err: ++ destroy_workqueue(wq->wq); + wq->type = IDXD_WQT_NONE; ++wq_err: + mutex_unlock(&wq->wq_lock); + return rc; + } +@@ -439,6 +448,8 @@ static void idxd_user_drv_remove(struct idxd_dev *idxd_dev) + idxd_wq_del_cdev(wq); + drv_disable_wq(wq); + wq->type = IDXD_WQT_NONE; ++ destroy_workqueue(wq->wq); ++ wq->wq = NULL; + mutex_unlock(&wq->wq_lock); + } + +diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h +index 14c6ef987fede..5dbb67ff1c0cb 100644 +--- a/drivers/dma/idxd/idxd.h ++++ b/drivers/dma/idxd/idxd.h +@@ -185,6 +185,7 @@ struct idxd_wq { + struct idxd_dev idxd_dev; + struct idxd_cdev *idxd_cdev; + struct wait_queue_head err_queue; ++ struct workqueue_struct *wq; + struct idxd_device *idxd; + int id; + struct idxd_irq_entry ie; +-- +2.39.5 + diff --git a/queue-6.1/dmaengine-idxd-fix-allowing-write-from-different-add.patch b/queue-6.1/dmaengine-idxd-fix-allowing-write-from-different-add.patch new file mode 100644 index 0000000000..8a47c6ba72 --- /dev/null +++ b/queue-6.1/dmaengine-idxd-fix-allowing-write-from-different-add.patch @@ -0,0 +1,59 @@ +From f97bc5be0e30ea1ab9867cf1267f386554ca69f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Apr 2025 10:03:37 -0700 +Subject: dmaengine: idxd: Fix allowing write() from different address spaces + +From: Vinicius Costa Gomes + +[ Upstream commit 8dfa57aabff625bf445548257f7711ef294cd30e ] + +Check if the process submitting the descriptor belongs to the same +address space as the one that opened the file, reject otherwise. 
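+
+A minimal sketch of the pattern (simplified, not the literal driver
+code): the opener's address space is recorded once at open() time, and
+every submission entry point compares against it:
+
+    /* at open(): remember the opener's address space */
+    ctx->mm = current->mm;
+
+    /* at write()/mmap()/poll(): reject callers from another mm */
+    if (current->mm != ctx->mm)
+        return -EPERM;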
+ +Fixes: 6827738dc684 ("dmaengine: idxd: add a write() method for applications to submit work") +Signed-off-by: Vinicius Costa Gomes +Signed-off-by: Dave Jiang +Link: https://lore.kernel.org/r/20250421170337.3008875-1-dave.jiang@intel.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +--- + drivers/dma/idxd/cdev.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c +index c7aa47f01df02..186f005bfa8fd 100644 +--- a/drivers/dma/idxd/cdev.c ++++ b/drivers/dma/idxd/cdev.c +@@ -240,6 +240,9 @@ static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma) + if (!idxd->user_submission_safe && !capable(CAP_SYS_RAWIO)) + return -EPERM; + ++ if (current->mm != ctx->mm) ++ return -EPERM; ++ + rc = check_vma(wq, vma, __func__); + if (rc < 0) + return rc; +@@ -306,6 +309,9 @@ static ssize_t idxd_cdev_write(struct file *filp, const char __user *buf, size_t + ssize_t written = 0; + int i; + ++ if (current->mm != ctx->mm) ++ return -EPERM; ++ + for (i = 0; i < len/sizeof(struct dsa_hw_desc); i++) { + int rc = idxd_submit_user_descriptor(ctx, udesc + i); + +@@ -326,6 +332,9 @@ static __poll_t idxd_cdev_poll(struct file *filp, + struct idxd_device *idxd = wq->idxd; + __poll_t out = 0; + ++ if (current->mm != ctx->mm) ++ return -EPERM; ++ + poll_wait(filp, &wq->err_queue, wait); + spin_lock(&idxd->dev_lock); + if (idxd->sw_err.valid) +-- +2.39.5 + diff --git a/queue-6.1/dmaengine-idxd-fix-poll-return-value.patch b/queue-6.1/dmaengine-idxd-fix-poll-return-value.patch new file mode 100644 index 0000000000..ab28f6c517 --- /dev/null +++ b/queue-6.1/dmaengine-idxd-fix-poll-return-value.patch @@ -0,0 +1,41 @@ +From 6972b6958b727bd67d562d755352feb080666629 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 May 2025 10:05:48 -0700 +Subject: dmaengine: idxd: Fix ->poll() return value + +From: Dave Jiang + +[ Upstream commit ae74cd15ade833adc289279b5c6f12e78f64d4d7 ] + +The fix to block access from different address space did not return a +correct value for ->poll() change. kernel test bot reported that a +return value of type __poll_t is expected rather than int. Fix to return +POLLNVAL to indicate invalid request. 
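+
+For context, the ->poll() hook returns a __poll_t event mask rather
+than a negative errno, so the corrected shape looks like this
+(simplified sketch; ctx comes from the open()-time context):
+
+    static __poll_t idxd_cdev_poll(struct file *filp,
+                                   struct poll_table_struct *wait)
+    {
+        if (current->mm != ctx->mm)
+            return POLLNVAL;    /* event mask, not -EPERM */
+        ...
+    }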
+ +Fixes: 8dfa57aabff6 ("dmaengine: idxd: Fix allowing write() from different address spaces") +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202505081851.rwD7jVxg-lkp@intel.com/ +Signed-off-by: Dave Jiang +Link: https://lore.kernel.org/r/20250508170548.2747425-1-dave.jiang@intel.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +--- + drivers/dma/idxd/cdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c +index 186f005bfa8fd..d736ab15ade24 100644 +--- a/drivers/dma/idxd/cdev.c ++++ b/drivers/dma/idxd/cdev.c +@@ -333,7 +333,7 @@ static __poll_t idxd_cdev_poll(struct file *filp, + __poll_t out = 0; + + if (current->mm != ctx->mm) +- return -EPERM; ++ return POLLNVAL; + + poll_wait(filp, &wq->err_queue, wait); + spin_lock(&idxd->dev_lock); +-- +2.39.5 + diff --git a/queue-6.1/espintcp-remove-encap-socket-caching-to-avoid-refere.patch b/queue-6.1/espintcp-remove-encap-socket-caching-to-avoid-refere.patch new file mode 100644 index 0000000000..ef3a8289ca --- /dev/null +++ b/queue-6.1/espintcp-remove-encap-socket-caching-to-avoid-refere.patch @@ -0,0 +1,252 @@ +From f12d15deab91dea075c1396198801887c0aba2f4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Apr 2025 15:59:57 +0200 +Subject: espintcp: remove encap socket caching to avoid reference leak + +From: Sabrina Dubroca + +[ Upstream commit 028363685bd0b7a19b4a820f82dd905b1dc83999 ] + +The current scheme for caching the encap socket can lead to reference +leaks when we try to delete the netns. + +The reference chain is: xfrm_state -> enacp_sk -> netns + +Since the encap socket is a userspace socket, it holds a reference on +the netns. If we delete the espintcp state (through flush or +individual delete) before removing the netns, the reference on the +socket is dropped and the netns is correctly deleted. Otherwise, the +netns may not be reachable anymore (if all processes within the ns +have terminated), so we cannot delete the xfrm state to drop its +reference on the socket. + +This patch results in a small (~2% in my tests) performance +regression. + +A GC-type mechanism could be added for the socket cache, to clear +references if the state hasn't been used "recently", but it's a lot +more complex than just not caching the socket. 
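+
+With the cache gone, callers follow the plain lookup/put pattern
+instead (simplified sketch of the resulting flow):
+
+    sk = esp_find_tcp_sk(x);    /* fresh lookup, takes a reference */
+    if (IS_ERR(sk))
+        return ERR_CAST(sk);
+    /* ... transmit via sk ... */
+    sock_put(sk);               /* drop it; nothing is cached */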
+ +Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Simon Horman +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + include/net/xfrm.h | 1 - + net/ipv4/esp4.c | 49 ++++--------------------------------------- + net/ipv6/esp6.c | 49 ++++--------------------------------------- + net/xfrm/xfrm_state.c | 3 --- + 4 files changed, 8 insertions(+), 94 deletions(-) + +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index bf670929622dc..64911162ab5f4 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -212,7 +212,6 @@ struct xfrm_state { + + /* Data for encapsulator */ + struct xfrm_encap_tmpl *encap; +- struct sock __rcu *encap_sk; + + /* Data for care-of address */ + xfrm_address_t *coaddr; +diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c +index 419969b268225..8f5417ff355d7 100644 +--- a/net/ipv4/esp4.c ++++ b/net/ipv4/esp4.c +@@ -118,47 +118,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) + } + + #ifdef CONFIG_INET_ESPINTCP +-struct esp_tcp_sk { +- struct sock *sk; +- struct rcu_head rcu; +-}; +- +-static void esp_free_tcp_sk(struct rcu_head *head) +-{ +- struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); +- +- sock_put(esk->sk); +- kfree(esk); +-} +- + static struct sock *esp_find_tcp_sk(struct xfrm_state *x) + { + struct xfrm_encap_tmpl *encap = x->encap; + struct net *net = xs_net(x); +- struct esp_tcp_sk *esk; + __be16 sport, dport; +- struct sock *nsk; + struct sock *sk; + +- sk = rcu_dereference(x->encap_sk); +- if (sk && sk->sk_state == TCP_ESTABLISHED) +- return sk; +- + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; +- nsk = rcu_dereference_protected(x->encap_sk, +- lockdep_is_held(&x->lock)); +- if (sk && sk == nsk) { +- esk = kmalloc(sizeof(*esk), GFP_ATOMIC); +- if (!esk) { +- spin_unlock_bh(&x->lock); +- return ERR_PTR(-ENOMEM); +- } +- RCU_INIT_POINTER(x->encap_sk, NULL); +- esk->sk = sk; +- call_rcu(&esk->rcu, esp_free_tcp_sk); +- } + spin_unlock_bh(&x->lock); + + sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4, +@@ -171,20 +140,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x) + return ERR_PTR(-EINVAL); + } + +- spin_lock_bh(&x->lock); +- nsk = rcu_dereference_protected(x->encap_sk, +- lockdep_is_held(&x->lock)); +- if (encap->encap_sport != sport || +- encap->encap_dport != dport) { +- sock_put(sk); +- sk = nsk ?: ERR_PTR(-EREMCHG); +- } else if (sk == nsk) { +- sock_put(sk); +- } else { +- rcu_assign_pointer(x->encap_sk, sk); +- } +- spin_unlock_bh(&x->lock); +- + return sk; + } + +@@ -207,6 +162,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) + err = espintcp_push_skb(sk, skb); + bh_unlock_sock(sk); + ++ sock_put(sk); ++ + out: + rcu_read_unlock(); + return err; +@@ -391,6 +348,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x, + if (IS_ERR(sk)) + return ERR_CAST(sk); + ++ sock_put(sk); ++ + *lenp = htons(len); + esph = (struct ip_esp_hdr *)(lenp + 1); + +diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c +index a021c88d3d9b8..085a83b807afd 100644 +--- a/net/ipv6/esp6.c ++++ b/net/ipv6/esp6.c +@@ -135,47 +135,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) + } + + #ifdef CONFIG_INET6_ESPINTCP +-struct esp_tcp_sk { +- struct sock *sk; +- struct rcu_head rcu; +-}; +- +-static void esp_free_tcp_sk(struct rcu_head *head) +-{ +- struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); +- +- 
sock_put(esk->sk); +- kfree(esk); +-} +- + static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) + { + struct xfrm_encap_tmpl *encap = x->encap; + struct net *net = xs_net(x); +- struct esp_tcp_sk *esk; + __be16 sport, dport; +- struct sock *nsk; + struct sock *sk; + +- sk = rcu_dereference(x->encap_sk); +- if (sk && sk->sk_state == TCP_ESTABLISHED) +- return sk; +- + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; +- nsk = rcu_dereference_protected(x->encap_sk, +- lockdep_is_held(&x->lock)); +- if (sk && sk == nsk) { +- esk = kmalloc(sizeof(*esk), GFP_ATOMIC); +- if (!esk) { +- spin_unlock_bh(&x->lock); +- return ERR_PTR(-ENOMEM); +- } +- RCU_INIT_POINTER(x->encap_sk, NULL); +- esk->sk = sk; +- call_rcu(&esk->rcu, esp_free_tcp_sk); +- } + spin_unlock_bh(&x->lock); + + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6, +@@ -188,20 +157,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) + return ERR_PTR(-EINVAL); + } + +- spin_lock_bh(&x->lock); +- nsk = rcu_dereference_protected(x->encap_sk, +- lockdep_is_held(&x->lock)); +- if (encap->encap_sport != sport || +- encap->encap_dport != dport) { +- sock_put(sk); +- sk = nsk ?: ERR_PTR(-EREMCHG); +- } else if (sk == nsk) { +- sock_put(sk); +- } else { +- rcu_assign_pointer(x->encap_sk, sk); +- } +- spin_unlock_bh(&x->lock); +- + return sk; + } + +@@ -224,6 +179,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) + err = espintcp_push_skb(sk, skb); + bh_unlock_sock(sk); + ++ sock_put(sk); ++ + out: + rcu_read_unlock(); + return err; +@@ -427,6 +384,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + if (IS_ERR(sk)) + return ERR_CAST(sk); + ++ sock_put(sk); ++ + *lenp = htons(len); + esph = (struct ip_esp_hdr *)(lenp + 1); + +diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c +index 2f4cf976b59a3..b5047a94c7d01 100644 +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -694,9 +694,6 @@ int __xfrm_state_delete(struct xfrm_state *x) + net->xfrm.state_num--; + spin_unlock(&net->xfrm.xfrm_state_lock); + +- if (x->encap_sk) +- sock_put(rcu_dereference_raw(x->encap_sk)); +- + xfrm_dev_state_delete(x); + + /* All xfrm_state objects are created by xfrm_state_alloc. +-- +2.39.5 + diff --git a/queue-6.1/ice-fix-vf-num_mac-count-with-port-representors.patch b/queue-6.1/ice-fix-vf-num_mac-count-with-port-representors.patch new file mode 100644 index 0000000000..3394fc36d4 --- /dev/null +++ b/queue-6.1/ice-fix-vf-num_mac-count-with-port-representors.patch @@ -0,0 +1,53 @@ +From 22eb6ab71da9d347ceba6af60163e681430085d0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Apr 2025 11:13:52 -0700 +Subject: ice: fix vf->num_mac count with port representors + +From: Jacob Keller + +[ Upstream commit bbd95160a03dbfcd01a541f25c27ddb730dfbbd5 ] + +The ice_vc_repr_add_mac() function indicates that it does not store the MAC +address filters in the firmware. However, it still increments vf->num_mac. +This is incorrect, as vf->num_mac should represent the number of MAC +filters currently programmed to firmware. + +Indeed, we only perform this increment if the requested filter is a unicast +address that doesn't match the existing vf->hw_lan_addr. In addition, +ice_vc_repr_del_mac() does not decrement the vf->num_mac counter. This +results in the counter becoming out of sync with the actual count. + +As it turns out, vf->num_mac is currently only used in legacy made without +port representors. 
The single place where the value is checked is for +enforcing a filter limit on untrusted VFs. + +Upcoming patches to support VF Live Migration will use this value when +determining the size of the TLV for MAC address filters. Fix the +representor mode function to stop incrementing the counter incorrectly. + +Fixes: ac19e03ef780 ("ice: allow process VF opcodes in different ways") +Signed-off-by: Jacob Keller +Reviewed-by: Michal Swiatkowski +Reviewed-by: Simon Horman +Tested-by: Sujai Buvaneswaran +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_virtchnl.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c +index 42d8e5e771b7e..fa9d928081d63 100644 +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c +@@ -3551,7 +3551,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) + } + + ice_vfhw_mac_add(vf, &al->list[i]); +- vf->num_mac++; + break; + } + +-- +2.39.5 + diff --git a/queue-6.1/io_uring-fix-overflow-resched-cqe-reordering.patch b/queue-6.1/io_uring-fix-overflow-resched-cqe-reordering.patch new file mode 100644 index 0000000000..9c8336b25c --- /dev/null +++ b/queue-6.1/io_uring-fix-overflow-resched-cqe-reordering.patch @@ -0,0 +1,38 @@ +From fa5549c99e2ae67200753c34fe522451ce751cc4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 17 May 2025 13:27:37 +0100 +Subject: io_uring: fix overflow resched cqe reordering + +From: Pavel Begunkov + +[ Upstream commit a7d755ed9ce9738af3db602eb29d32774a180bc7 ] + +Leaving the CQ critical section in the middle of a overflow flushing +can cause cqe reordering since the cache cq pointers are reset and any +new cqe emitters that might get called in between are not going to be +forced into io_cqe_cache_refill(). + +Fixes: eac2ca2d682f9 ("io_uring: check if we need to reschedule during overflow flush") +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/90ba817f1a458f091f355f407de1c911d2b93bbf.1747483784.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index f39d66589180e..ad462724246a7 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -627,6 +627,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) + * to care for a non-real case. + */ + if (need_resched()) { ++ ctx->cqe_sentinel = ctx->cqe_cached; + io_cq_unlock_post(ctx); + mutex_unlock(&ctx->uring_lock); + cond_resched(); +-- +2.39.5 + diff --git a/queue-6.1/net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch b/queue-6.1/net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch new file mode 100644 index 0000000000..1a41df43fb --- /dev/null +++ b/queue-6.1/net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch @@ -0,0 +1,48 @@ +From 79260d384fbd3db3c98166d5be9c99fc5d90bf37 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 May 2025 18:49:36 +0200 +Subject: net: dwmac-sun8i: Use parsed internal PHY address instead of 1 + +From: Paul Kocialkowski + +[ Upstream commit 47653e4243f2b0a26372e481ca098936b51ec3a8 ] + +While the MDIO address of the internal PHY on Allwinner sun8i chips is +generally 1, of_mdio_parse_addr is used to cleanly parse the address +from the device-tree instead of hardcoding it. 
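+
+For illustration, the helper's use looks roughly like this (simplified
+sketch; "dev" and "np" stand in for the device and the PHY's
+device-tree node):
+
+    ret = of_mdio_parse_addr(dev, np);  /* reads "reg", checks 0-31 */
+    if (ret < 0)
+        return ret;
+    reg |= ret << H3_EPHY_ADDR_SHIFT;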
+ +A commit reworking the code ditched the parsed value and hardcoded the +value 1 instead, which didn't really break anything but is more fragile +and not future-proof. + +Restore the initial behavior using the parsed address returned from the +helper. + +Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") +Signed-off-by: Paul Kocialkowski +Reviewed-by: Andrew Lunn +Acked-by: Corentin LABBE +Tested-by: Corentin LABBE +Link: https://patch.msgid.link/20250519164936.4172658-1-paulk@sys-base.io +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +index f834472599f75..0921b78c6244f 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +@@ -948,7 +948,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev, + /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY + * address. No need to mask it again. + */ +- reg |= 1 << H3_EPHY_ADDR_SHIFT; ++ reg |= ret << H3_EPHY_ADDR_SHIFT; + } else { + /* For SoCs without internal PHY the PHY selection bit should be + * set to 0 (external PHY). +-- +2.39.5 + diff --git a/queue-6.1/net-lan743x-restore-sgmii-ctrl-register-on-resume.patch b/queue-6.1/net-lan743x-restore-sgmii-ctrl-register-on-resume.patch new file mode 100644 index 0000000000..2fc710184e --- /dev/null +++ b/queue-6.1/net-lan743x-restore-sgmii-ctrl-register-on-resume.patch @@ -0,0 +1,92 @@ +From e1dfb6e5d724069d5f0afcfb735055ee10231eb6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 16 May 2025 09:27:19 +0530 +Subject: net: lan743x: Restore SGMII CTRL register on resume + +From: Thangaraj Samynathan + +[ Upstream commit 293e38ff4e4c2ba53f3fd47d8a4a9f0f0414a7a6 ] + +SGMII_CTRL register, which specifies the active interface, was not +properly restored when resuming from suspend. This led to incorrect +interface selection after resume particularly in scenarios involving +the FPGA. + +To fix this: +- Move the SGMII_CTRL setup out of the probe function. +- Initialize the register in the hardware initialization helper function, +which is called during both device initialization and resume. + +This ensures the interface configuration is consistently restored after +suspend/resume cycles. 
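+
+A simplified sketch of the resulting call flow:
+
+    probe()  -> lan743x_hardware_init() -> SGMII_CTL programmed
+    resume() -> lan743x_hardware_init() -> SGMII_CTL restored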
+ +Fixes: a46d9d37c4f4f ("net: lan743x: Add support for SGMII interface") +Signed-off-by: Thangaraj Samynathan +Link: https://patch.msgid.link/20250516035719.117960-1-thangaraj.s@microchip.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/microchip/lan743x_main.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c +index 2e69ba0143b15..fd35554191793 100644 +--- a/drivers/net/ethernet/microchip/lan743x_main.c ++++ b/drivers/net/ethernet/microchip/lan743x_main.c +@@ -3253,6 +3253,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, + struct pci_dev *pdev) + { + struct lan743x_tx *tx; ++ u32 sgmii_ctl; + int index; + int ret; + +@@ -3265,6 +3266,15 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, + spin_lock_init(&adapter->eth_syslock_spinlock); + mutex_init(&adapter->sgmii_rw_lock); + pci11x1x_set_rfe_rd_fifo_threshold(adapter); ++ sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); ++ if (adapter->is_sgmii_en) { ++ sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; ++ sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; ++ } else { ++ sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; ++ sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; ++ } ++ lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); + } else { + adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; + adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; +@@ -3313,7 +3323,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, + + static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) + { +- u32 sgmii_ctl; + int ret; + + adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev); +@@ -3325,10 +3334,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) + adapter->mdiobus->priv = (void *)adapter; + if (adapter->is_pci11x1x) { + if (adapter->is_sgmii_en) { +- sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); +- sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; +- sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; +- lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); + netif_dbg(adapter, drv, adapter->netdev, + "SGMII operation\n"); + adapter->mdiobus->probe_capabilities = MDIOBUS_C22_C45; +@@ -3338,10 +3343,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) + netif_dbg(adapter, drv, adapter->netdev, + "lan743x-mdiobus-c45\n"); + } else { +- sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); +- sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; +- sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; +- lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); + netif_dbg(adapter, drv, adapter->netdev, + "RGMII operation\n"); + // Only C22 support when RGMII I/F +-- +2.39.5 + diff --git a/queue-6.1/net-tipc-fix-slab-use-after-free-read-in-tipc_aead_e.patch b/queue-6.1/net-tipc-fix-slab-use-after-free-read-in-tipc_aead_e.patch new file mode 100644 index 0000000000..3f62daf058 --- /dev/null +++ b/queue-6.1/net-tipc-fix-slab-use-after-free-read-in-tipc_aead_e.patch @@ -0,0 +1,125 @@ +From 1bc898f32032b96012c3ade397c3cdd3f986702b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 May 2025 18:14:04 +0800 +Subject: net/tipc: fix slab-use-after-free Read in tipc_aead_encrypt_done + +From: Wang Liang + +[ Upstream commit e279024617134c94fd3e37470156534d5f2b3472 ] + +Syzbot reported a slab-use-after-free with the following call trace: + + ================================================================== + BUG: KASAN: slab-use-after-free in tipc_aead_encrypt_done+0x4bd/0x510 
net/tipc/crypto.c:840 + Read of size 8 at addr ffff88807a733000 by task kworker/1:0/25 + + Call Trace: + kasan_report+0xd9/0x110 mm/kasan/report.c:601 + tipc_aead_encrypt_done+0x4bd/0x510 net/tipc/crypto.c:840 + crypto_request_complete include/crypto/algapi.h:266 + aead_request_complete include/crypto/internal/aead.h:85 + cryptd_aead_crypt+0x3b8/0x750 crypto/cryptd.c:772 + crypto_request_complete include/crypto/algapi.h:266 + cryptd_queue_worker+0x131/0x200 crypto/cryptd.c:181 + process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231 + + Allocated by task 8355: + kzalloc_noprof include/linux/slab.h:778 + tipc_crypto_start+0xcc/0x9e0 net/tipc/crypto.c:1466 + tipc_init_net+0x2dd/0x430 net/tipc/core.c:72 + ops_init+0xb9/0x650 net/core/net_namespace.c:139 + setup_net+0x435/0xb40 net/core/net_namespace.c:343 + copy_net_ns+0x2f0/0x670 net/core/net_namespace.c:508 + create_new_namespaces+0x3ea/0xb10 kernel/nsproxy.c:110 + unshare_nsproxy_namespaces+0xc0/0x1f0 kernel/nsproxy.c:228 + ksys_unshare+0x419/0x970 kernel/fork.c:3323 + __do_sys_unshare kernel/fork.c:3394 + + Freed by task 63: + kfree+0x12a/0x3b0 mm/slub.c:4557 + tipc_crypto_stop+0x23c/0x500 net/tipc/crypto.c:1539 + tipc_exit_net+0x8c/0x110 net/tipc/core.c:119 + ops_exit_list+0xb0/0x180 net/core/net_namespace.c:173 + cleanup_net+0x5b7/0xbf0 net/core/net_namespace.c:640 + process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231 + +After freed the tipc_crypto tx by delete namespace, tipc_aead_encrypt_done +may still visit it in cryptd_queue_worker workqueue. + +I reproduce this issue by: + ip netns add ns1 + ip link add veth1 type veth peer name veth2 + ip link set veth1 netns ns1 + ip netns exec ns1 tipc bearer enable media eth dev veth1 + ip netns exec ns1 tipc node set key this_is_a_master_key master + ip netns exec ns1 tipc bearer disable media eth dev veth1 + ip netns del ns1 + +The key of reproduction is that, simd_aead_encrypt is interrupted, leading +to crypto_simd_usable() return false. Thus, the cryptd_queue_worker is +triggered, and the tipc_crypto tx will be visited. + + tipc_disc_timeout + tipc_bearer_xmit_skb + tipc_crypto_xmit + tipc_aead_encrypt + crypto_aead_encrypt + // encrypt() + simd_aead_encrypt + // crypto_simd_usable() is false + child = &ctx->cryptd_tfm->base; + + simd_aead_encrypt + crypto_aead_encrypt + // encrypt() + cryptd_aead_encrypt_enqueue + cryptd_aead_enqueue + cryptd_enqueue_request + // trigger cryptd_queue_worker + queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work) + +Fix this by holding net reference count before encrypt. 
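+
+The shape of the fix, as a simplified sketch: the namespace is pinned
+across the (possibly asynchronous) crypto request and released on both
+completion paths:
+
+    get_net(aead->crypto->net);     /* before the encrypt call */
+    rc = crypto_aead_encrypt(req);
+    if (rc == -EINPROGRESS || rc == -EBUSY)
+        return rc;                  /* async: callback does put_net() */
+    put_net(aead->crypto->net);     /* synchronous completion */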
+ +Reported-by: syzbot+55c12726619ff85ce1f6@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=55c12726619ff85ce1f6 +Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") +Signed-off-by: Wang Liang +Link: https://patch.msgid.link/20250520101404.1341730-1-wangliang74@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/tipc/crypto.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c +index 25c18f8783ce9..a9c02fac039b5 100644 +--- a/net/tipc/crypto.c ++++ b/net/tipc/crypto.c +@@ -817,12 +817,16 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, + goto exit; + } + ++ /* Get net to avoid freed tipc_crypto when delete namespace */ ++ get_net(aead->crypto->net); ++ + /* Now, do encrypt */ + rc = crypto_aead_encrypt(req); + if (rc == -EINPROGRESS || rc == -EBUSY) + return rc; + + tipc_bearer_put(b); ++ put_net(aead->crypto->net); + + exit: + kfree(ctx); +@@ -860,6 +864,7 @@ static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err) + kfree(tx_ctx); + tipc_bearer_put(b); + tipc_aead_put(aead); ++ put_net(net); + } + + /** +-- +2.39.5 + diff --git a/queue-6.1/octeontx2-af-fix-apr-entry-mapping-based-on-apr_lmt_.patch b/queue-6.1/octeontx2-af-fix-apr-entry-mapping-based-on-apr_lmt_.patch new file mode 100644 index 0000000000..5383012367 --- /dev/null +++ b/queue-6.1/octeontx2-af-fix-apr-entry-mapping-based-on-apr_lmt_.patch @@ -0,0 +1,111 @@ +From d029cfff9be19b26fbc810d68f78b4a72d09fd7c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 May 2025 11:38:34 +0530 +Subject: octeontx2-af: Fix APR entry mapping based on APR_LMT_CFG + +From: Geetha sowjanya + +[ Upstream commit a6ae7129819ad20788e610261246e71736543b8b ] + +The current implementation maps the APR table using a fixed size, +which can lead to incorrect mapping when the number of PFs and VFs +varies. +This patch corrects the mapping by calculating the APR table +size dynamically based on the values configured in the +APR_LMT_CFG register, ensuring accurate representation +of APR entries in debugfs. + +Fixes: 0daa55d033b0 ("octeontx2-af: cn10k: debugfs for dumping LMTST map table"). 
+Signed-off-by: Geetha sowjanya +Link: https://patch.msgid.link/20250521060834.19780-3-gakula@marvell.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c | 9 ++++++--- + .../net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 11 ++++++++--- + 2 files changed, 14 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +index 6ec0609074dca..5cd45846237e2 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +@@ -13,7 +13,6 @@ + /* RVU LMTST */ + #define LMT_TBL_OP_READ 0 + #define LMT_TBL_OP_WRITE 1 +-#define LMT_MAP_TABLE_SIZE (128 * 1024) + #define LMT_MAPTBL_ENTRY_SIZE 16 + #define LMT_MAX_VFS 256 + +@@ -26,10 +25,14 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, + { + void __iomem *lmt_map_base; + u64 tbl_base, cfg; ++ int pfs, vfs; + + tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); ++ cfg = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); ++ vfs = 1 << (cfg & 0xF); ++ pfs = 1 << ((cfg >> 4) & 0x7); + +- lmt_map_base = ioremap_wc(tbl_base, LMT_MAP_TABLE_SIZE); ++ lmt_map_base = ioremap_wc(tbl_base, pfs * vfs * LMT_MAPTBL_ENTRY_SIZE); + if (!lmt_map_base) { + dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); + return -ENOMEM; +@@ -80,7 +83,7 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc, + + mutex_lock(&rvu->rsrc_lock); + rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova); +- pf = rvu_get_pf(pcifunc) & 0x1F; ++ pf = rvu_get_pf(pcifunc) & RVU_PFVF_PF_MASK; + val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 | + ((pcifunc & RVU_PFVF_FUNC_MASK) & 0xFF); + rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TXN_REQ, val); +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +index a3c1d82032f55..aa2ab987eb752 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +@@ -580,6 +580,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, + u64 lmt_addr, val, tbl_base; + int pf, vf, num_vfs, hw_vfs; + void __iomem *lmt_map_base; ++ int apr_pfs, apr_vfs; + int buf_size = 10240; + size_t off = 0; + int index = 0; +@@ -595,8 +596,12 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, + return -ENOMEM; + + tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); ++ val = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); ++ apr_vfs = 1 << (val & 0xF); ++ apr_pfs = 1 << ((val >> 4) & 0x7); + +- lmt_map_base = ioremap_wc(tbl_base, 128 * 1024); ++ lmt_map_base = ioremap_wc(tbl_base, apr_pfs * apr_vfs * ++ LMT_MAPTBL_ENTRY_SIZE); + if (!lmt_map_base) { + dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); + kfree(buf); +@@ -618,7 +623,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, + off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d \t\t\t", + pf); + +- index = pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE; ++ index = pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE; + off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%llx\t\t", + (tbl_base + index)); + lmt_addr = readq(lmt_map_base + index); +@@ -631,7 +636,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, + /* Reading num of VFs per PF */ + rvu_get_pf_numvfs(rvu, pf, &num_vfs, &hw_vfs); + for (vf = 0; vf < num_vfs; 
vf++) { +- index = (pf * rvu->hw->total_vfs * 16) + ++ index = (pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE) + + ((vf + 1) * LMT_MAPTBL_ENTRY_SIZE); + off += scnprintf(&buf[off], buf_size - 1 - off, + "PF%d:VF%d \t\t", pf, vf); +-- +2.39.5 + diff --git a/queue-6.1/octeontx2-af-set-lmt_ena-bit-for-apr-table-entries.patch b/queue-6.1/octeontx2-af-set-lmt_ena-bit-for-apr-table-entries.patch new file mode 100644 index 0000000000..013f8f8c59 --- /dev/null +++ b/queue-6.1/octeontx2-af-set-lmt_ena-bit-for-apr-table-entries.patch @@ -0,0 +1,76 @@ +From dfd0231df3e4b1b886b044e375e07c68c3bdb3a1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 May 2025 11:38:33 +0530 +Subject: octeontx2-af: Set LMT_ENA bit for APR table entries + +From: Subbaraya Sundeep + +[ Upstream commit 0eefa27b493306928d88af6368193b134c98fd64 ] + +This patch enables the LMT line for a PF/VF by setting the +LMT_ENA bit in the APR_LMT_MAP_ENTRY_S structure. + +Additionally, it simplifies the logic for calculating the +LMTST table index by consistently using the maximum +number of hw supported VFs (i.e., 256). + +Fixes: 873a1e3d207a ("octeontx2-af: cn10k: Setting up lmtst map table"). +Signed-off-by: Subbaraya Sundeep +Signed-off-by: Geetha sowjanya +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/20250521060834.19780-2-gakula@marvell.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../net/ethernet/marvell/octeontx2/af/rvu_cn10k.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +index f9faa5b23bb9d..6ec0609074dca 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +@@ -15,13 +15,17 @@ + #define LMT_TBL_OP_WRITE 1 + #define LMT_MAP_TABLE_SIZE (128 * 1024) + #define LMT_MAPTBL_ENTRY_SIZE 16 ++#define LMT_MAX_VFS 256 ++ ++#define LMT_MAP_ENTRY_ENA BIT_ULL(20) ++#define LMT_MAP_ENTRY_LINES GENMASK_ULL(18, 16) + + /* Function to perform operations (read/write) on lmtst map table */ + static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, + int lmt_tbl_op) + { + void __iomem *lmt_map_base; +- u64 tbl_base; ++ u64 tbl_base, cfg; + + tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + +@@ -35,6 +39,13 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, + *val = readq(lmt_map_base + index); + } else { + writeq((*val), (lmt_map_base + index)); ++ ++ cfg = FIELD_PREP(LMT_MAP_ENTRY_ENA, 0x1); ++ /* 2048 LMTLINES */ ++ cfg |= FIELD_PREP(LMT_MAP_ENTRY_LINES, 0x6); ++ ++ writeq(cfg, (lmt_map_base + (index + 8))); ++ + /* Flushing the AP interceptor cache to make APR_LMT_MAP_ENTRY_S + * changes effective. Write 1 for flush and read is being used as a + * barrier and sets up a data dependency. 
Write to 0 after a write +@@ -52,7 +63,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, + #define LMT_MAP_TBL_W1_OFF 8 + static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc) + { +- return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) + ++ return ((rvu_get_pf(pcifunc) * LMT_MAX_VFS) + + (pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE; + } + +-- +2.39.5 + diff --git a/queue-6.1/octeontx2-pf-add-af_xdp-non-zero-copy-support.patch b/queue-6.1/octeontx2-pf-add-af_xdp-non-zero-copy-support.patch new file mode 100644 index 0000000000..30710bc92f --- /dev/null +++ b/queue-6.1/octeontx2-pf-add-af_xdp-non-zero-copy-support.patch @@ -0,0 +1,51 @@ +From 203e22ead866fb870b6b82d04c20571491035768 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Feb 2025 11:01:37 +0530 +Subject: octeontx2-pf: Add AF_XDP non-zero copy support + +From: Suman Ghosh + +[ Upstream commit b4164de5041b51cda3438e75bce668e2556057c3 ] + +Set xdp rx ring memory type as MEM_TYPE_PAGE_POOL for +af-xdp to work. This is needed since xdp_return_frame +internally will use page pools. + +Fixes: 06059a1a9a4a ("octeontx2-pf: Add XDP support to netdev PF") +Signed-off-by: Suman Ghosh +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +index 5e11599d13223..59a7e6f376f47 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +@@ -988,6 +988,7 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) + int err, pool_id, non_xdp_queues; + struct nix_aq_enq_req *aq; + struct otx2_cq_queue *cq; ++ struct otx2_pool *pool; + + cq = &qset->cq[qidx]; + cq->cq_idx = qidx; +@@ -996,8 +997,13 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) + cq->cq_type = CQ_RX; + cq->cint_idx = qidx; + cq->cqe_cnt = qset->rqe_cnt; +- if (pfvf->xdp_prog) ++ if (pfvf->xdp_prog) { ++ pool = &qset->pool[qidx]; + xdp_rxq_info_reg(&cq->xdp_rxq, pfvf->netdev, qidx, 0); ++ xdp_rxq_info_reg_mem_model(&cq->xdp_rxq, ++ MEM_TYPE_PAGE_POOL, ++ pool->page_pool); ++ } + } else if (qidx < non_xdp_queues) { + cq->cq_type = CQ_TX; + cq->cint_idx = qidx - pfvf->hw.rx_queues; +-- +2.39.5 + diff --git a/queue-6.1/octeontx2-pf-add-support-for-page-pool.patch b/queue-6.1/octeontx2-pf-add-support-for-page-pool.patch new file mode 100644 index 0000000000..d52e09f794 --- /dev/null +++ b/queue-6.1/octeontx2-pf-add-support-for-page-pool.patch @@ -0,0 +1,365 @@ +From 06c9a36ad9e3c22f0e5169626a0d9d56b5c56f87 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 May 2023 07:34:04 +0530 +Subject: octeontx2-pf: Add support for page pool + +From: Ratheesh Kannoth + +[ Upstream commit b2e3406a38f0f48b1dfb81e5bb73d243ff6af179 ] + +Page pool for each rx queue enhance rx side performance +by reclaiming buffers back to each queue specific pool. DMA +mapping is done only for first allocation of buffers. +As subsequent buffers allocation avoid DMA mapping, +it results in performance improvement. 
+
+Image            | Performance
+-----------------|------------
+Vanilla          | 3 Mpps
+With this change | 42 Mpps
+
+Signed-off-by: Ratheesh Kannoth
+Link: https://lore.kernel.org/r/20230522020404.152020-1-rkannoth@marvell.com
+Signed-off-by: Paolo Abeni
+Stable-dep-of: b4164de5041b ("octeontx2-pf: Add AF_XDP non-zero copy support")
+Signed-off-by: Sasha Levin
+---
+ .../net/ethernet/marvell/octeontx2/Kconfig    |  1 +
+ .../marvell/octeontx2/nic/otx2_common.c       | 78 ++++++++++++++++---
+ .../marvell/octeontx2/nic/otx2_common.h       |  6 +-
+ .../ethernet/marvell/octeontx2/nic/otx2_pf.c  | 11 ++-
+ .../marvell/octeontx2/nic/otx2_txrx.c         | 19 +++--
+ .../marvell/octeontx2/nic/otx2_txrx.h         |  1 +
+ .../ethernet/marvell/octeontx2/nic/qos_sq.c   |  2 +-
+ 7 files changed, 96 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig
+index 993ac180a5db8..a32d85d6f599f 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/Kconfig
++++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig
+@@ -32,6 +32,7 @@ config OCTEONTX2_PF
+ 	tristate "Marvell OcteonTX2 NIC Physical Function driver"
+ 	select OCTEONTX2_MBOX
+ 	select NET_DEVLINK
++	select PAGE_POOL
+ 	depends on (64BIT && COMPILE_TEST) || ARM64
+ 	select DIMLIB
+ 	depends on PCI
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+index d05f91f97a9af..5e11599d13223 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+@@ -513,11 +513,32 @@ void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx)
+ 		     (pfvf->hw.cq_ecount_wait - 1));
+ }
+ 
++static int otx2_alloc_pool_buf(struct otx2_nic *pfvf, struct otx2_pool *pool,
++			       dma_addr_t *dma)
++{
++	unsigned int offset = 0;
++	struct page *page;
++	size_t sz;
++
++	sz = SKB_DATA_ALIGN(pool->rbsize);
++	sz = ALIGN(sz, OTX2_ALIGN);
++
++	page = page_pool_alloc_frag(pool->page_pool, &offset, sz, GFP_ATOMIC);
++	if (unlikely(!page))
++		return -ENOMEM;
++
++	*dma = page_pool_get_dma_addr(page) + offset;
++	return 0;
++}
++
+ static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+			     dma_addr_t *dma)
+ {
+ 	u8 *buf;
+ 
++	if (pool->page_pool)
++		return otx2_alloc_pool_buf(pfvf, pool, dma);
++
+ 	buf = napi_alloc_frag_align(pool->rbsize, OTX2_ALIGN);
+ 	if (unlikely(!buf))
+ 		return -ENOMEM;
+@@ -1206,10 +1227,31 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf)
+ 	}
+ }
+ 
++void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool,
++		    u64 iova, int size)
++{
++	struct page *page;
++	u64 pa;
++
++	pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
++	page = virt_to_head_page(phys_to_virt(pa));
++
++	if (pool->page_pool) {
++		page_pool_put_full_page(pool->page_pool, page, true);
++	} else {
++		dma_unmap_page_attrs(pfvf->dev, iova, size,
++				     DMA_FROM_DEVICE,
++				     DMA_ATTR_SKIP_CPU_SYNC);
++
++		put_page(page);
++	}
++}
++
+ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
+ {
+ 	int pool_id, pool_start = 0, pool_end = 0, size = 0;
+-	u64 iova, pa;
++	struct otx2_pool *pool;
++	u64 iova;
+ 
+ 	if (type == AURA_NIX_SQ) {
+ 		pool_start = otx2_get_pool_idx(pfvf, type, 0);
+@@ -1225,15 +1267,13 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
+ 	/* Free SQB and RQB pointers from the aura pool */
+ 	for (pool_id = pool_start; pool_id < pool_end; pool_id++) {
+ 		iova = otx2_aura_allocptr(pfvf, pool_id);
++		pool = &pfvf->qset.pool[pool_id];
+ 		while (iova) {
+ if (type == AURA_NIX_RQ) + iova -= OTX2_HEAD_ROOM; + +- pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); +- dma_unmap_page_attrs(pfvf->dev, iova, size, +- DMA_FROM_DEVICE, +- DMA_ATTR_SKIP_CPU_SYNC); +- put_page(virt_to_page(phys_to_virt(pa))); ++ otx2_free_bufs(pfvf, pool, iova, size); ++ + iova = otx2_aura_allocptr(pfvf, pool_id); + } + } +@@ -1251,6 +1291,8 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf) + pool = &pfvf->qset.pool[pool_id]; + qmem_free(pfvf->dev, pool->stack); + qmem_free(pfvf->dev, pool->fc_addr); ++ page_pool_destroy(pool->page_pool); ++ pool->page_pool = NULL; + } + devm_kfree(pfvf->dev, pfvf->qset.pool); + pfvf->qset.pool = NULL; +@@ -1334,8 +1376,9 @@ int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, + } + + int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, +- int stack_pages, int numptrs, int buf_size) ++ int stack_pages, int numptrs, int buf_size, int type) + { ++ struct page_pool_params pp_params = { 0 }; + struct npa_aq_enq_req *aq; + struct otx2_pool *pool; + int err; +@@ -1379,6 +1422,22 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, + aq->ctype = NPA_AQ_CTYPE_POOL; + aq->op = NPA_AQ_INSTOP_INIT; + ++ if (type != AURA_NIX_RQ) { ++ pool->page_pool = NULL; ++ return 0; ++ } ++ ++ pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP; ++ pp_params.pool_size = numptrs; ++ pp_params.nid = NUMA_NO_NODE; ++ pp_params.dev = pfvf->dev; ++ pp_params.dma_dir = DMA_FROM_DEVICE; ++ pool->page_pool = page_pool_create(&pp_params); ++ if (IS_ERR(pool->page_pool)) { ++ netdev_err(pfvf->netdev, "Creation of page pool failed\n"); ++ return PTR_ERR(pool->page_pool); ++ } ++ + return 0; + } + +@@ -1413,7 +1472,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) + + /* Initialize pool context */ + err = otx2_pool_init(pfvf, pool_id, stack_pages, +- num_sqbs, hw->sqb_size); ++ num_sqbs, hw->sqb_size, AURA_NIX_SQ); + if (err) + goto fail; + } +@@ -1476,7 +1535,7 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) + } + for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { + err = otx2_pool_init(pfvf, pool_id, stack_pages, +- num_ptrs, pfvf->rbsize); ++ num_ptrs, pfvf->rbsize, AURA_NIX_RQ); + if (err) + goto fail; + } +@@ -1660,7 +1719,6 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) + req->bpid_per_chan = 0; + #endif + +- + return otx2_sync_mbox_msg(&pfvf->mbox); + } + EXPORT_SYMBOL(otx2_nix_config_bp); +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +index c15d1864a6371..4f0ac8158ed12 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +@@ -934,7 +934,7 @@ int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable); + void otx2_ctx_disable(struct mbox *mbox, int type, bool npa); + int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable); +-void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); ++void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx); + void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); + int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura); + int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); +@@ -942,7 +942,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); + int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, + dma_addr_t *dma); + int otx2_pool_init(struct otx2_nic 
*pfvf, u16 pool_id, +- int stack_pages, int numptrs, int buf_size); ++ int stack_pages, int numptrs, int buf_size, int type); + int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs); + +@@ -1012,6 +1012,8 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf); + int otx2_handle_ntuple_tc_features(struct net_device *netdev, + netdev_features_t features); + int otx2_smq_flush(struct otx2_nic *pfvf, int smq); ++void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool, ++ u64 iova, int size); + + /* tc support */ + int otx2_init_tc(struct otx2_nic *nic); +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +index 6b7fb324e756e..8385b46736934 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +@@ -1591,7 +1591,9 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) + struct nix_lf_free_req *free_req; + struct mbox *mbox = &pf->mbox; + struct otx2_cq_queue *cq; ++ struct otx2_pool *pool; + struct msg_req *req; ++ int pool_id; + int qidx; + + /* Ensure all SQE are processed */ +@@ -1618,7 +1620,7 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) + for (qidx = 0; qidx < qset->cq_cnt; qidx++) { + cq = &qset->cq[qidx]; + if (cq->cq_type == CQ_RX) +- otx2_cleanup_rx_cqes(pf, cq); ++ otx2_cleanup_rx_cqes(pf, cq, qidx); + else + otx2_cleanup_tx_cqes(pf, cq); + } +@@ -1629,6 +1631,13 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) + /* Free RQ buffer pointers*/ + otx2_free_aura_ptr(pf, AURA_NIX_RQ); + ++ for (qidx = 0; qidx < pf->hw.rx_queues; qidx++) { ++ pool_id = otx2_get_pool_idx(pf, AURA_NIX_RQ, qidx); ++ pool = &pf->qset.pool[pool_id]; ++ page_pool_destroy(pool->page_pool); ++ pool->page_pool = NULL; ++ } ++ + otx2_free_cq_res(pf); + + /* Free all ingress bandwidth profiles allocated */ +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +index e579183e52392..cc704cd3b5ae1 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +@@ -218,9 +218,6 @@ static bool otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb, + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + va - page_address(page) + off, + len - off, pfvf->rbsize); +- +- otx2_dma_unmap_page(pfvf, iova - OTX2_HEAD_ROOM, +- pfvf->rbsize, DMA_FROM_DEVICE); + return true; + } + +@@ -383,6 +380,8 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, + if (pfvf->netdev->features & NETIF_F_RXCSUM) + skb->ip_summed = CHECKSUM_UNNECESSARY; + ++ skb_mark_for_recycle(skb); ++ + napi_gro_frags(napi); + } + +@@ -1191,11 +1190,13 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, + } + EXPORT_SYMBOL(otx2_sq_append_skb); + +-void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) ++void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx) + { + struct nix_cqe_rx_s *cqe; ++ struct otx2_pool *pool; + int processed_cqe = 0; +- u64 iova, pa; ++ u16 pool_id; ++ u64 iova; + + if (pfvf->xdp_prog) + xdp_rxq_info_unreg(&cq->xdp_rxq); +@@ -1203,6 +1204,9 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) + if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) + return; + ++ pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx); ++ pool = &pfvf->qset.pool[pool_id]; ++ + while (cq->pend_cqe) { + cqe = 
(struct nix_cqe_rx_s *)otx2_get_next_cqe(cq);
+ 		processed_cqe++;
+@@ -1215,9 +1219,8 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
+ 			continue;
+ 		}
+ 		iova = cqe->sg.seg_addr - OTX2_HEAD_ROOM;
+-		pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
+-		otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, DMA_FROM_DEVICE);
+-		put_page(virt_to_page(phys_to_virt(pa)));
++
++		otx2_free_bufs(pfvf, pool, iova, pfvf->rbsize);
+ 	}
+ 
+ 	/* Free CQEs to HW */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+index 7ab6db9a986fa..b5d689eeff80b 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+@@ -118,6 +118,7 @@ struct otx2_cq_poll {
+ struct otx2_pool {
+ 	struct qmem		*stack;
+ 	struct qmem		*fc_addr;
++	struct page_pool	*page_pool;
+ 	u16			rbsize;
+ };
+ 
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
+index e142d43f5a62c..95a2c8e616bd8 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
+@@ -63,7 +63,7 @@ static int otx2_qos_sq_aura_pool_init(struct otx2_nic *pfvf, int qidx)
+ 
+ 	/* Initialize pool context */
+ 	err = otx2_pool_init(pfvf, pool_id, stack_pages,
+-			     num_sqbs, hw->sqb_size);
++			     num_sqbs, hw->sqb_size, AURA_NIX_SQ);
+ 	if (err)
+ 		goto aura_free;
+ 
+--
+2.39.5
+
diff --git a/queue-6.1/remoteproc-qcom_wcnss-fix-on-platforms-without-fallb.patch b/queue-6.1/remoteproc-qcom_wcnss-fix-on-platforms-without-fallb.patch
new file mode 100644
index 0000000000..6d6e8d0099
--- /dev/null
+++ b/queue-6.1/remoteproc-qcom_wcnss-fix-on-platforms-without-fallb.patch
@@ -0,0 +1,45 @@
+From 4088d71aed0f5e4dc72d21f8534000687f203b5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Mon, 12 May 2025 02:40:15 +0300
+Subject: remoteproc: qcom_wcnss: Fix on platforms without fallback regulators
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Matti Lehtimäki
+
+[ Upstream commit 4ca45af0a56d00b86285d6fdd720dca3215059a7 ]
+
+A recent change to handle platforms with only a single power domain
+broke pronto-v3, which requires power domains and doesn't have fallback
+voltage regulators in case power domains are missing. Add a check to
+verify the number of fallback voltage regulators before using the code
+which handles the single power domain situation.
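To see the failure mode concretely, plug in illustrative numbers (assumed
for the example, not taken from a real platform description): a
pronto-v3-like board with two power domains and no fallback power-domain
regulators. The small standalone C program below replays the arithmetic;
the variable names mirror those in the hunk that follows:

	#include <stdio.h>

	int main(void)
	{
		int num_pds = 2;      /* power domains (assumed) */
		int num_pd_vregs = 0; /* no fallback regulators defined */
		int num_vregs = 1;    /* other common regulators (assumed) */

		/* Old logic: the unconditional adjustment goes negative,
		 * shrinking the regulator count used later.
		 */
		int broken = num_vregs + (num_pd_vregs - num_pds); /* -1 */

		/* Fixed logic: adjust only when fallback regulators
		 * actually outnumber the power domains.
		 */
		int fixed = num_vregs;
		if (num_pds < num_pd_vregs)
			fixed += num_pd_vregs - num_pds; /* stays 1 */

		printf("broken=%d fixed=%d\n", broken, fixed);
		return 0;
	}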
+ +Fixes: 65991ea8a6d1 ("remoteproc: qcom_wcnss: Handle platforms with only single power domain") +Signed-off-by: Matti Lehtimäki +Tested-by: Luca Weiss # sdm632-fairphone-fp3 +Link: https://lore.kernel.org/r/20250511234026.94735-1-matti.lehtimaki@gmail.com +Signed-off-by: Bjorn Andersson +Signed-off-by: Sasha Levin +--- + drivers/remoteproc/qcom_wcnss.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c +index ce61e0e7cbeb8..af96541c9b69a 100644 +--- a/drivers/remoteproc/qcom_wcnss.c ++++ b/drivers/remoteproc/qcom_wcnss.c +@@ -445,7 +445,8 @@ static int wcnss_init_regulators(struct qcom_wcnss *wcnss, + if (wcnss->num_pds) { + info += wcnss->num_pds; + /* Handle single power domain case */ +- num_vregs += num_pd_vregs - wcnss->num_pds; ++ if (wcnss->num_pds < num_pd_vregs) ++ num_vregs += num_pd_vregs - wcnss->num_pds; + } else { + num_vregs += num_pd_vregs; + } +-- +2.39.5 + diff --git a/queue-6.1/sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch b/queue-6.1/sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch new file mode 100644 index 0000000000..6d79e887c7 --- /dev/null +++ b/queue-6.1/sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch @@ -0,0 +1,62 @@ +From e5a1bb921828d93b9eb67fa21178fd4ffc7ba80f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 18 May 2025 15:20:37 -0700 +Subject: sch_hfsc: Fix qlen accounting bug when using peek in hfsc_enqueue() + +From: Cong Wang + +[ Upstream commit 3f981138109f63232a5fb7165938d4c945cc1b9d ] + +When enqueuing the first packet to an HFSC class, hfsc_enqueue() calls the +child qdisc's peek() operation before incrementing sch->q.qlen and +sch->qstats.backlog. If the child qdisc uses qdisc_peek_dequeued(), this may +trigger an immediate dequeue and potential packet drop. In such cases, +qdisc_tree_reduce_backlog() is called, but the HFSC qdisc's qlen and backlog +have not yet been updated, leading to inconsistent queue accounting. This +can leave an empty HFSC class in the active list, causing further +consequences like use-after-free. + +This patch fixes the bug by moving the increment of sch->q.qlen and +sch->qstats.backlog before the call to the child qdisc's peek() operation. +This ensures that queue length and backlog are always accurate when packet +drops or dequeues are triggered during the peek. 
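The ordering requirement generalizes beyond HFSC: a parent qdisc must
account an enqueued packet before invoking anything that can make the
child drop it, because qdisc_tree_reduce_backlog() subtracts from the
parent's counters on its way up the hierarchy. Below is a condensed
sketch of that rule; it is a simplification with classification and
service-curve bookkeeping elided, not the literal hfsc_enqueue():

	#include <net/sch_generic.h>

	static int parent_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				  struct Qdisc *child,
				  struct sk_buff **to_free)
	{
		unsigned int len = qdisc_pkt_len(skb);
		int err = qdisc_enqueue(skb, child, to_free);

		if (unlikely(err != NET_XMIT_SUCCESS))
			return err;

		/* Account first ... */
		sch->qstats.backlog += len;
		sch->q.qlen++;

		/* ... so that a peek that dequeues and drops inside the
		 * child (as qdisc_peek_dequeued() may) finds consistent
		 * counters when it calls qdisc_tree_reduce_backlog().
		 */
		child->ops->peek(child);

		return NET_XMIT_SUCCESS;
	}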
+ +Fixes: 12d0ad3be9c3 ("net/sched/sch_hfsc.c: handle corner cases where head may change invalidating calculated deadline") +Reported-by: Mingi Cho +Signed-off-by: Cong Wang +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250518222038.58538-2-xiyou.wangcong@gmail.com +Reviewed-by: Jamal Hadi Salim +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/sched/sch_hfsc.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c +index fc1370c293730..ec6ee45100132 100644 +--- a/net/sched/sch_hfsc.c ++++ b/net/sched/sch_hfsc.c +@@ -1568,6 +1568,9 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) + return err; + } + ++ sch->qstats.backlog += len; ++ sch->q.qlen++; ++ + if (first && !cl->cl_nactive) { + if (cl->cl_flags & HFSC_RSC) + init_ed(cl, len); +@@ -1583,9 +1586,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) + + } + +- sch->qstats.backlog += len; +- sch->q.qlen++; +- + return NET_XMIT_SUCCESS; + } + +-- +2.39.5 + diff --git a/queue-6.1/series b/queue-6.1/series index 2afdd7c76c..b3d6b37bc0 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -230,3 +230,23 @@ btrfs-correct-the-order-of-prelim_ref-arguments-in-b.patch wifi-iwlwifi-add-support-for-killer-on-mtl.patch xenbus-allow-pvh-dom0-a-non-local-xenstore.patch __legitimize_mnt-check-for-mnt_sync_umount-should-be.patch +espintcp-remove-encap-socket-caching-to-avoid-refere.patch +dmaengine-idxd-add-per-dsa-wq-workqueue-for-processi.patch +dmaengine-idxd-add-idxd_copy_cr-to-copy-user-complet.patch +dmaengine-idxd-fix-allowing-write-from-different-add.patch +remoteproc-qcom_wcnss-fix-on-platforms-without-fallb.patch +clk-sunxi-ng-d1-add-missing-divider-for-mmc-mod-cloc.patch +xfrm-sanitize-marks-before-insert.patch +dmaengine-idxd-fix-poll-return-value.patch +bluetooth-l2cap-fix-not-checking-l2cap_chan-security.patch +bridge-netfilter-fix-forwarding-of-fragmented-packet.patch +ice-fix-vf-num_mac-count-with-port-representors.patch +net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch +net-lan743x-restore-sgmii-ctrl-register-on-resume.patch +io_uring-fix-overflow-resched-cqe-reordering.patch +sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch +octeontx2-pf-add-support-for-page-pool.patch +octeontx2-pf-add-af_xdp-non-zero-copy-support.patch +net-tipc-fix-slab-use-after-free-read-in-tipc_aead_e.patch +octeontx2-af-set-lmt_ena-bit-for-apr-table-entries.patch +octeontx2-af-fix-apr-entry-mapping-based-on-apr_lmt_.patch diff --git a/queue-6.1/xfrm-sanitize-marks-before-insert.patch b/queue-6.1/xfrm-sanitize-marks-before-insert.patch new file mode 100644 index 0000000000..82c1c56ae9 --- /dev/null +++ b/queue-6.1/xfrm-sanitize-marks-before-insert.patch @@ -0,0 +1,71 @@ +From d729f33af12fba969f96b3d6eef5e8ea3f3f7fb5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 May 2025 13:31:58 +0200 +Subject: xfrm: Sanitize marks before insert + +From: Paul Chaignon + +[ Upstream commit 0b91fda3a1f044141e1e615456ff62508c32b202 ] + +Prior to this patch, the mark is sanitized (applying the state's mask to +the state's value) only on inserts when checking if a conflicting XFRM +state or policy exists. + +We discovered in Cilium that this same sanitization does not occur +in the hot-path __xfrm_state_lookup. 
In the hot-path, the sk_buff's mark
+is simply compared to the state's value:
+
+	if ((mark & x->mark.m) != x->mark.v)
+		continue;
+
+Therefore, users can define unsanitized marks (e.g., 0xf42/0xf00) which
+will never match any packet.
+
+This commit updates __xfrm_state_insert and xfrm_policy_insert to store
+the sanitized marks, thus removing this footgun.
+
+This has the side effect of changing the output of the ip command, as
+the returned mark will have the mask applied to it when printed.
+
+Fixes: 3d6acfa7641f ("xfrm: SA lookups with mark")
+Signed-off-by: Paul Chaignon
+Signed-off-by: Louis DeLosSantos
+Co-developed-by: Louis DeLosSantos
+Signed-off-by: Steffen Klassert
+Signed-off-by: Sasha Levin
+---
+ net/xfrm/xfrm_policy.c | 3 +++
+ net/xfrm/xfrm_state.c  | 3 +++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index a022f49846879..e015ff225b27a 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -1597,6 +1597,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+ 	struct xfrm_policy *delpol;
+ 	struct hlist_head *chain;
+ 
++	/* Sanitize mark before store */
++	policy->mark.v &= policy->mark.m;
++
+ 	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+ 	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
+ 	if (chain)
+diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
+index b5047a94c7d01..58c53bb1c5838 100644
+--- a/net/xfrm/xfrm_state.c
++++ b/net/xfrm/xfrm_state.c
+@@ -1275,6 +1275,9 @@ static void __xfrm_state_insert(struct xfrm_state *x)
+ 
+ 	list_add(&x->km.all, &net->xfrm.state_all);
+ 
++	/* Sanitize mark before store */
++	x->mark.v &= x->mark.m;
++
+ 	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
+ 			  x->props.reqid, x->props.family);
+ 	hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
+--
+2.39.5
+
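The arithmetic behind the footgun is worth spelling out. With the commit
message's example of value 0xf42 and mask 0xf00, the stored value has bits
outside the mask, so the lookup comparison can never succeed; once the
value is sanitized to 0xf00 on insert, the comparison behaves as intended.
The small standalone C program below (an illustration, not kernel code)
replays both cases:

	#include <assert.h>
	#include <stdint.h>

	/* Same test as the hot-path lookup: a state matches only if the
	 * packet mark, masked with the state's mask, equals the stored
	 * value.
	 */
	static int mark_matches(uint32_t skb_mark, uint32_t v, uint32_t m)
	{
		return (skb_mark & m) == v;
	}

	int main(void)
	{
		uint32_t v = 0xf42, m = 0xf00;

		/* Unsanitized: (mark & 0xf00) can never equal 0xf42. */
		assert(!mark_matches(0xf42, v, m));

		/* Sanitized on insert, as the fix does: v &= m. */
		v &= m;
		assert(mark_matches(0xf42, v, m));

		return 0;
	}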