git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author Sasha Levin <sashal@kernel.org>
Sat, 24 May 2025 10:22:29 +0000 (06:22 -0400)
committer Sasha Levin <sashal@kernel.org>
Sat, 24 May 2025 10:22:29 +0000 (06:22 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
21 files changed:
queue-6.1/bluetooth-l2cap-fix-not-checking-l2cap_chan-security.patch [new file with mode: 0644]
queue-6.1/bridge-netfilter-fix-forwarding-of-fragmented-packet.patch [new file with mode: 0644]
queue-6.1/clk-sunxi-ng-d1-add-missing-divider-for-mmc-mod-cloc.patch [new file with mode: 0644]
queue-6.1/dmaengine-idxd-add-idxd_copy_cr-to-copy-user-complet.patch [new file with mode: 0644]
queue-6.1/dmaengine-idxd-add-per-dsa-wq-workqueue-for-processi.patch [new file with mode: 0644]
queue-6.1/dmaengine-idxd-fix-allowing-write-from-different-add.patch [new file with mode: 0644]
queue-6.1/dmaengine-idxd-fix-poll-return-value.patch [new file with mode: 0644]
queue-6.1/espintcp-remove-encap-socket-caching-to-avoid-refere.patch [new file with mode: 0644]
queue-6.1/ice-fix-vf-num_mac-count-with-port-representors.patch [new file with mode: 0644]
queue-6.1/io_uring-fix-overflow-resched-cqe-reordering.patch [new file with mode: 0644]
queue-6.1/net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch [new file with mode: 0644]
queue-6.1/net-lan743x-restore-sgmii-ctrl-register-on-resume.patch [new file with mode: 0644]
queue-6.1/net-tipc-fix-slab-use-after-free-read-in-tipc_aead_e.patch [new file with mode: 0644]
queue-6.1/octeontx2-af-fix-apr-entry-mapping-based-on-apr_lmt_.patch [new file with mode: 0644]
queue-6.1/octeontx2-af-set-lmt_ena-bit-for-apr-table-entries.patch [new file with mode: 0644]
queue-6.1/octeontx2-pf-add-af_xdp-non-zero-copy-support.patch [new file with mode: 0644]
queue-6.1/octeontx2-pf-add-support-for-page-pool.patch [new file with mode: 0644]
queue-6.1/remoteproc-qcom_wcnss-fix-on-platforms-without-fallb.patch [new file with mode: 0644]
queue-6.1/sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/xfrm-sanitize-marks-before-insert.patch [new file with mode: 0644]

diff --git a/queue-6.1/bluetooth-l2cap-fix-not-checking-l2cap_chan-security.patch b/queue-6.1/bluetooth-l2cap-fix-not-checking-l2cap_chan-security.patch
new file mode 100644 (file)
index 0000000..7c3da26
--- /dev/null
@@ -0,0 +1,92 @@
+From 1094e07ac8cf907f3e873d790c503a880d953edd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 May 2025 15:00:30 -0400
+Subject: Bluetooth: L2CAP: Fix not checking l2cap_chan security level
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 7af8479d9eb4319b4ba7b47a8c4d2c55af1c31e1 ]
+
+l2cap_check_enc_key_size shall check the security level of the
+l2cap_chan rather than the hci_conn since for incoming connection
+request that may be different as hci_conn may already have been
+encrypted using a different security level.
+
+Fixes: 522e9ed157e3 ("Bluetooth: l2cap: Check encryption key size on incoming connection")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/l2cap_core.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
+index 222105e24d2d8..cb9b1edfcea2a 100644
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -1561,7 +1561,8 @@ static void l2cap_request_info(struct l2cap_conn *conn)
+                      sizeof(req), &req);
+ }
+-static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
++static bool l2cap_check_enc_key_size(struct hci_conn *hcon,
++                                   struct l2cap_chan *chan)
+ {
+       /* The minimum encryption key size needs to be enforced by the
+        * host stack before establishing any L2CAP connections. The
+@@ -1575,7 +1576,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
+       int min_key_size = hcon->hdev->min_enc_key_size;
+       /* On FIPS security level, key size must be 16 bytes */
+-      if (hcon->sec_level == BT_SECURITY_FIPS)
++      if (chan->sec_level == BT_SECURITY_FIPS)
+               min_key_size = 16;
+       return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
+@@ -1603,7 +1604,7 @@ static void l2cap_do_start(struct l2cap_chan *chan)
+           !__l2cap_no_conn_pending(chan))
+               return;
+-      if (l2cap_check_enc_key_size(conn->hcon))
++      if (l2cap_check_enc_key_size(conn->hcon, chan))
+               l2cap_start_connection(chan);
+       else
+               __set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
+@@ -1685,7 +1686,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
+                               continue;
+                       }
+-                      if (l2cap_check_enc_key_size(conn->hcon))
++                      if (l2cap_check_enc_key_size(conn->hcon, chan))
+                               l2cap_start_connection(chan);
+                       else
+                               l2cap_chan_close(chan, ECONNREFUSED);
+@@ -4187,7 +4188,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
+       /* Check if the ACL is secure enough (if not SDP) */
+       if (psm != cpu_to_le16(L2CAP_PSM_SDP) &&
+           (!hci_conn_check_link_mode(conn->hcon) ||
+-          !l2cap_check_enc_key_size(conn->hcon))) {
++          !l2cap_check_enc_key_size(conn->hcon, pchan))) {
+               conn->disc_reason = HCI_ERROR_AUTH_FAILURE;
+               result = L2CAP_CR_SEC_BLOCK;
+               goto response;
+@@ -8418,7 +8419,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
+               }
+               if (chan->state == BT_CONNECT) {
+-                      if (!status && l2cap_check_enc_key_size(hcon))
++                      if (!status && l2cap_check_enc_key_size(hcon, chan))
+                               l2cap_start_connection(chan);
+                       else
+                               __set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
+@@ -8428,7 +8429,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
+                       struct l2cap_conn_rsp rsp;
+                       __u16 res, stat;
+-                      if (!status && l2cap_check_enc_key_size(hcon)) {
++                      if (!status && l2cap_check_enc_key_size(hcon, chan)) {
+                               if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
+                                       res = L2CAP_CR_PEND;
+                                       stat = L2CAP_CS_AUTHOR_PEND;
+-- 
+2.39.5
+
diff --git a/queue-6.1/bridge-netfilter-fix-forwarding-of-fragmented-packet.patch b/queue-6.1/bridge-netfilter-fix-forwarding-of-fragmented-packet.patch
new file mode 100644 (file)
index 0000000..f1ea670
--- /dev/null
@@ -0,0 +1,95 @@
+From 0bd57d44af1c9d6b3beb7172dd370a58d08d23e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 May 2025 11:48:48 +0300
+Subject: bridge: netfilter: Fix forwarding of fragmented packets
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 91b6dbced0ef1d680afdd69b14fc83d50ebafaf3 ]
+
+When netfilter defrag hooks are loaded (due to the presence of conntrack
+rules, for example), fragmented packets entering the bridge will be
+defragged by the bridge's pre-routing hook (br_nf_pre_routing() ->
+ipv4_conntrack_defrag()).
+
+Later on, in the bridge's post-routing hook, the defragged packet will
+be fragmented again. If the size of the largest fragment is larger than
+what the kernel has determined as the destination MTU (using
+ip_skb_dst_mtu()), the defragged packet will be dropped.
+
+Before commit ac6627a28dbf ("net: ipv4: Consolidate ipv4_mtu and
+ip_dst_mtu_maybe_forward"), ip_skb_dst_mtu() would return dst_mtu() as
+the destination MTU. Assuming the dst entry attached to the packet is
+the bridge's fake rtable one, this would simply be the bridge's MTU (see
+fake_mtu()).
+
+However, after the above-mentioned commit, ip_skb_dst_mtu() ends up
+returning the route's MTU stored in the dst entry's metrics. Ideally, in
+case the dst entry is the bridge's fake rtable one, this should be the
+bridge's MTU as the bridge takes care of updating this metric when its
+MTU changes (see br_change_mtu()).
+
+Unfortunately, the last operation is a no-op given the metrics attached
+to the fake rtable entry are marked as read-only. Therefore,
+ip_skb_dst_mtu() ends up returning 1500 (the initial MTU value) and
+defragged packets are dropped during fragmentation when dealing with
+large fragments and high MTU (e.g., 9k).
+
+Fix by moving the fake rtable entry's metrics to be per-bridge (in a
+similar fashion to the fake rtable entry itself) and marking them as
+writable, thereby allowing MTU changes to be reflected.
+
+Fixes: 62fa8a846d7d ("net: Implement read-only protection and COW'ing of metrics.")
+Fixes: 33eb9873a283 ("bridge: initialize fake_rtable metrics")
+Reported-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
+Closes: https://lore.kernel.org/netdev/PH0PR10MB4504888284FF4CBA648197D0ACB82@PH0PR10MB4504.namprd10.prod.outlook.com/
+Tested-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://patch.msgid.link/20250515084848.727706-1-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_nf_core.c | 7 ++-----
+ net/bridge/br_private.h | 1 +
+ 2 files changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
+index 8c69f0c95a8ed..b8c8deb87407d 100644
+--- a/net/bridge/br_nf_core.c
++++ b/net/bridge/br_nf_core.c
+@@ -65,17 +65,14 @@ static struct dst_ops fake_dst_ops = {
+  * ipt_REJECT needs it.  Future netfilter modules might
+  * require us to fill additional fields.
+  */
+-static const u32 br_dst_default_metrics[RTAX_MAX] = {
+-      [RTAX_MTU - 1] = 1500,
+-};
+-
+ void br_netfilter_rtable_init(struct net_bridge *br)
+ {
+       struct rtable *rt = &br->fake_rtable;
+       atomic_set(&rt->dst.__refcnt, 1);
+       rt->dst.dev = br->dev;
+-      dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
++      dst_init_metrics(&rt->dst, br->metrics, false);
++      dst_metric_set(&rt->dst, RTAX_MTU, br->dev->mtu);
+       rt->dst.flags   = DST_NOXFRM | DST_FAKE_RTABLE;
+       rt->dst.ops = &fake_dst_ops;
+ }
+diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
+index 940de95167689..19fb505492521 100644
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -478,6 +478,7 @@ struct net_bridge {
+               struct rtable           fake_rtable;
+               struct rt6_info         fake_rt6_info;
+       };
++      u32                             metrics[RTAX_MAX];
+ #endif
+       u16                             group_fwd_mask;
+       u16                             group_fwd_mask_required;
+-- 
+2.39.5
+
diff --git a/queue-6.1/clk-sunxi-ng-d1-add-missing-divider-for-mmc-mod-cloc.patch b/queue-6.1/clk-sunxi-ng-d1-add-missing-divider-for-mmc-mod-cloc.patch
new file mode 100644 (file)
index 0000000..fdc6f5d
--- /dev/null
@@ -0,0 +1,131 @@
+From 1f4b2d4271c974e118c72b3fdcc68fc8991215a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 May 2025 13:06:31 +0100
+Subject: clk: sunxi-ng: d1: Add missing divider for MMC mod clocks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Andre Przywara <andre.przywara@arm.com>
+
+[ Upstream commit 98e6da673cc6dd46ca9a599802bd2c8f83606710 ]
+
+The D1/R528/T113 SoCs have a hidden divider of 2 in the MMC mod clocks,
+just as other recent SoCs. So far we did not describe that, which led
+to the resulting MMC clock rate being only half of its intended value.
+
+Use a macro that allows describing a fixed post-divider, to compensate
+for that divisor.
+
+This brings the MMC performance on those SoCs to its expected level,
+so about 23 MB/s for SD cards, instead of the 11 MB/s measured so far.
+
+Fixes: 35b97bb94111 ("clk: sunxi-ng: Add support for the D1 SoC clocks")
+Reported-by: Kuba Szczodrzyński <kuba@szczodrzynski.pl>
+Signed-off-by: Andre Przywara <andre.przywara@arm.com>
+Link: https://patch.msgid.link/20250501120631.837186-1-andre.przywara@arm.com
+Signed-off-by: Chen-Yu Tsai <wens@csie.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/clk/sunxi-ng/ccu-sun20i-d1.c | 44 ++++++++++++++++------------
+ drivers/clk/sunxi-ng/ccu_mp.h        | 22 ++++++++++++++
+ 2 files changed, 47 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c
+index cb4bf038e17f5..89d8bf4a30a26 100644
+--- a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c
++++ b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c
+@@ -412,19 +412,23 @@ static const struct clk_parent_data mmc0_mmc1_parents[] = {
+       { .hw = &pll_periph0_2x_clk.common.hw },
+       { .hw = &pll_audio1_div2_clk.common.hw },
+ };
+-static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc0_clk, "mmc0", mmc0_mmc1_parents, 0x830,
+-                                     0, 4,    /* M */
+-                                     8, 2,    /* P */
+-                                     24, 3,   /* mux */
+-                                     BIT(31), /* gate */
+-                                     0);
+-
+-static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc1_clk, "mmc1", mmc0_mmc1_parents, 0x834,
+-                                     0, 4,    /* M */
+-                                     8, 2,    /* P */
+-                                     24, 3,   /* mux */
+-                                     BIT(31), /* gate */
+-                                     0);
++static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc0_clk, "mmc0",
++                                             mmc0_mmc1_parents, 0x830,
++                                             0, 4,            /* M */
++                                             8, 2,            /* P */
++                                             24, 3,           /* mux */
++                                             BIT(31),         /* gate */
++                                             2,               /* post-div */
++                                             0);
++
++static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc1_clk, "mmc1",
++                                             mmc0_mmc1_parents, 0x834,
++                                             0, 4,            /* M */
++                                             8, 2,            /* P */
++                                             24, 3,           /* mux */
++                                             BIT(31),         /* gate */
++                                             2,               /* post-div */
++                                             0);
+ static const struct clk_parent_data mmc2_parents[] = {
+       { .fw_name = "hosc" },
+@@ -433,12 +437,14 @@ static const struct clk_parent_data mmc2_parents[] = {
+       { .hw = &pll_periph0_800M_clk.common.hw },
+       { .hw = &pll_audio1_div2_clk.common.hw },
+ };
+-static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc2_clk, "mmc2", mmc2_parents, 0x838,
+-                                     0, 4,    /* M */
+-                                     8, 2,    /* P */
+-                                     24, 3,   /* mux */
+-                                     BIT(31), /* gate */
+-                                     0);
++static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc2_clk, "mmc2", mmc2_parents,
++                                             0x838,
++                                             0, 4,            /* M */
++                                             8, 2,            /* P */
++                                             24, 3,           /* mux */
++                                             BIT(31),         /* gate */
++                                             2,               /* post-div */
++                                             0);
+ static SUNXI_CCU_GATE_HWS(bus_mmc0_clk, "bus-mmc0", psi_ahb_hws,
+                         0x84c, BIT(0), 0);
+diff --git a/drivers/clk/sunxi-ng/ccu_mp.h b/drivers/clk/sunxi-ng/ccu_mp.h
+index 6e50f3728fb5f..7d836a9fb3db3 100644
+--- a/drivers/clk/sunxi-ng/ccu_mp.h
++++ b/drivers/clk/sunxi-ng/ccu_mp.h
+@@ -52,6 +52,28 @@ struct ccu_mp {
+               }                                                       \
+       }
++#define SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(_struct, _name, _parents, \
++                                              _reg,                   \
++                                              _mshift, _mwidth,       \
++                                              _pshift, _pwidth,       \
++                                              _muxshift, _muxwidth,   \
++                                              _gate, _postdiv, _flags)\
++      struct ccu_mp _struct = {                                       \
++              .enable = _gate,                                        \
++              .m      = _SUNXI_CCU_DIV(_mshift, _mwidth),             \
++              .p      = _SUNXI_CCU_DIV(_pshift, _pwidth),             \
++              .mux    = _SUNXI_CCU_MUX(_muxshift, _muxwidth),         \
++              .fixed_post_div = _postdiv,                             \
++              .common = {                                             \
++                      .reg            = _reg,                         \
++                      .features       = CCU_FEATURE_FIXED_POSTDIV,    \
++                      .hw.init        = CLK_HW_INIT_PARENTS_DATA(_name, \
++                                                      _parents,       \
++                                                      &ccu_mp_ops,    \
++                                                      _flags),        \
++              }                                                       \
++      }
++
+ #define SUNXI_CCU_MP_WITH_MUX_GATE(_struct, _name, _parents, _reg,    \
+                                  _mshift, _mwidth,                    \
+                                  _pshift, _pwidth,                    \
+-- 
+2.39.5
+
diff --git a/queue-6.1/dmaengine-idxd-add-idxd_copy_cr-to-copy-user-complet.patch b/queue-6.1/dmaengine-idxd-add-idxd_copy_cr-to-copy-user-complet.patch
new file mode 100644 (file)
index 0000000..6dc04e6
--- /dev/null
@@ -0,0 +1,263 @@
+From b80fed3040c7aeee82e2c2fc1fa204ba5347ba11 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Apr 2023 13:31:35 -0700
+Subject: dmaengine: idxd: add idxd_copy_cr() to copy user completion record
+ during page fault handling
+
+From: Fenghua Yu <fenghua.yu@intel.com>
+
+[ Upstream commit b022f59725f0ae846191abbd6d2e611d7f60f826 ]
+
+Define idxd_copy_cr() to copy completion record to fault address in
+user address that is found by work queue (wq) and PASID.
+
+It will be used to write the user's completion record that the hardware
+device is not able to write due to user completion record page fault.
+
+An xarray is added to associate the PASID and mm with the
+struct idxd_user_context so mm can be found by PASID and wq.
+
+It is called when handling the completion record fault in a kernel thread
+context. Switch to the mm using kthread_use_mm() and copy the
+completion record to the mm via copy_to_user(). Once the copy is
+completed, switch back to the current mm using kthread_unuse_mm().
+
+Suggested-by: Christoph Hellwig <hch@infradead.org>
+Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
+Suggested-by: Tony Luck <tony.luck@intel.com>
+Tested-by: Tony Zhu <tony.zhu@intel.com>
+Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Link: https://lore.kernel.org/r/20230407203143.2189681-9-fenghua.yu@intel.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Stable-dep-of: 8dfa57aabff6 ("dmaengine: idxd: Fix allowing write() from different address spaces")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/dma/idxd/cdev.c  | 107 +++++++++++++++++++++++++++++++++++++--
+ drivers/dma/idxd/idxd.h  |   6 +++
+ drivers/dma/idxd/init.c  |   2 +
+ drivers/dma/idxd/sysfs.c |   1 +
+ 4 files changed, 111 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
+index e2a89873c6e1a..c7aa47f01df02 100644
+--- a/drivers/dma/idxd/cdev.c
++++ b/drivers/dma/idxd/cdev.c
+@@ -12,7 +12,9 @@
+ #include <linux/fs.h>
+ #include <linux/poll.h>
+ #include <linux/iommu.h>
++#include <linux/highmem.h>
+ #include <uapi/linux/idxd.h>
++#include <linux/xarray.h>
+ #include "registers.h"
+ #include "idxd.h"
+@@ -35,6 +37,7 @@ struct idxd_user_context {
+       struct idxd_wq *wq;
+       struct task_struct *task;
+       unsigned int pasid;
++      struct mm_struct *mm;
+       unsigned int flags;
+       struct iommu_sva *sva;
+ };
+@@ -69,6 +72,19 @@ static inline struct idxd_wq *inode_wq(struct inode *inode)
+       return idxd_cdev->wq;
+ }
++static void idxd_xa_pasid_remove(struct idxd_user_context *ctx)
++{
++      struct idxd_wq *wq = ctx->wq;
++      void *ptr;
++
++      mutex_lock(&wq->uc_lock);
++      ptr = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL);
++      if (ptr != (void *)ctx)
++              dev_warn(&wq->idxd->pdev->dev, "xarray cmpxchg failed for pasid %u\n",
++                       ctx->pasid);
++      mutex_unlock(&wq->uc_lock);
++}
++
+ static int idxd_cdev_open(struct inode *inode, struct file *filp)
+ {
+       struct idxd_user_context *ctx;
+@@ -109,20 +125,26 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
+               pasid = iommu_sva_get_pasid(sva);
+               if (pasid == IOMMU_PASID_INVALID) {
+-                      iommu_sva_unbind_device(sva);
+                       rc = -EINVAL;
+-                      goto failed;
++                      goto failed_get_pasid;
+               }
+               ctx->sva = sva;
+               ctx->pasid = pasid;
++              ctx->mm = current->mm;
++
++              mutex_lock(&wq->uc_lock);
++              rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL);
++              mutex_unlock(&wq->uc_lock);
++              if (rc < 0)
++                      dev_warn(dev, "PASID entry already exist in xarray.\n");
+               if (wq_dedicated(wq)) {
+                       rc = idxd_wq_set_pasid(wq, pasid);
+                       if (rc < 0) {
+                               iommu_sva_unbind_device(sva);
+                               dev_err(dev, "wq set pasid failed: %d\n", rc);
+-                              goto failed;
++                              goto failed_set_pasid;
+                       }
+               }
+       }
+@@ -131,7 +153,13 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
+       mutex_unlock(&wq->wq_lock);
+       return 0;
+- failed:
++failed_set_pasid:
++      if (device_user_pasid_enabled(idxd))
++              idxd_xa_pasid_remove(ctx);
++failed_get_pasid:
++      if (device_user_pasid_enabled(idxd))
++              iommu_sva_unbind_device(sva);
++failed:
+       mutex_unlock(&wq->wq_lock);
+       kfree(ctx);
+       return rc;
+@@ -162,8 +190,10 @@ static int idxd_cdev_release(struct inode *node, struct file *filep)
+               }
+       }
+-      if (ctx->sva)
++      if (ctx->sva) {
+               iommu_sva_unbind_device(ctx->sva);
++              idxd_xa_pasid_remove(ctx);
++      }
+       kfree(ctx);
+       mutex_lock(&wq->wq_lock);
+       idxd_wq_put(wq);
+@@ -496,3 +526,70 @@ void idxd_cdev_remove(void)
+               ida_destroy(&ictx[i].minor_ida);
+       }
+ }
++
++/**
++ * idxd_copy_cr - copy completion record to user address space found by wq and
++ *              PASID
++ * @wq:               work queue
++ * @pasid:    PASID
++ * @addr:     user fault address to write
++ * @cr:               completion record
++ * @len:      number of bytes to copy
++ *
++ * This is called by a work that handles completion record fault.
++ *
++ * Return: number of bytes copied.
++ */
++int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
++               void *cr, int len)
++{
++      struct device *dev = &wq->idxd->pdev->dev;
++      int left = len, status_size = 1;
++      struct idxd_user_context *ctx;
++      struct mm_struct *mm;
++
++      mutex_lock(&wq->uc_lock);
++
++      ctx = xa_load(&wq->upasid_xa, pasid);
++      if (!ctx) {
++              dev_warn(dev, "No user context\n");
++              goto out;
++      }
++
++      mm = ctx->mm;
++      /*
++       * The completion record fault handling work is running in kernel
++       * thread context. It temporarily switches to the mm to copy cr
++       * to addr in the mm.
++       */
++      kthread_use_mm(mm);
++      left = copy_to_user((void __user *)addr + status_size, cr + status_size,
++                          len - status_size);
++      /*
++       * Copy status only after the rest of completion record is copied
++       * successfully so that the user gets the complete completion record
++       * when a non-zero status is polled.
++       */
++      if (!left) {
++              u8 status;
++
++              /*
++               * Ensure that the completion record's status field is written
++               * after the rest of the completion record has been written.
++               * This ensures that the user receives the correct completion
++               * record information once polling for a non-zero status.
++               */
++              wmb();
++              status = *(u8 *)cr;
++              if (put_user(status, (u8 __user *)addr))
++                      left += status_size;
++      } else {
++              left += status_size;
++      }
++      kthread_unuse_mm(mm);
++
++out:
++      mutex_unlock(&wq->uc_lock);
++
++      return len - left;
++}
+diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
+index 5dbb67ff1c0cb..c3ace4aed0fc5 100644
+--- a/drivers/dma/idxd/idxd.h
++++ b/drivers/dma/idxd/idxd.h
+@@ -215,6 +215,10 @@ struct idxd_wq {
+       char name[WQ_NAME_SIZE + 1];
+       u64 max_xfer_bytes;
+       u32 max_batch_size;
++
++      /* Lock to protect upasid_xa access. */
++      struct mutex uc_lock;
++      struct xarray upasid_xa;
+ };
+ struct idxd_engine {
+@@ -666,6 +670,8 @@ void idxd_cdev_remove(void);
+ int idxd_cdev_get_major(struct idxd_device *idxd);
+ int idxd_wq_add_cdev(struct idxd_wq *wq);
+ void idxd_wq_del_cdev(struct idxd_wq *wq);
++int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
++               void *buf, int len);
+ /* perfmon */
+ #if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON)
+diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
+index 7cb76db5ad600..ea651d5cf332d 100644
+--- a/drivers/dma/idxd/init.c
++++ b/drivers/dma/idxd/init.c
+@@ -218,6 +218,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
+                       }
+                       bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
+               }
++              mutex_init(&wq->uc_lock);
++              xa_init(&wq->upasid_xa);
+               idxd->wqs[i] = wq;
+       }
+diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
+index c811757d0f97f..0689464c4816a 100644
+--- a/drivers/dma/idxd/sysfs.c
++++ b/drivers/dma/idxd/sysfs.c
+@@ -1315,6 +1315,7 @@ static void idxd_conf_wq_release(struct device *dev)
+       bitmap_free(wq->opcap_bmap);
+       kfree(wq->wqcfg);
++      xa_destroy(&wq->upasid_xa);
+       kfree(wq);
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.1/dmaengine-idxd-add-per-dsa-wq-workqueue-for-processi.patch b/queue-6.1/dmaengine-idxd-add-per-dsa-wq-workqueue-for-processi.patch
new file mode 100644 (file)
index 0000000..4f51dfd
--- /dev/null
@@ -0,0 +1,78 @@
+From c3f6037a3b216750c6848bdee862a31168554b3c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Apr 2023 13:31:33 -0700
+Subject: dmaengine: idxd: add per DSA wq workqueue for processing cr faults
+
+From: Dave Jiang <dave.jiang@intel.com>
+
+[ Upstream commit 2f30decd2f23a376d2ed73dfe4c601421edf501a ]
+
+Add a workqueue for user submitted completion record fault processing.
+The workqueue creation and destruction lifetime will be tied to the user
+sub-driver since it will only be used when the wq is a user type.
+
+Tested-by: Tony Zhu <tony.zhu@intel.com>
+Signed-off-by: Dave Jiang <dave.jiang@intel.com>
+Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
+Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
+Link: https://lore.kernel.org/r/20230407203143.2189681-7-fenghua.yu@intel.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Stable-dep-of: 8dfa57aabff6 ("dmaengine: idxd: Fix allowing write() from different address spaces")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/dma/idxd/cdev.c | 11 +++++++++++
+ drivers/dma/idxd/idxd.h |  1 +
+ 2 files changed, 12 insertions(+)
+
+diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
+index 9f8adb7013eba..e2a89873c6e1a 100644
+--- a/drivers/dma/idxd/cdev.c
++++ b/drivers/dma/idxd/cdev.c
+@@ -408,6 +408,13 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
+       }
+       mutex_lock(&wq->wq_lock);
++
++      wq->wq = create_workqueue(dev_name(wq_confdev(wq)));
++      if (!wq->wq) {
++              rc = -ENOMEM;
++              goto wq_err;
++      }
++
+       wq->type = IDXD_WQT_USER;
+       rc = drv_enable_wq(wq);
+       if (rc < 0)
+@@ -426,7 +433,9 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
+ err_cdev:
+       drv_disable_wq(wq);
+ err:
++      destroy_workqueue(wq->wq);
+       wq->type = IDXD_WQT_NONE;
++wq_err:
+       mutex_unlock(&wq->wq_lock);
+       return rc;
+ }
+@@ -439,6 +448,8 @@ static void idxd_user_drv_remove(struct idxd_dev *idxd_dev)
+       idxd_wq_del_cdev(wq);
+       drv_disable_wq(wq);
+       wq->type = IDXD_WQT_NONE;
++      destroy_workqueue(wq->wq);
++      wq->wq = NULL;
+       mutex_unlock(&wq->wq_lock);
+ }
+diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
+index 14c6ef987fede..5dbb67ff1c0cb 100644
+--- a/drivers/dma/idxd/idxd.h
++++ b/drivers/dma/idxd/idxd.h
+@@ -185,6 +185,7 @@ struct idxd_wq {
+       struct idxd_dev idxd_dev;
+       struct idxd_cdev *idxd_cdev;
+       struct wait_queue_head err_queue;
++      struct workqueue_struct *wq;
+       struct idxd_device *idxd;
+       int id;
+       struct idxd_irq_entry ie;
+-- 
+2.39.5
+
diff --git a/queue-6.1/dmaengine-idxd-fix-allowing-write-from-different-add.patch b/queue-6.1/dmaengine-idxd-fix-allowing-write-from-different-add.patch
new file mode 100644 (file)
index 0000000..8a47c6b
--- /dev/null
@@ -0,0 +1,59 @@
+From f97bc5be0e30ea1ab9867cf1267f386554ca69f0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Apr 2025 10:03:37 -0700
+Subject: dmaengine: idxd: Fix allowing write() from different address spaces
+
+From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+
+[ Upstream commit 8dfa57aabff625bf445548257f7711ef294cd30e ]
+
+Check if the process submitting the descriptor belongs to the same
+address space as the one that opened the file, reject otherwise.
+
+Fixes: 6827738dc684 ("dmaengine: idxd: add a write() method for applications to submit work")
+Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Signed-off-by: Dave Jiang <dave.jiang@intel.com>
+Link: https://lore.kernel.org/r/20250421170337.3008875-1-dave.jiang@intel.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/dma/idxd/cdev.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
+index c7aa47f01df02..186f005bfa8fd 100644
+--- a/drivers/dma/idxd/cdev.c
++++ b/drivers/dma/idxd/cdev.c
+@@ -240,6 +240,9 @@ static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
+       if (!idxd->user_submission_safe && !capable(CAP_SYS_RAWIO))
+               return -EPERM;
++      if (current->mm != ctx->mm)
++              return -EPERM;
++
+       rc = check_vma(wq, vma, __func__);
+       if (rc < 0)
+               return rc;
+@@ -306,6 +309,9 @@ static ssize_t idxd_cdev_write(struct file *filp, const char __user *buf, size_t
+       ssize_t written = 0;
+       int i;
++      if (current->mm != ctx->mm)
++              return -EPERM;
++
+       for (i = 0; i < len/sizeof(struct dsa_hw_desc); i++) {
+               int rc = idxd_submit_user_descriptor(ctx, udesc + i);
+@@ -326,6 +332,9 @@ static __poll_t idxd_cdev_poll(struct file *filp,
+       struct idxd_device *idxd = wq->idxd;
+       __poll_t out = 0;
++      if (current->mm != ctx->mm)
++              return -EPERM;
++
+       poll_wait(filp, &wq->err_queue, wait);
+       spin_lock(&idxd->dev_lock);
+       if (idxd->sw_err.valid)
+-- 
+2.39.5
+
diff --git a/queue-6.1/dmaengine-idxd-fix-poll-return-value.patch b/queue-6.1/dmaengine-idxd-fix-poll-return-value.patch
new file mode 100644 (file)
index 0000000..ab28f6c
--- /dev/null
@@ -0,0 +1,41 @@
+From 6972b6958b727bd67d562d755352feb080666629 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 May 2025 10:05:48 -0700
+Subject: dmaengine: idxd: Fix ->poll() return value
+
+From: Dave Jiang <dave.jiang@intel.com>
+
+[ Upstream commit ae74cd15ade833adc289279b5c6f12e78f64d4d7 ]
+
+The fix to block access from a different address space did not return a
+correct value from ->poll().  The kernel test robot reported that a
+return value of type __poll_t is expected rather than int. Fix it by
+returning POLLNVAL to indicate an invalid request.
+
+Fixes: 8dfa57aabff6 ("dmaengine: idxd: Fix allowing write() from different address spaces")
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202505081851.rwD7jVxg-lkp@intel.com/
+Signed-off-by: Dave Jiang <dave.jiang@intel.com>
+Link: https://lore.kernel.org/r/20250508170548.2747425-1-dave.jiang@intel.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/dma/idxd/cdev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
+index 186f005bfa8fd..d736ab15ade24 100644
+--- a/drivers/dma/idxd/cdev.c
++++ b/drivers/dma/idxd/cdev.c
+@@ -333,7 +333,7 @@ static __poll_t idxd_cdev_poll(struct file *filp,
+       __poll_t out = 0;
+       if (current->mm != ctx->mm)
+-              return -EPERM;
++              return POLLNVAL;
+       poll_wait(filp, &wq->err_queue, wait);
+       spin_lock(&idxd->dev_lock);
+-- 
+2.39.5
+
diff --git a/queue-6.1/espintcp-remove-encap-socket-caching-to-avoid-refere.patch b/queue-6.1/espintcp-remove-encap-socket-caching-to-avoid-refere.patch
new file mode 100644 (file)
index 0000000..ef3a828
--- /dev/null
@@ -0,0 +1,252 @@
+From f12d15deab91dea075c1396198801887c0aba2f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Apr 2025 15:59:57 +0200
+Subject: espintcp: remove encap socket caching to avoid reference leak
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 028363685bd0b7a19b4a820f82dd905b1dc83999 ]
+
+The current scheme for caching the encap socket can lead to reference
+leaks when we try to delete the netns.
+
+The reference chain is: xfrm_state -> encap_sk -> netns
+
+Since the encap socket is a userspace socket, it holds a reference on
+the netns. If we delete the espintcp state (through flush or
+individual delete) before removing the netns, the reference on the
+socket is dropped and the netns is correctly deleted. Otherwise, the
+netns may not be reachable anymore (if all processes within the ns
+have terminated), so we cannot delete the xfrm state to drop its
+reference on the socket.
+
+This patch results in a small (~2% in my tests) performance
+regression.
+
+A GC-type mechanism could be added for the socket cache, to clear
+references if the state hasn't been used "recently", but it's a lot
+more complex than just not caching the socket.
+
+Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/xfrm.h    |  1 -
+ net/ipv4/esp4.c       | 49 ++++---------------------------------------
+ net/ipv6/esp6.c       | 49 ++++---------------------------------------
+ net/xfrm/xfrm_state.c |  3 ---
+ 4 files changed, 8 insertions(+), 94 deletions(-)
+
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index bf670929622dc..64911162ab5f4 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -212,7 +212,6 @@ struct xfrm_state {
+       /* Data for encapsulator */
+       struct xfrm_encap_tmpl  *encap;
+-      struct sock __rcu       *encap_sk;
+       /* Data for care-of address */
+       xfrm_address_t  *coaddr;
+diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
+index 419969b268225..8f5417ff355d7 100644
+--- a/net/ipv4/esp4.c
++++ b/net/ipv4/esp4.c
+@@ -118,47 +118,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
+ }
+ #ifdef CONFIG_INET_ESPINTCP
+-struct esp_tcp_sk {
+-      struct sock *sk;
+-      struct rcu_head rcu;
+-};
+-
+-static void esp_free_tcp_sk(struct rcu_head *head)
+-{
+-      struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
+-
+-      sock_put(esk->sk);
+-      kfree(esk);
+-}
+-
+ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
+ {
+       struct xfrm_encap_tmpl *encap = x->encap;
+       struct net *net = xs_net(x);
+-      struct esp_tcp_sk *esk;
+       __be16 sport, dport;
+-      struct sock *nsk;
+       struct sock *sk;
+-      sk = rcu_dereference(x->encap_sk);
+-      if (sk && sk->sk_state == TCP_ESTABLISHED)
+-              return sk;
+-
+       spin_lock_bh(&x->lock);
+       sport = encap->encap_sport;
+       dport = encap->encap_dport;
+-      nsk = rcu_dereference_protected(x->encap_sk,
+-                                      lockdep_is_held(&x->lock));
+-      if (sk && sk == nsk) {
+-              esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
+-              if (!esk) {
+-                      spin_unlock_bh(&x->lock);
+-                      return ERR_PTR(-ENOMEM);
+-              }
+-              RCU_INIT_POINTER(x->encap_sk, NULL);
+-              esk->sk = sk;
+-              call_rcu(&esk->rcu, esp_free_tcp_sk);
+-      }
+       spin_unlock_bh(&x->lock);
+       sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4,
+@@ -171,20 +140,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
+               return ERR_PTR(-EINVAL);
+       }
+-      spin_lock_bh(&x->lock);
+-      nsk = rcu_dereference_protected(x->encap_sk,
+-                                      lockdep_is_held(&x->lock));
+-      if (encap->encap_sport != sport ||
+-          encap->encap_dport != dport) {
+-              sock_put(sk);
+-              sk = nsk ?: ERR_PTR(-EREMCHG);
+-      } else if (sk == nsk) {
+-              sock_put(sk);
+-      } else {
+-              rcu_assign_pointer(x->encap_sk, sk);
+-      }
+-      spin_unlock_bh(&x->lock);
+-
+       return sk;
+ }
+@@ -207,6 +162,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
+               err = espintcp_push_skb(sk, skb);
+       bh_unlock_sock(sk);
++      sock_put(sk);
++
+ out:
+       rcu_read_unlock();
+       return err;
+@@ -391,6 +348,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
+       if (IS_ERR(sk))
+               return ERR_CAST(sk);
++      sock_put(sk);
++
+       *lenp = htons(len);
+       esph = (struct ip_esp_hdr *)(lenp + 1);
+diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
+index a021c88d3d9b8..085a83b807afd 100644
+--- a/net/ipv6/esp6.c
++++ b/net/ipv6/esp6.c
+@@ -135,47 +135,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
+ }
+ #ifdef CONFIG_INET6_ESPINTCP
+-struct esp_tcp_sk {
+-      struct sock *sk;
+-      struct rcu_head rcu;
+-};
+-
+-static void esp_free_tcp_sk(struct rcu_head *head)
+-{
+-      struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
+-
+-      sock_put(esk->sk);
+-      kfree(esk);
+-}
+-
+ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
+ {
+       struct xfrm_encap_tmpl *encap = x->encap;
+       struct net *net = xs_net(x);
+-      struct esp_tcp_sk *esk;
+       __be16 sport, dport;
+-      struct sock *nsk;
+       struct sock *sk;
+-      sk = rcu_dereference(x->encap_sk);
+-      if (sk && sk->sk_state == TCP_ESTABLISHED)
+-              return sk;
+-
+       spin_lock_bh(&x->lock);
+       sport = encap->encap_sport;
+       dport = encap->encap_dport;
+-      nsk = rcu_dereference_protected(x->encap_sk,
+-                                      lockdep_is_held(&x->lock));
+-      if (sk && sk == nsk) {
+-              esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
+-              if (!esk) {
+-                      spin_unlock_bh(&x->lock);
+-                      return ERR_PTR(-ENOMEM);
+-              }
+-              RCU_INIT_POINTER(x->encap_sk, NULL);
+-              esk->sk = sk;
+-              call_rcu(&esk->rcu, esp_free_tcp_sk);
+-      }
+       spin_unlock_bh(&x->lock);
+       sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6,
+@@ -188,20 +157,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
+               return ERR_PTR(-EINVAL);
+       }
+-      spin_lock_bh(&x->lock);
+-      nsk = rcu_dereference_protected(x->encap_sk,
+-                                      lockdep_is_held(&x->lock));
+-      if (encap->encap_sport != sport ||
+-          encap->encap_dport != dport) {
+-              sock_put(sk);
+-              sk = nsk ?: ERR_PTR(-EREMCHG);
+-      } else if (sk == nsk) {
+-              sock_put(sk);
+-      } else {
+-              rcu_assign_pointer(x->encap_sk, sk);
+-      }
+-      spin_unlock_bh(&x->lock);
+-
+       return sk;
+ }
+@@ -224,6 +179,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
+               err = espintcp_push_skb(sk, skb);
+       bh_unlock_sock(sk);
++      sock_put(sk);
++
+ out:
+       rcu_read_unlock();
+       return err;
+@@ -427,6 +384,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
+       if (IS_ERR(sk))
+               return ERR_CAST(sk);
++      sock_put(sk);
++
+       *lenp = htons(len);
+       esph = (struct ip_esp_hdr *)(lenp + 1);
+diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
+index 2f4cf976b59a3..b5047a94c7d01 100644
+--- a/net/xfrm/xfrm_state.c
++++ b/net/xfrm/xfrm_state.c
+@@ -694,9 +694,6 @@ int __xfrm_state_delete(struct xfrm_state *x)
+               net->xfrm.state_num--;
+               spin_unlock(&net->xfrm.xfrm_state_lock);
+-              if (x->encap_sk)
+-                      sock_put(rcu_dereference_raw(x->encap_sk));
+-
+               xfrm_dev_state_delete(x);
+               /* All xfrm_state objects are created by xfrm_state_alloc.
+-- 
+2.39.5
+
diff --git a/queue-6.1/ice-fix-vf-num_mac-count-with-port-representors.patch b/queue-6.1/ice-fix-vf-num_mac-count-with-port-representors.patch
new file mode 100644 (file)
index 0000000..3394fc3
--- /dev/null
@@ -0,0 +1,53 @@
+From 22eb6ab71da9d347ceba6af60163e681430085d0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Apr 2025 11:13:52 -0700
+Subject: ice: fix vf->num_mac count with port representors
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+[ Upstream commit bbd95160a03dbfcd01a541f25c27ddb730dfbbd5 ]
+
+The ice_vc_repr_add_mac() function indicates that it does not store the MAC
+address filters in the firmware. However, it still increments vf->num_mac.
+This is incorrect, as vf->num_mac should represent the number of MAC
+filters currently programmed to firmware.
+
+Indeed, we only perform this increment if the requested filter is a unicast
+address that doesn't match the existing vf->hw_lan_addr. In addition,
+ice_vc_repr_del_mac() does not decrement the vf->num_mac counter. This
+results in the counter becoming out of sync with the actual count.
+
+As it turns out, vf->num_mac is currently only used in legacy mode without
+port representors. The single place where the value is checked is for
+enforcing a filter limit on untrusted VFs.
+
+Upcoming patches to support VF Live Migration will use this value when
+determining the size of the TLV for MAC address filters. Fix the
+representor mode function to stop incrementing the counter incorrectly.
+
+Fixes: ac19e03ef780 ("ice: allow process VF opcodes in different ways")
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Sujai Buvaneswaran <sujai.buvaneswaran@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_virtchnl.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+index 42d8e5e771b7e..fa9d928081d63 100644
+--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+@@ -3551,7 +3551,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg)
+               }
+               ice_vfhw_mac_add(vf, &al->list[i]);
+-              vf->num_mac++;
+               break;
+       }
+-- 
+2.39.5
+
diff --git a/queue-6.1/io_uring-fix-overflow-resched-cqe-reordering.patch b/queue-6.1/io_uring-fix-overflow-resched-cqe-reordering.patch
new file mode 100644 (file)
index 0000000..9c8336b
--- /dev/null
@@ -0,0 +1,38 @@
+From fa5549c99e2ae67200753c34fe522451ce751cc4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 17 May 2025 13:27:37 +0100
+Subject: io_uring: fix overflow resched cqe reordering
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ Upstream commit a7d755ed9ce9738af3db602eb29d32774a180bc7 ]
+
+Leaving the CQ critical section in the middle of an overflow flush
+can cause cqe reordering since the cache cq pointers are reset and any
+new cqe emitters that might get called in between are not going to be
+forced into io_cqe_cache_refill().
+
+Fixes: eac2ca2d682f9 ("io_uring: check if we need to reschedule during overflow flush")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/90ba817f1a458f091f355f407de1c911d2b93bbf.1747483784.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index f39d66589180e..ad462724246a7 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -627,6 +627,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
+                * to care for a non-real case.
+                */
+               if (need_resched()) {
++                      ctx->cqe_sentinel = ctx->cqe_cached;
+                       io_cq_unlock_post(ctx);
+                       mutex_unlock(&ctx->uring_lock);
+                       cond_resched();
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch b/queue-6.1/net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch
new file mode 100644 (file)
index 0000000..1a41df4
--- /dev/null
@@ -0,0 +1,48 @@
+From 79260d384fbd3db3c98166d5be9c99fc5d90bf37 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 May 2025 18:49:36 +0200
+Subject: net: dwmac-sun8i: Use parsed internal PHY address instead of 1
+
+From: Paul Kocialkowski <paulk@sys-base.io>
+
+[ Upstream commit 47653e4243f2b0a26372e481ca098936b51ec3a8 ]
+
+While the MDIO address of the internal PHY on Allwinner sun8i chips is
+generally 1, of_mdio_parse_addr is used to cleanly parse the address
+from the device-tree instead of hardcoding it.
+
+A commit reworking the code ditched the parsed value and hardcoded the
+value 1 instead, which didn't really break anything but is more fragile
+and not future-proof.
+
+Restore the initial behavior using the parsed address returned from the
+helper.
+
+Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs")
+Signed-off-by: Paul Kocialkowski <paulk@sys-base.io>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Acked-by: Corentin LABBE <clabbe.montjoie@gmail.com>
+Tested-by: Corentin LABBE <clabbe.montjoie@gmail.com>
+Link: https://patch.msgid.link/20250519164936.4172658-1-paulk@sys-base.io
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+index f834472599f75..0921b78c6244f 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+@@ -948,7 +948,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev,
+               /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY
+                * address. No need to mask it again.
+                */
+-              reg |= 1 << H3_EPHY_ADDR_SHIFT;
++              reg |= ret << H3_EPHY_ADDR_SHIFT;
+       } else {
+               /* For SoCs without internal PHY the PHY selection bit should be
+                * set to 0 (external PHY).
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-lan743x-restore-sgmii-ctrl-register-on-resume.patch b/queue-6.1/net-lan743x-restore-sgmii-ctrl-register-on-resume.patch
new file mode 100644 (file)
index 0000000..2fc7101
--- /dev/null
@@ -0,0 +1,92 @@
+From e1dfb6e5d724069d5f0afcfb735055ee10231eb6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 May 2025 09:27:19 +0530
+Subject: net: lan743x: Restore SGMII CTRL register on resume
+
+From: Thangaraj Samynathan <thangaraj.s@microchip.com>
+
+[ Upstream commit 293e38ff4e4c2ba53f3fd47d8a4a9f0f0414a7a6 ]
+
+SGMII_CTRL register, which specifies the active interface, was not
+properly restored when resuming from suspend. This led to incorrect
+interface selection after resume particularly in scenarios involving
+the FPGA.
+
+To fix this:
+- Move the SGMII_CTRL setup out of the probe function.
+- Initialize the register in the hardware initialization helper function,
+which is called during both device initialization and resume.
+
+This ensures the interface configuration is consistently restored after
+suspend/resume cycles.
+
+Fixes: a46d9d37c4f4f ("net: lan743x: Add support for SGMII interface")
+Signed-off-by: Thangaraj Samynathan <thangaraj.s@microchip.com>
+Link: https://patch.msgid.link/20250516035719.117960-1-thangaraj.s@microchip.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/microchip/lan743x_main.c | 19 ++++++++++---------
+ 1 file changed, 10 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
+index 2e69ba0143b15..fd35554191793 100644
+--- a/drivers/net/ethernet/microchip/lan743x_main.c
++++ b/drivers/net/ethernet/microchip/lan743x_main.c
+@@ -3253,6 +3253,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
+                                struct pci_dev *pdev)
+ {
+       struct lan743x_tx *tx;
++      u32 sgmii_ctl;
+       int index;
+       int ret;
+@@ -3265,6 +3266,15 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
+               spin_lock_init(&adapter->eth_syslock_spinlock);
+               mutex_init(&adapter->sgmii_rw_lock);
+               pci11x1x_set_rfe_rd_fifo_threshold(adapter);
++              sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL);
++              if (adapter->is_sgmii_en) {
++                      sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_;
++                      sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_;
++              } else {
++                      sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_;
++                      sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_;
++              }
++              lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl);
+       } else {
+               adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS;
+               adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS;
+@@ -3313,7 +3323,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
+ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter)
+ {
+-      u32 sgmii_ctl;
+       int ret;
+       adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev);
+@@ -3325,10 +3334,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter)
+       adapter->mdiobus->priv = (void *)adapter;
+       if (adapter->is_pci11x1x) {
+               if (adapter->is_sgmii_en) {
+-                      sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL);
+-                      sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_;
+-                      sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_;
+-                      lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl);
+                       netif_dbg(adapter, drv, adapter->netdev,
+                                 "SGMII operation\n");
+                       adapter->mdiobus->probe_capabilities = MDIOBUS_C22_C45;
+@@ -3338,10 +3343,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter)
+                       netif_dbg(adapter, drv, adapter->netdev,
+                                 "lan743x-mdiobus-c45\n");
+               } else {
+-                      sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL);
+-                      sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_;
+-                      sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_;
+-                      lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl);
+                       netif_dbg(adapter, drv, adapter->netdev,
+                                 "RGMII operation\n");
+                       // Only C22 support when RGMII I/F
+-- 
+2.39.5
+
diff --git a/queue-6.1/net-tipc-fix-slab-use-after-free-read-in-tipc_aead_e.patch b/queue-6.1/net-tipc-fix-slab-use-after-free-read-in-tipc_aead_e.patch
new file mode 100644 (file)
index 0000000..3f62daf
--- /dev/null
@@ -0,0 +1,125 @@
+From 1bc898f32032b96012c3ade397c3cdd3f986702b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 May 2025 18:14:04 +0800
+Subject: net/tipc: fix slab-use-after-free Read in tipc_aead_encrypt_done
+
+From: Wang Liang <wangliang74@huawei.com>
+
+[ Upstream commit e279024617134c94fd3e37470156534d5f2b3472 ]
+
+Syzbot reported a slab-use-after-free with the following call trace:
+
+  ==================================================================
+  BUG: KASAN: slab-use-after-free in tipc_aead_encrypt_done+0x4bd/0x510 net/tipc/crypto.c:840
+  Read of size 8 at addr ffff88807a733000 by task kworker/1:0/25
+
+  Call Trace:
+   kasan_report+0xd9/0x110 mm/kasan/report.c:601
+   tipc_aead_encrypt_done+0x4bd/0x510 net/tipc/crypto.c:840
+   crypto_request_complete include/crypto/algapi.h:266
+   aead_request_complete include/crypto/internal/aead.h:85
+   cryptd_aead_crypt+0x3b8/0x750 crypto/cryptd.c:772
+   crypto_request_complete include/crypto/algapi.h:266
+   cryptd_queue_worker+0x131/0x200 crypto/cryptd.c:181
+   process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231
+
+  Allocated by task 8355:
+   kzalloc_noprof include/linux/slab.h:778
+   tipc_crypto_start+0xcc/0x9e0 net/tipc/crypto.c:1466
+   tipc_init_net+0x2dd/0x430 net/tipc/core.c:72
+   ops_init+0xb9/0x650 net/core/net_namespace.c:139
+   setup_net+0x435/0xb40 net/core/net_namespace.c:343
+   copy_net_ns+0x2f0/0x670 net/core/net_namespace.c:508
+   create_new_namespaces+0x3ea/0xb10 kernel/nsproxy.c:110
+   unshare_nsproxy_namespaces+0xc0/0x1f0 kernel/nsproxy.c:228
+   ksys_unshare+0x419/0x970 kernel/fork.c:3323
+   __do_sys_unshare kernel/fork.c:3394
+
+  Freed by task 63:
+   kfree+0x12a/0x3b0 mm/slub.c:4557
+   tipc_crypto_stop+0x23c/0x500 net/tipc/crypto.c:1539
+   tipc_exit_net+0x8c/0x110 net/tipc/core.c:119
+   ops_exit_list+0xb0/0x180 net/core/net_namespace.c:173
+   cleanup_net+0x5b7/0xbf0 net/core/net_namespace.c:640
+   process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231
+
+After the tipc_crypto tx is freed by deleting the namespace,
+tipc_aead_encrypt_done may still access it from the cryptd_queue_worker workqueue.
+
+I reproduce this issue by:
+  ip netns add ns1
+  ip link add veth1 type veth peer name veth2
+  ip link set veth1 netns ns1
+  ip netns exec ns1 tipc bearer enable media eth dev veth1
+  ip netns exec ns1 tipc node set key this_is_a_master_key master
+  ip netns exec ns1 tipc bearer disable media eth dev veth1
+  ip netns del ns1
+
+The key to reproducing it is that simd_aead_encrypt is interrupted, causing
+crypto_simd_usable() to return false. Thus, cryptd_queue_worker is
+triggered, and the tipc_crypto tx will be accessed.
+
+  tipc_disc_timeout
+    tipc_bearer_xmit_skb
+      tipc_crypto_xmit
+        tipc_aead_encrypt
+          crypto_aead_encrypt
+            // encrypt()
+            simd_aead_encrypt
+              // crypto_simd_usable() is false
+              child = &ctx->cryptd_tfm->base;
+
+  simd_aead_encrypt
+    crypto_aead_encrypt
+      // encrypt()
+      cryptd_aead_encrypt_enqueue
+        cryptd_aead_enqueue
+          cryptd_enqueue_request
+            // trigger cryptd_queue_worker
+            queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work)
+
+Fix this by holding net reference count before encrypt.
+
+Reported-by: syzbot+55c12726619ff85ce1f6@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=55c12726619ff85ce1f6
+Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication")
+Signed-off-by: Wang Liang <wangliang74@huawei.com>
+Link: https://patch.msgid.link/20250520101404.1341730-1-wangliang74@huawei.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tipc/crypto.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
+index 25c18f8783ce9..a9c02fac039b5 100644
+--- a/net/tipc/crypto.c
++++ b/net/tipc/crypto.c
+@@ -817,12 +817,16 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
+               goto exit;
+       }
++      /* Get net to avoid freed tipc_crypto when delete namespace */
++      get_net(aead->crypto->net);
++
+       /* Now, do encrypt */
+       rc = crypto_aead_encrypt(req);
+       if (rc == -EINPROGRESS || rc == -EBUSY)
+               return rc;
+       tipc_bearer_put(b);
++      put_net(aead->crypto->net);
+ exit:
+       kfree(ctx);
+@@ -860,6 +864,7 @@ static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err)
+       kfree(tx_ctx);
+       tipc_bearer_put(b);
+       tipc_aead_put(aead);
++      put_net(net);
+ }
+ /**
+-- 
+2.39.5
+
diff --git a/queue-6.1/octeontx2-af-fix-apr-entry-mapping-based-on-apr_lmt_.patch b/queue-6.1/octeontx2-af-fix-apr-entry-mapping-based-on-apr_lmt_.patch
new file mode 100644 (file)
index 0000000..5383012
--- /dev/null
@@ -0,0 +1,111 @@
+From d029cfff9be19b26fbc810d68f78b4a72d09fd7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 May 2025 11:38:34 +0530
+Subject: octeontx2-af: Fix APR entry mapping based on APR_LMT_CFG
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit a6ae7129819ad20788e610261246e71736543b8b ]
+
+The current implementation maps the APR table using a fixed size,
+which can lead to incorrect mapping when the number of PFs and VFs
+varies.
+This patch corrects the mapping by calculating the APR table
+size dynamically based on the values configured in the
+APR_LMT_CFG register, ensuring accurate representation
+of APR entries in debugfs.
+
+Fixes: 0daa55d033b0 ("octeontx2-af: cn10k: debugfs for dumping LMTST map table")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Link: https://patch.msgid.link/20250521060834.19780-3-gakula@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c |  9 ++++++---
+ .../net/ethernet/marvell/octeontx2/af/rvu_debugfs.c   | 11 ++++++++---
+ 2 files changed, 14 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
+index 6ec0609074dca..5cd45846237e2 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
+@@ -13,7 +13,6 @@
+ /* RVU LMTST */
+ #define LMT_TBL_OP_READ               0
+ #define LMT_TBL_OP_WRITE      1
+-#define LMT_MAP_TABLE_SIZE    (128 * 1024)
+ #define LMT_MAPTBL_ENTRY_SIZE 16
+ #define LMT_MAX_VFS           256
+@@ -26,10 +25,14 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
+ {
+       void __iomem *lmt_map_base;
+       u64 tbl_base, cfg;
++      int pfs, vfs;
+       tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE);
++      cfg  = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG);
++      vfs = 1 << (cfg & 0xF);
++      pfs = 1 << ((cfg >> 4) & 0x7);
+-      lmt_map_base = ioremap_wc(tbl_base, LMT_MAP_TABLE_SIZE);
++      lmt_map_base = ioremap_wc(tbl_base, pfs * vfs * LMT_MAPTBL_ENTRY_SIZE);
+       if (!lmt_map_base) {
+               dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n");
+               return -ENOMEM;
+@@ -80,7 +83,7 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc,
+       mutex_lock(&rvu->rsrc_lock);
+       rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova);
+-      pf = rvu_get_pf(pcifunc) & 0x1F;
++      pf = rvu_get_pf(pcifunc) & RVU_PFVF_PF_MASK;
+       val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 |
+             ((pcifunc & RVU_PFVF_FUNC_MASK) & 0xFF);
+       rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TXN_REQ, val);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+index a3c1d82032f55..aa2ab987eb752 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+@@ -580,6 +580,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
+       u64 lmt_addr, val, tbl_base;
+       int pf, vf, num_vfs, hw_vfs;
+       void __iomem *lmt_map_base;
++      int apr_pfs, apr_vfs;
+       int buf_size = 10240;
+       size_t off = 0;
+       int index = 0;
+@@ -595,8 +596,12 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
+               return -ENOMEM;
+       tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE);
++      val  = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG);
++      apr_vfs = 1 << (val & 0xF);
++      apr_pfs = 1 << ((val >> 4) & 0x7);
+-      lmt_map_base = ioremap_wc(tbl_base, 128 * 1024);
++      lmt_map_base = ioremap_wc(tbl_base, apr_pfs * apr_vfs *
++                                LMT_MAPTBL_ENTRY_SIZE);
+       if (!lmt_map_base) {
+               dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n");
+               kfree(buf);
+@@ -618,7 +623,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
+               off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d  \t\t\t",
+                                   pf);
+-              index = pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE;
++              index = pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE;
+               off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%llx\t\t",
+                                (tbl_base + index));
+               lmt_addr = readq(lmt_map_base + index);
+@@ -631,7 +636,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
+               /* Reading num of VFs per PF */
+               rvu_get_pf_numvfs(rvu, pf, &num_vfs, &hw_vfs);
+               for (vf = 0; vf < num_vfs; vf++) {
+-                      index = (pf * rvu->hw->total_vfs * 16) +
++                      index = (pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE) +
+                               ((vf + 1)  * LMT_MAPTBL_ENTRY_SIZE);
+                       off += scnprintf(&buf[off], buf_size - 1 - off,
+                                           "PF%d:VF%d  \t\t", pf, vf);
+-- 
+2.39.5
+
diff --git a/queue-6.1/octeontx2-af-set-lmt_ena-bit-for-apr-table-entries.patch b/queue-6.1/octeontx2-af-set-lmt_ena-bit-for-apr-table-entries.patch
new file mode 100644 (file)
index 0000000..013f8f8
--- /dev/null
@@ -0,0 +1,76 @@
+From dfd0231df3e4b1b886b044e375e07c68c3bdb3a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 May 2025 11:38:33 +0530
+Subject: octeontx2-af: Set LMT_ENA bit for APR table entries
+
+From: Subbaraya Sundeep <sbhatta@marvell.com>
+
+[ Upstream commit 0eefa27b493306928d88af6368193b134c98fd64 ]
+
+This patch enables the LMT line for a PF/VF by setting the
+LMT_ENA bit in the APR_LMT_MAP_ENTRY_S structure.
+
+Additionally, it simplifies the logic for calculating the
+LMTST table index by consistently using the maximum
+number of hw supported VFs (i.e., 256).
+
+Fixes: 873a1e3d207a ("octeontx2-af: cn10k: Setting up lmtst map table")
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/20250521060834.19780-2-gakula@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeontx2/af/rvu_cn10k.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
+index f9faa5b23bb9d..6ec0609074dca 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
+@@ -15,13 +15,17 @@
+ #define LMT_TBL_OP_WRITE      1
+ #define LMT_MAP_TABLE_SIZE    (128 * 1024)
+ #define LMT_MAPTBL_ENTRY_SIZE 16
++#define LMT_MAX_VFS           256
++
++#define LMT_MAP_ENTRY_ENA      BIT_ULL(20)
++#define LMT_MAP_ENTRY_LINES    GENMASK_ULL(18, 16)
+ /* Function to perform operations (read/write) on lmtst map table */
+ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
+                              int lmt_tbl_op)
+ {
+       void __iomem *lmt_map_base;
+-      u64 tbl_base;
++      u64 tbl_base, cfg;
+       tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE);
+@@ -35,6 +39,13 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
+               *val = readq(lmt_map_base + index);
+       } else {
+               writeq((*val), (lmt_map_base + index));
++
++              cfg = FIELD_PREP(LMT_MAP_ENTRY_ENA, 0x1);
++              /* 2048 LMTLINES */
++              cfg |= FIELD_PREP(LMT_MAP_ENTRY_LINES, 0x6);
++
++              writeq(cfg, (lmt_map_base + (index + 8)));
++
+               /* Flushing the AP interceptor cache to make APR_LMT_MAP_ENTRY_S
+                * changes effective. Write 1 for flush and read is being used as a
+                * barrier and sets up a data dependency. Write to 0 after a write
+@@ -52,7 +63,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
+ #define LMT_MAP_TBL_W1_OFF  8
+ static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc)
+ {
+-      return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) +
++      return ((rvu_get_pf(pcifunc) * LMT_MAX_VFS) +
+               (pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE;
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.1/octeontx2-pf-add-af_xdp-non-zero-copy-support.patch b/queue-6.1/octeontx2-pf-add-af_xdp-non-zero-copy-support.patch
new file mode 100644 (file)
index 0000000..30710bc
--- /dev/null
@@ -0,0 +1,51 @@
+From 203e22ead866fb870b6b82d04c20571491035768 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Feb 2025 11:01:37 +0530
+Subject: octeontx2-pf: Add AF_XDP non-zero copy support
+
+From: Suman Ghosh <sumang@marvell.com>
+
+[ Upstream commit b4164de5041b51cda3438e75bce668e2556057c3 ]
+
+Set xdp rx ring memory type as MEM_TYPE_PAGE_POOL for
+af-xdp to work. This is needed since xdp_return_frame
+internally will use page pools.
+
+Fixes: 06059a1a9a4a ("octeontx2-pf: Add XDP support to netdev PF")
+Signed-off-by: Suman Ghosh <sumang@marvell.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+index 5e11599d13223..59a7e6f376f47 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+@@ -988,6 +988,7 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
+       int err, pool_id, non_xdp_queues;
+       struct nix_aq_enq_req *aq;
+       struct otx2_cq_queue *cq;
++      struct otx2_pool *pool;
+       cq = &qset->cq[qidx];
+       cq->cq_idx = qidx;
+@@ -996,8 +997,13 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
+               cq->cq_type = CQ_RX;
+               cq->cint_idx = qidx;
+               cq->cqe_cnt = qset->rqe_cnt;
+-              if (pfvf->xdp_prog)
++              if (pfvf->xdp_prog) {
++                      pool = &qset->pool[qidx];
+                       xdp_rxq_info_reg(&cq->xdp_rxq, pfvf->netdev, qidx, 0);
++                      xdp_rxq_info_reg_mem_model(&cq->xdp_rxq,
++                                                 MEM_TYPE_PAGE_POOL,
++                                                 pool->page_pool);
++              }
+       } else if (qidx < non_xdp_queues) {
+               cq->cq_type = CQ_TX;
+               cq->cint_idx = qidx - pfvf->hw.rx_queues;
+-- 
+2.39.5
+
diff --git a/queue-6.1/octeontx2-pf-add-support-for-page-pool.patch b/queue-6.1/octeontx2-pf-add-support-for-page-pool.patch
new file mode 100644 (file)
index 0000000..d52e09f
--- /dev/null
@@ -0,0 +1,365 @@
+From 06c9a36ad9e3c22f0e5169626a0d9d56b5c56f87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 07:34:04 +0530
+Subject: octeontx2-pf: Add support for page pool
+
+From: Ratheesh Kannoth <rkannoth@marvell.com>
+
+[ Upstream commit b2e3406a38f0f48b1dfb81e5bb73d243ff6af179 ]
+
+A page pool for each rx queue enhances rx side performance
+by reclaiming buffers back to each queue-specific pool. DMA
+mapping is done only for the first allocation of buffers.
+As subsequent buffer allocations avoid DMA mapping,
+this results in a performance improvement.
+
+Image        |  Performance
+------------ | ------------
+Vanilla      |   3Mpps
+             |
+with this    |   42Mpps
+change       |
+---------------------------
+
+Signed-off-by: Ratheesh Kannoth <rkannoth@marvell.com>
+Link: https://lore.kernel.org/r/20230522020404.152020-1-rkannoth@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: b4164de5041b ("octeontx2-pf: Add AF_XDP non-zero copy support")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeontx2/Kconfig    |  1 +
+ .../marvell/octeontx2/nic/otx2_common.c       | 78 ++++++++++++++++---
+ .../marvell/octeontx2/nic/otx2_common.h       |  6 +-
+ .../ethernet/marvell/octeontx2/nic/otx2_pf.c  | 11 ++-
+ .../marvell/octeontx2/nic/otx2_txrx.c         | 19 +++--
+ .../marvell/octeontx2/nic/otx2_txrx.h         |  1 +
+ .../ethernet/marvell/octeontx2/nic/qos_sq.c   |  2 +-
+ 7 files changed, 96 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig
+index 993ac180a5db8..a32d85d6f599f 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/Kconfig
++++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig
+@@ -32,6 +32,7 @@ config OCTEONTX2_PF
+       tristate "Marvell OcteonTX2 NIC Physical Function driver"
+       select OCTEONTX2_MBOX
+       select NET_DEVLINK
++      select PAGE_POOL
+       depends on (64BIT && COMPILE_TEST) || ARM64
+       select DIMLIB
+       depends on PCI
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+index d05f91f97a9af..5e11599d13223 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+@@ -513,11 +513,32 @@ void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx)
+                    (pfvf->hw.cq_ecount_wait - 1));
+ }
++static int otx2_alloc_pool_buf(struct otx2_nic *pfvf, struct otx2_pool *pool,
++                             dma_addr_t *dma)
++{
++      unsigned int offset = 0;
++      struct page *page;
++      size_t sz;
++
++      sz = SKB_DATA_ALIGN(pool->rbsize);
++      sz = ALIGN(sz, OTX2_ALIGN);
++
++      page = page_pool_alloc_frag(pool->page_pool, &offset, sz, GFP_ATOMIC);
++      if (unlikely(!page))
++              return -ENOMEM;
++
++      *dma = page_pool_get_dma_addr(page) + offset;
++      return 0;
++}
++
+ static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+                            dma_addr_t *dma)
+ {
+       u8 *buf;
++      if (pool->page_pool)
++              return otx2_alloc_pool_buf(pfvf, pool, dma);
++
+       buf = napi_alloc_frag_align(pool->rbsize, OTX2_ALIGN);
+       if (unlikely(!buf))
+               return -ENOMEM;
+@@ -1206,10 +1227,31 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf)
+       }
+ }
++void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool,
++                  u64 iova, int size)
++{
++      struct page *page;
++      u64 pa;
++
++      pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
++      page = virt_to_head_page(phys_to_virt(pa));
++
++      if (pool->page_pool) {
++              page_pool_put_full_page(pool->page_pool, page, true);
++      } else {
++              dma_unmap_page_attrs(pfvf->dev, iova, size,
++                                   DMA_FROM_DEVICE,
++                                   DMA_ATTR_SKIP_CPU_SYNC);
++
++              put_page(page);
++      }
++}
++
+ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
+ {
+       int pool_id, pool_start = 0, pool_end = 0, size = 0;
+-      u64 iova, pa;
++      struct otx2_pool *pool;
++      u64 iova;
+       if (type == AURA_NIX_SQ) {
+               pool_start = otx2_get_pool_idx(pfvf, type, 0);
+@@ -1225,15 +1267,13 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type)
+       /* Free SQB and RQB pointers from the aura pool */
+       for (pool_id = pool_start; pool_id < pool_end; pool_id++) {
+               iova = otx2_aura_allocptr(pfvf, pool_id);
++              pool = &pfvf->qset.pool[pool_id];
+               while (iova) {
+                       if (type == AURA_NIX_RQ)
+                               iova -= OTX2_HEAD_ROOM;
+-                      pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
+-                      dma_unmap_page_attrs(pfvf->dev, iova, size,
+-                                           DMA_FROM_DEVICE,
+-                                           DMA_ATTR_SKIP_CPU_SYNC);
+-                      put_page(virt_to_page(phys_to_virt(pa)));
++                      otx2_free_bufs(pfvf, pool, iova, size);
++
+                       iova = otx2_aura_allocptr(pfvf, pool_id);
+               }
+       }
+@@ -1251,6 +1291,8 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf)
+               pool = &pfvf->qset.pool[pool_id];
+               qmem_free(pfvf->dev, pool->stack);
+               qmem_free(pfvf->dev, pool->fc_addr);
++              page_pool_destroy(pool->page_pool);
++              pool->page_pool = NULL;
+       }
+       devm_kfree(pfvf->dev, pfvf->qset.pool);
+       pfvf->qset.pool = NULL;
+@@ -1334,8 +1376,9 @@ int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
+ }
+ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
+-                 int stack_pages, int numptrs, int buf_size)
++                 int stack_pages, int numptrs, int buf_size, int type)
+ {
++      struct page_pool_params pp_params = { 0 };
+       struct npa_aq_enq_req *aq;
+       struct otx2_pool *pool;
+       int err;
+@@ -1379,6 +1422,22 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
+       aq->ctype = NPA_AQ_CTYPE_POOL;
+       aq->op = NPA_AQ_INSTOP_INIT;
++      if (type != AURA_NIX_RQ) {
++              pool->page_pool = NULL;
++              return 0;
++      }
++
++      pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP;
++      pp_params.pool_size = numptrs;
++      pp_params.nid = NUMA_NO_NODE;
++      pp_params.dev = pfvf->dev;
++      pp_params.dma_dir = DMA_FROM_DEVICE;
++      pool->page_pool = page_pool_create(&pp_params);
++      if (IS_ERR(pool->page_pool)) {
++              netdev_err(pfvf->netdev, "Creation of page pool failed\n");
++              return PTR_ERR(pool->page_pool);
++      }
++
+       return 0;
+ }
+@@ -1413,7 +1472,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
+               /* Initialize pool context */
+               err = otx2_pool_init(pfvf, pool_id, stack_pages,
+-                                   num_sqbs, hw->sqb_size);
++                                   num_sqbs, hw->sqb_size, AURA_NIX_SQ);
+               if (err)
+                       goto fail;
+       }
+@@ -1476,7 +1535,7 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
+       }
+       for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
+               err = otx2_pool_init(pfvf, pool_id, stack_pages,
+-                                   num_ptrs, pfvf->rbsize);
++                                   num_ptrs, pfvf->rbsize, AURA_NIX_RQ);
+               if (err)
+                       goto fail;
+       }
+@@ -1660,7 +1719,6 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable)
+       req->bpid_per_chan = 0;
+ #endif
+-
+       return otx2_sync_mbox_msg(&pfvf->mbox);
+ }
+ EXPORT_SYMBOL(otx2_nix_config_bp);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+index c15d1864a6371..4f0ac8158ed12 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+@@ -934,7 +934,7 @@ int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+ int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable);
+ void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
+ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
+-void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq);
++void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx);
+ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq);
+ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura);
+ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
+@@ -942,7 +942,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
+ int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
+                     dma_addr_t *dma);
+ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
+-                 int stack_pages, int numptrs, int buf_size);
++                 int stack_pages, int numptrs, int buf_size, int type);
+ int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
+                  int pool_id, int numptrs);
+@@ -1012,6 +1012,8 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf);
+ int otx2_handle_ntuple_tc_features(struct net_device *netdev,
+                                  netdev_features_t features);
+ int otx2_smq_flush(struct otx2_nic *pfvf, int smq);
++void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool,
++                  u64 iova, int size);
+ /* tc support */
+ int otx2_init_tc(struct otx2_nic *nic);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+index 6b7fb324e756e..8385b46736934 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+@@ -1591,7 +1591,9 @@ static void otx2_free_hw_resources(struct otx2_nic *pf)
+       struct nix_lf_free_req *free_req;
+       struct mbox *mbox = &pf->mbox;
+       struct otx2_cq_queue *cq;
++      struct otx2_pool *pool;
+       struct msg_req *req;
++      int pool_id;
+       int qidx;
+       /* Ensure all SQE are processed */
+@@ -1618,7 +1620,7 @@ static void otx2_free_hw_resources(struct otx2_nic *pf)
+       for (qidx = 0; qidx < qset->cq_cnt; qidx++) {
+               cq = &qset->cq[qidx];
+               if (cq->cq_type == CQ_RX)
+-                      otx2_cleanup_rx_cqes(pf, cq);
++                      otx2_cleanup_rx_cqes(pf, cq, qidx);
+               else
+                       otx2_cleanup_tx_cqes(pf, cq);
+       }
+@@ -1629,6 +1631,13 @@ static void otx2_free_hw_resources(struct otx2_nic *pf)
+       /* Free RQ buffer pointers*/
+       otx2_free_aura_ptr(pf, AURA_NIX_RQ);
++      for (qidx = 0; qidx < pf->hw.rx_queues; qidx++) {
++              pool_id = otx2_get_pool_idx(pf, AURA_NIX_RQ, qidx);
++              pool = &pf->qset.pool[pool_id];
++              page_pool_destroy(pool->page_pool);
++              pool->page_pool = NULL;
++      }
++
+       otx2_free_cq_res(pf);
+       /* Free all ingress bandwidth profiles allocated */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+index e579183e52392..cc704cd3b5ae1 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+@@ -218,9 +218,6 @@ static bool otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb,
+               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+                               va - page_address(page) + off,
+                               len - off, pfvf->rbsize);
+-
+-              otx2_dma_unmap_page(pfvf, iova - OTX2_HEAD_ROOM,
+-                                  pfvf->rbsize, DMA_FROM_DEVICE);
+               return true;
+       }
+@@ -383,6 +380,8 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf,
+       if (pfvf->netdev->features & NETIF_F_RXCSUM)
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
++      skb_mark_for_recycle(skb);
++
+       napi_gro_frags(napi);
+ }
+@@ -1191,11 +1190,13 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq,
+ }
+ EXPORT_SYMBOL(otx2_sq_append_skb);
+-void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
++void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx)
+ {
+       struct nix_cqe_rx_s *cqe;
++      struct otx2_pool *pool;
+       int processed_cqe = 0;
+-      u64 iova, pa;
++      u16 pool_id;
++      u64 iova;
+       if (pfvf->xdp_prog)
+               xdp_rxq_info_unreg(&cq->xdp_rxq);
+@@ -1203,6 +1204,9 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
+       if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
+               return;
++      pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx);
++      pool = &pfvf->qset.pool[pool_id];
++
+       while (cq->pend_cqe) {
+               cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq);
+               processed_cqe++;
+@@ -1215,9 +1219,8 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
+                       continue;
+               }
+               iova = cqe->sg.seg_addr - OTX2_HEAD_ROOM;
+-              pa = otx2_iova_to_phys(pfvf->iommu_domain, iova);
+-              otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, DMA_FROM_DEVICE);
+-              put_page(virt_to_page(phys_to_virt(pa)));
++
++              otx2_free_bufs(pfvf, pool, iova, pfvf->rbsize);
+       }
+       /* Free CQEs to HW */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+index 7ab6db9a986fa..b5d689eeff80b 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+@@ -118,6 +118,7 @@ struct otx2_cq_poll {
+ struct otx2_pool {
+       struct qmem             *stack;
+       struct qmem             *fc_addr;
++      struct page_pool        *page_pool;
+       u16                     rbsize;
+ };
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
+index e142d43f5a62c..95a2c8e616bd8 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c
+@@ -63,7 +63,7 @@ static int otx2_qos_sq_aura_pool_init(struct otx2_nic *pfvf, int qidx)
+       /* Initialize pool context */
+       err = otx2_pool_init(pfvf, pool_id, stack_pages,
+-                           num_sqbs, hw->sqb_size);
++                           num_sqbs, hw->sqb_size, AURA_NIX_SQ);
+       if (err)
+               goto aura_free;
+-- 
+2.39.5
+
diff --git a/queue-6.1/remoteproc-qcom_wcnss-fix-on-platforms-without-fallb.patch b/queue-6.1/remoteproc-qcom_wcnss-fix-on-platforms-without-fallb.patch
new file mode 100644 (file)
index 0000000..6d6e8d0
--- /dev/null
@@ -0,0 +1,45 @@
+From 4088d71aed0f5e4dc72d21f8534000687f203b5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 May 2025 02:40:15 +0300
+Subject: remoteproc: qcom_wcnss: Fix on platforms without fallback regulators
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Matti Lehtimäki <matti.lehtimaki@gmail.com>
+
+[ Upstream commit 4ca45af0a56d00b86285d6fdd720dca3215059a7 ]
+
+A recent change to handle platforms with only a single power domain broke
+pronto-v3, which requires power domains and has no fallback voltage
+regulators in case power domains are missing. Add a check to verify
+the number of fallback voltage regulators before using the code which
+handles the single power domain situation.
+
+Fixes: 65991ea8a6d1 ("remoteproc: qcom_wcnss: Handle platforms with only single power domain")
+Signed-off-by: Matti Lehtimäki <matti.lehtimaki@gmail.com>
+Tested-by: Luca Weiss <luca.weiss@fairphone.com> # sdm632-fairphone-fp3
+Link: https://lore.kernel.org/r/20250511234026.94735-1-matti.lehtimaki@gmail.com
+Signed-off-by: Bjorn Andersson <andersson@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/remoteproc/qcom_wcnss.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c
+index ce61e0e7cbeb8..af96541c9b69a 100644
+--- a/drivers/remoteproc/qcom_wcnss.c
++++ b/drivers/remoteproc/qcom_wcnss.c
+@@ -445,7 +445,8 @@ static int wcnss_init_regulators(struct qcom_wcnss *wcnss,
+       if (wcnss->num_pds) {
+               info += wcnss->num_pds;
+               /* Handle single power domain case */
+-              num_vregs += num_pd_vregs - wcnss->num_pds;
++              if (wcnss->num_pds < num_pd_vregs)
++                      num_vregs += num_pd_vregs - wcnss->num_pds;
+       } else {
+               num_vregs += num_pd_vregs;
+       }
+-- 
+2.39.5
+
diff --git a/queue-6.1/sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch b/queue-6.1/sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch
new file mode 100644 (file)
index 0000000..6d79e88
--- /dev/null
@@ -0,0 +1,62 @@
+From e5a1bb921828d93b9eb67fa21178fd4ffc7ba80f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 18 May 2025 15:20:37 -0700
+Subject: sch_hfsc: Fix qlen accounting bug when using peek in hfsc_enqueue()
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 3f981138109f63232a5fb7165938d4c945cc1b9d ]
+
+When enqueuing the first packet to an HFSC class, hfsc_enqueue() calls the
+child qdisc's peek() operation before incrementing sch->q.qlen and
+sch->qstats.backlog. If the child qdisc uses qdisc_peek_dequeued(), this may
+trigger an immediate dequeue and potential packet drop. In such cases,
+qdisc_tree_reduce_backlog() is called, but the HFSC qdisc's qlen and backlog
+have not yet been updated, leading to inconsistent queue accounting. This
+can leave an empty HFSC class in the active list, causing further
+consequences like use-after-free.
+
+This patch fixes the bug by moving the increment of sch->q.qlen and
+sch->qstats.backlog before the call to the child qdisc's peek() operation.
+This ensures that queue length and backlog are always accurate when packet
+drops or dequeues are triggered during the peek.
+
+Fixes: 12d0ad3be9c3 ("net/sched/sch_hfsc.c: handle corner cases where head may change invalidating calculated deadline")
+Reported-by: Mingi Cho <mincho@theori.io>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250518222038.58538-2-xiyou.wangcong@gmail.com
+Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_hfsc.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
+index fc1370c293730..ec6ee45100132 100644
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -1568,6 +1568,9 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
+               return err;
+       }
++      sch->qstats.backlog += len;
++      sch->q.qlen++;
++
+       if (first && !cl->cl_nactive) {
+               if (cl->cl_flags & HFSC_RSC)
+                       init_ed(cl, len);
+@@ -1583,9 +1586,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
+       }
+-      sch->qstats.backlog += len;
+-      sch->q.qlen++;
+-
+       return NET_XMIT_SUCCESS;
+ }
+-- 
+2.39.5
+
index 2afdd7c76c4a54a56aadde1193c40234c2bdfa7f..b3d6b37bc010fbe9908175bd29cb0c6750ecd905 100644 (file)
@@ -230,3 +230,23 @@ btrfs-correct-the-order-of-prelim_ref-arguments-in-b.patch
 wifi-iwlwifi-add-support-for-killer-on-mtl.patch
 xenbus-allow-pvh-dom0-a-non-local-xenstore.patch
 __legitimize_mnt-check-for-mnt_sync_umount-should-be.patch
+espintcp-remove-encap-socket-caching-to-avoid-refere.patch
+dmaengine-idxd-add-per-dsa-wq-workqueue-for-processi.patch
+dmaengine-idxd-add-idxd_copy_cr-to-copy-user-complet.patch
+dmaengine-idxd-fix-allowing-write-from-different-add.patch
+remoteproc-qcom_wcnss-fix-on-platforms-without-fallb.patch
+clk-sunxi-ng-d1-add-missing-divider-for-mmc-mod-cloc.patch
+xfrm-sanitize-marks-before-insert.patch
+dmaengine-idxd-fix-poll-return-value.patch
+bluetooth-l2cap-fix-not-checking-l2cap_chan-security.patch
+bridge-netfilter-fix-forwarding-of-fragmented-packet.patch
+ice-fix-vf-num_mac-count-with-port-representors.patch
+net-dwmac-sun8i-use-parsed-internal-phy-address-inst.patch
+net-lan743x-restore-sgmii-ctrl-register-on-resume.patch
+io_uring-fix-overflow-resched-cqe-reordering.patch
+sch_hfsc-fix-qlen-accounting-bug-when-using-peek-in-.patch
+octeontx2-pf-add-support-for-page-pool.patch
+octeontx2-pf-add-af_xdp-non-zero-copy-support.patch
+net-tipc-fix-slab-use-after-free-read-in-tipc_aead_e.patch
+octeontx2-af-set-lmt_ena-bit-for-apr-table-entries.patch
+octeontx2-af-fix-apr-entry-mapping-based-on-apr_lmt_.patch
diff --git a/queue-6.1/xfrm-sanitize-marks-before-insert.patch b/queue-6.1/xfrm-sanitize-marks-before-insert.patch
new file mode 100644 (file)
index 0000000..82c1c56
--- /dev/null
@@ -0,0 +1,71 @@
+From d729f33af12fba969f96b3d6eef5e8ea3f3f7fb5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 7 May 2025 13:31:58 +0200
+Subject: xfrm: Sanitize marks before insert
+
+From: Paul Chaignon <paul.chaignon@gmail.com>
+
+[ Upstream commit 0b91fda3a1f044141e1e615456ff62508c32b202 ]
+
+Prior to this patch, the mark is sanitized (applying the state's mask to
+the state's value) only on inserts when checking if a conflicting XFRM
+state or policy exists.
+
+We discovered in Cilium that this same sanitization does not occur
+in the hot-path __xfrm_state_lookup. In the hot-path, the sk_buff's mark
+is simply compared to the state's value:
+
+    if ((mark & x->mark.m) != x->mark.v)
+        continue;
+
+Therefore, users can define unsanitized marks (ex. 0xf42/0xf00) which will
+never match any packet.
+
+This commit updates __xfrm_state_insert and xfrm_policy_insert to store
+the sanitized marks, thus removing this footgun.
+
+This has the side effect of changing the ip output, as the
+returned mark will have the mask applied to it when printed.
+
+Fixes: 3d6acfa7641f ("xfrm: SA lookups with mark")
+Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
+Signed-off-by: Louis DeLosSantos <louis.delos.devel@gmail.com>
+Co-developed-by: Louis DeLosSantos <louis.delos.devel@gmail.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_policy.c | 3 +++
+ net/xfrm/xfrm_state.c  | 3 +++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index a022f49846879..e015ff225b27a 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -1597,6 +1597,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+       struct xfrm_policy *delpol;
+       struct hlist_head *chain;
++      /* Sanitize mark before store */
++      policy->mark.v &= policy->mark.m;
++
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+       chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
+       if (chain)
+diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
+index b5047a94c7d01..58c53bb1c5838 100644
+--- a/net/xfrm/xfrm_state.c
++++ b/net/xfrm/xfrm_state.c
+@@ -1275,6 +1275,9 @@ static void __xfrm_state_insert(struct xfrm_state *x)
+       list_add(&x->km.all, &net->xfrm.state_all);
++      /* Sanitize mark before store */
++      x->mark.v &= x->mark.m;
++
+       h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
+                         x->props.reqid, x->props.family);
+       hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
+-- 
+2.39.5
+