From 72e4682f7073fe531253429e93b677b92ade2532 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 5 Jun 2026 18:05:55 +0200 Subject: [PATCH] 6.18-stable patches added patches: arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch cpufreq-intel_pstate-add-and-use-hybrid_get_cpu_type.patch cpufreq-intel_pstate-use-correct-scaling-factor-on-raptor-lake-e.patch mptcp-borrow-forward-memory-from-subflow.patch mptcp-cleanup-fallback-dummy-mapping-generation.patch mptcp-do-not-drop-partial-packets.patch mptcp-handle-first-subflow-closing-consistently.patch mptcp-reset-rcv-wnd-on-disconnect.patch net-devmem-reject-dma-buf-bind-with-non-page-aligned-size-or-sg-length.patch octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch platform-x86-intel-vsec-make-driver_data-info-const.patch platform-x86-intel-vsec-refactor-base_addr-handling.patch rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch selftests-mptcp-drop-nanoseconds-width-specifier.patch serdev-provide-a-bustype-shutdown-function.patch usb-dwc3-xilinx-fix-error-handling-in-zynqmp-init-error-paths.patch x86-mm-disable-broadcast-tlb-flush-when-pcid-is-disabled.patch xfrm-iptfs-reset-runtime-state-when-cloning-sas.patch --- ...walk-cache-when-unsharing-pmd-tables.patch | 55 ++ ...a-convert-timeout-from-jiffies-to-ms.patch | 154 +++++ ...to-serdev-specific-shutdown-function.patch | 53 ++ ...tate-add-and-use-hybrid_get_cpu_type.patch | 60 ++ ...rect-scaling-factor-on-raptor-lake-e.patch | 49 ++ ...p-borrow-forward-memory-from-subflow.patch | 200 ++++++ ...up-fallback-dummy-mapping-generation.patch | 74 ++ .../mptcp-do-not-drop-partial-packets.patch | 78 +++ 
...e-first-subflow-closing-consistently.patch | 104 +++ .../mptcp-reset-rcv-wnd-on-disconnect.patch | 47 ++ ...h-non-page-aligned-size-or-sg-length.patch | 73 ++ ...ree-of-pool-stack-on-aq-init-failure.patch | 62 ++ ...cnt-imbalance-on-pcie-error-recovery.patch | 131 ++++ ...tel-vsec-make-driver_data-info-const.patch | 144 ++++ ...tel-vsec-refactor-base_addr-handling.patch | 124 ++++ ...by-copying-data-to-buffer-in-recvmsg.patch | 646 ++++++++++++++++++ ...on-to-extract-skb-to-a-linear-buffer.patch | 614 +++++++++++++++++ ...tcp-drop-nanoseconds-width-specifier.patch | 112 +++ ...-provide-a-bustype-shutdown-function.patch | 88 +++ queue-6.18/series | 22 + ...-handling-in-zynqmp-init-error-paths.patch | 96 +++ ...cast-tlb-flush-when-pcid-is-disabled.patch | 73 ++ ...reset-runtime-state-when-cloning-sas.patch | 96 +++ 23 files changed, 3155 insertions(+) create mode 100644 queue-6.18/arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch create mode 100644 queue-6.18/bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch create mode 100644 queue-6.18/bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch create mode 100644 queue-6.18/cpufreq-intel_pstate-add-and-use-hybrid_get_cpu_type.patch create mode 100644 queue-6.18/cpufreq-intel_pstate-use-correct-scaling-factor-on-raptor-lake-e.patch create mode 100644 queue-6.18/mptcp-borrow-forward-memory-from-subflow.patch create mode 100644 queue-6.18/mptcp-cleanup-fallback-dummy-mapping-generation.patch create mode 100644 queue-6.18/mptcp-do-not-drop-partial-packets.patch create mode 100644 queue-6.18/mptcp-handle-first-subflow-closing-consistently.patch create mode 100644 queue-6.18/mptcp-reset-rcv-wnd-on-disconnect.patch create mode 100644 queue-6.18/net-devmem-reject-dma-buf-bind-with-non-page-aligned-size-or-sg-length.patch create mode 100644 queue-6.18/octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch create mode 100644 
queue-6.18/platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch create mode 100644 queue-6.18/platform-x86-intel-vsec-make-driver_data-info-const.patch create mode 100644 queue-6.18/platform-x86-intel-vsec-refactor-base_addr-handling.patch create mode 100644 queue-6.18/rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch create mode 100644 queue-6.18/rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch create mode 100644 queue-6.18/selftests-mptcp-drop-nanoseconds-width-specifier.patch create mode 100644 queue-6.18/serdev-provide-a-bustype-shutdown-function.patch create mode 100644 queue-6.18/usb-dwc3-xilinx-fix-error-handling-in-zynqmp-init-error-paths.patch create mode 100644 queue-6.18/x86-mm-disable-broadcast-tlb-flush-when-pcid-is-disabled.patch create mode 100644 queue-6.18/xfrm-iptfs-reset-runtime-state-when-cloning-sas.patch diff --git a/queue-6.18/arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch b/queue-6.18/arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch new file mode 100644 index 0000000000..aa64e1bc7e --- /dev/null +++ b/queue-6.18/arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch @@ -0,0 +1,55 @@ +From stable+bounces-259392-greg=kroah.com@vger.kernel.org Mon Jun 1 02:41:00 2026 +From: Sasha Levin +Date: Sun, 31 May 2026 20:38:49 -0400 +Subject: arm64: tlb: Flush walk cache when unsharing PMD tables +To: stable@vger.kernel.org +Cc: Zeng Heng , Catalin Marinas , Sasha Levin +Message-ID: <20260601003849.83368-1-sashal@kernel.org> + +From: Zeng Heng + +[ Upstream commit c2ff4764e03e7a8d758352f4aceb8fe1be6ac971 ] + +When huge_pmd_unshare() is called to unshare a PMD table, the +tlb_unshare_pmd_ptdesc() function sets tlb->unshared_tables=true +but the aarch64 tlb_flush() only checked tlb->freed_tables to +determine whether to use TLBF_NONE (vae1is, invalidates walk +cache) or TLBF_NOWALKCACHE (vale1is, leaf-only). 
+ +This caused the stale PMD page table entry to remain in the walk cache +after unshare, potentially leading to incorrect page table walks. + +Fix by including unshared_tables in the check, so that when +unsharing tables, TLBF_NONE is used and the walk cache is properly +invalidated. + +Here is the detailed distinction between vae1is and vale1is: + +| Instruction Combination | Actual Invalidation Scope | +| ------------------------ | --------------------------------------------------| +| `VAE1IS` + TTL=`0` | All entries at all levels (full invalidation) | +| `VAE1IS` + TTL=`2` (L2) | Non-leaf at Level 0/1 + leaf at Level 2 | +| `VALE1IS` + TTL=`0` | Leaf entries at all levels (non-leaf not cleared) | +| `VALE1IS` + TTL=`2` (L2) | Leaf entry at Level 2 only | + +Signed-off-by: Zeng Heng +Fixes: 8ce720d5bd91 ("mm/hugetlb: fix excessive IPI broadcasts when unsharing PMD tables using mmu_gather") +Cc: +Signed-off-by: Catalin Marinas +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/tlb.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm64/include/asm/tlb.h ++++ b/arch/arm64/include/asm/tlb.h +@@ -53,7 +53,7 @@ static inline int tlb_get_level(struct m + static inline void tlb_flush(struct mmu_gather *tlb) + { + struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); +- bool last_level = !tlb->freed_tables; ++ bool last_level = !(tlb->freed_tables || tlb->unshared_tables); + unsigned long stride = tlb_get_unmap_size(tlb); + int tlb_level = tlb_get_level(tlb); + diff --git a/queue-6.18/bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch b/queue-6.18/bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch new file mode 100644 index 0000000000..3bafad481c --- /dev/null +++ b/queue-6.18/bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch @@ -0,0 +1,154 @@ +From stable+bounces-256737-greg=kroah.com@vger.kernel.org Fri May 29 21:24:34 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 15:23:50 
-0400 +Subject: Bluetooth: hci_qca: Convert timeout from jiffies to ms +To: stable@vger.kernel.org +Cc: Shuai Zhang , Paul Menzel , Bartosz Golaszewski , Luiz Augusto von Dentz , Sasha Levin +Message-ID: <20260529192351.1696591-3-sashal@kernel.org> + +From: Shuai Zhang + +[ Upstream commit 375ba7484132662a4a8c7547d088fb6275c00282 ] + +Since the timer uses jiffies as its unit rather than ms, the timeout value +must be converted from ms to jiffies when configuring the timer. Otherwise, +the intended 8s timeout is incorrectly set to approximately 33s. + +To improve readability, embed msecs_to_jiffies() directly in the macro +definitions and drop the _MS suffix from macros that now yield jiffies +values: MEMDUMP_TIMEOUT, FW_DOWNLOAD_TIMEOUT, IBS_DISABLE_SSR_TIMEOUT, +CMD_TRANS_TIMEOUT, and IBS_BTSOC_TX_IDLE_TIMEOUT. + +IBS_WAKE_RETRANS_TIMEOUT_MS and IBS_HOST_TX_IDLE_TIMEOUT_MS are +intentionally left unchanged. Their values are stored in the struct fields +wake_retrans and tx_idle_delay, which hold ms values at runtime and can be +modified via debugfs. The msecs_to_jiffies() conversion happens at each +call site against the field value, so it cannot be embedded in the macro. 
+ +Wake timer depends on commit c347ca17d62a + +Cc: stable@vger.kernel.org +Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR") +Reviewed-by: Paul Menzel +Acked-by: Bartosz Golaszewski +Signed-off-by: Shuai Zhang +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bluetooth/hci_qca.c | 33 ++++++++++++++++----------------- + 1 file changed, 16 insertions(+), 17 deletions(-) + +--- a/drivers/bluetooth/hci_qca.c ++++ b/drivers/bluetooth/hci_qca.c +@@ -48,13 +48,12 @@ + #define HCI_MAX_IBS_SIZE 10 + + #define IBS_WAKE_RETRANS_TIMEOUT_MS 100 +-#define IBS_BTSOC_TX_IDLE_TIMEOUT_MS 200 ++#define IBS_BTSOC_TX_IDLE_TIMEOUT msecs_to_jiffies(200) + #define IBS_HOST_TX_IDLE_TIMEOUT_MS 2000 +-#define CMD_TRANS_TIMEOUT_MS 100 +-#define MEMDUMP_TIMEOUT_MS 8000 +-#define IBS_DISABLE_SSR_TIMEOUT_MS \ +- (MEMDUMP_TIMEOUT_MS + FW_DOWNLOAD_TIMEOUT_MS) +-#define FW_DOWNLOAD_TIMEOUT_MS 3000 ++#define CMD_TRANS_TIMEOUT msecs_to_jiffies(100) ++#define MEMDUMP_TIMEOUT msecs_to_jiffies(8000) ++#define FW_DOWNLOAD_TIMEOUT msecs_to_jiffies(3000) ++#define IBS_DISABLE_SSR_TIMEOUT (MEMDUMP_TIMEOUT + FW_DOWNLOAD_TIMEOUT) + + /* susclk rate */ + #define SUSCLK_RATE_32KHZ 32768 +@@ -1091,7 +1090,7 @@ static void qca_controller_memdump(struc + + queue_delayed_work(qca->workqueue, + &qca->ctrl_memdump_timeout, +- msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)); ++ MEMDUMP_TIMEOUT); + skb_pull(skb, sizeof(qca_memdump->ram_dump_size)); + qca_memdump->current_seq_no = 0; + qca_memdump->received_dump = 0; +@@ -1364,7 +1363,7 @@ static int qca_set_baudrate(struct hci_d + + if (hu->serdev) + serdev_device_wait_until_sent(hu->serdev, +- msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); ++ CMD_TRANS_TIMEOUT); + + /* Give the controller time to process the request */ + switch (qca_soc_type(hu)) { +@@ -1396,8 +1395,8 @@ static inline void host_set_baudrate(str + + static int qca_send_power_pulse(struct hci_uart *hu, bool on) + { 
++ int timeout = CMD_TRANS_TIMEOUT; + int ret; +- int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS); + u8 cmd = on ? QCA_WCN3990_POWERON_PULSE : QCA_WCN3990_POWEROFF_PULSE; + + /* These power pulses are single byte command which are sent +@@ -1602,7 +1601,7 @@ static void qca_wait_for_dump_collection + struct qca_data *qca = hu->priv; + + wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION, +- TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS); ++ TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT); + + clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); + } +@@ -2570,7 +2569,7 @@ static void qca_serdev_remove(struct ser + static void qca_serdev_shutdown(struct serdev_device *serdev) + { + int ret; +- int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS); ++ int timeout = CMD_TRANS_TIMEOUT; + struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); + struct hci_uart *hu = &qcadev->serdev_hu; + struct hci_dev *hdev = hu->hdev; +@@ -2627,7 +2626,7 @@ static int __maybe_unused qca_suspend(st + bool tx_pending = false; + int ret = 0; + u8 cmd; +- u32 wait_timeout = 0; ++ unsigned long wait_timeout = 0; + + set_bit(QCA_SUSPENDING, &qca->flags); + +@@ -2648,15 +2647,15 @@ static int __maybe_unused qca_suspend(st + if (test_bit(QCA_IBS_DISABLED, &qca->flags) || + test_bit(QCA_SSR_TRIGGERED, &qca->flags)) { + wait_timeout = test_bit(QCA_SSR_TRIGGERED, &qca->flags) ? +- IBS_DISABLE_SSR_TIMEOUT_MS : +- FW_DOWNLOAD_TIMEOUT_MS; ++ IBS_DISABLE_SSR_TIMEOUT : ++ FW_DOWNLOAD_TIMEOUT; + + /* QCA_IBS_DISABLED flag is set to true, During FW download + * and during memory dump collection. It is reset to false, + * After FW download complete. 
+ */ + wait_on_bit_timeout(&qca->flags, QCA_IBS_DISABLED, +- TASK_UNINTERRUPTIBLE, msecs_to_jiffies(wait_timeout)); ++ TASK_UNINTERRUPTIBLE, wait_timeout); + + if (test_bit(QCA_IBS_DISABLED, &qca->flags)) { + bt_dev_err(hu->hdev, "SSR or FW download time out"); +@@ -2708,7 +2707,7 @@ static int __maybe_unused qca_suspend(st + + if (tx_pending) { + serdev_device_wait_until_sent(hu->serdev, +- msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); ++ CMD_TRANS_TIMEOUT); + serial_clock_vote(HCI_IBS_TX_VOTE_CLOCK_OFF, hu); + } + +@@ -2717,7 +2716,7 @@ static int __maybe_unused qca_suspend(st + */ + ret = wait_event_interruptible_timeout(qca->suspend_wait_q, + qca->rx_ibs_state == HCI_IBS_RX_ASLEEP, +- msecs_to_jiffies(IBS_BTSOC_TX_IDLE_TIMEOUT_MS)); ++ IBS_BTSOC_TX_IDLE_TIMEOUT); + if (ret == 0) { + ret = -ETIMEDOUT; + goto error; diff --git a/queue-6.18/bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch b/queue-6.18/bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch new file mode 100644 index 0000000000..435804ca6d --- /dev/null +++ b/queue-6.18/bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch @@ -0,0 +1,53 @@ +From sashal@kernel.org Fri May 29 21:23:54 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 15:23:49 -0400 +Subject: Bluetooth: hci_qca: Migrate to serdev specific shutdown function +To: stable@vger.kernel.org +Cc: "Uwe Kleine-König" , "Greg Kroah-Hartman" , "Sasha Levin" +Message-ID: <20260529192351.1696591-2-sashal@kernel.org> + +From: Uwe Kleine-König + +[ Upstream commit 12a6a5726c515455935982429ac35dee2307233d ] + +This saves a cast in the driver. The motivation is stop using the callback +.shutdown in qca_serdev_driver.driver to make it possible to drop that. 
+ +Signed-off-by: Uwe Kleine-König +Link: https://patch.msgid.link/261a3384e25c4837d4efee87958805f15d7d4e3c.1765526117.git.u.kleine-koenig@baylibre.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 375ba7484132 ("Bluetooth: hci_qca: Convert timeout from jiffies to ms") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bluetooth/hci_qca.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/bluetooth/hci_qca.c ++++ b/drivers/bluetooth/hci_qca.c +@@ -2567,11 +2567,10 @@ static void qca_serdev_remove(struct ser + hci_uart_unregister_device(&qcadev->serdev_hu); + } + +-static void qca_serdev_shutdown(struct device *dev) ++static void qca_serdev_shutdown(struct serdev_device *serdev) + { + int ret; + int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS); +- struct serdev_device *serdev = to_serdev_device(dev); + struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); + struct hci_uart *hu = &qcadev->serdev_hu; + struct hci_dev *hdev = hu->hdev; +@@ -2793,11 +2792,11 @@ static void hciqca_coredump(struct devic + static struct serdev_device_driver qca_serdev_driver = { + .probe = qca_serdev_probe, + .remove = qca_serdev_remove, ++ .shutdown = qca_serdev_shutdown, + .driver = { + .name = "hci_uart_qca", + .of_match_table = of_match_ptr(qca_bluetooth_of_match), + .acpi_match_table = ACPI_PTR(qca_bluetooth_acpi_match), +- .shutdown = qca_serdev_shutdown, + .pm = &qca_pm_ops, + #ifdef CONFIG_DEV_COREDUMP + .coredump = hciqca_coredump, diff --git a/queue-6.18/cpufreq-intel_pstate-add-and-use-hybrid_get_cpu_type.patch b/queue-6.18/cpufreq-intel_pstate-add-and-use-hybrid_get_cpu_type.patch new file mode 100644 index 0000000000..0cee21c180 --- /dev/null +++ b/queue-6.18/cpufreq-intel_pstate-add-and-use-hybrid_get_cpu_type.patch @@ -0,0 +1,60 @@ +From stable+bounces-259525-greg=kroah.com@vger.kernel.org Mon Jun 1 13:13:43 2026 +From: Sasha Levin +Date: Mon, 1 Jun 2026 07:02:03 -0400 +Subject: cpufreq: intel_pstate: Add 
and use hybrid_get_cpu_type() +To: stable@vger.kernel.org +Cc: "Rafael J. Wysocki" , Sasha Levin +Message-ID: <20260601110204.439565-1-sashal@kernel.org> + +From: "Rafael J. Wysocki" + +[ Upstream commit 528dde6619677ac6dc26d9dda1e3c9014b4a08c8 ] + +Introduce a function for identifying the type of a given CPU in a +hybrid system, called hybrid_get_cpu_type(), and use if for hybrid +scaling factor determination in hwp_get_cpu_scaling(). + +Signed-off-by: Rafael J. Wysocki +Link: https://patch.msgid.link/1954386.tdWV9SEqCh@rafael.j.wysocki +Stable-dep-of: 0e7c710478b3 ("cpufreq: intel_pstate: Use correct scaling factor on Raptor Lake-E") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/intel_pstate.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -909,6 +909,11 @@ static struct freq_attr *hwp_cpufreq_att + [HWP_CPUFREQ_ATTR_COUNT] = NULL, + }; + ++static u8 hybrid_get_cpu_type(unsigned int cpu) ++{ ++ return cpu_data(cpu).topo.intel_type; ++} ++ + static bool no_cas __ro_after_init; + + static struct cpudata *hybrid_max_perf_cpu __read_mostly; +@@ -2299,18 +2304,14 @@ static int knl_get_turbo_pstate(int cpu) + static int hwp_get_cpu_scaling(int cpu) + { + if (hybrid_scaling_factor) { +- struct cpuinfo_x86 *c = &cpu_data(cpu); +- u8 cpu_type = c->topo.intel_type; +- + /* + * Return the hybrid scaling factor for P-cores and use the + * default core scaling for E-cores. + */ +- if (cpu_type == INTEL_CPU_TYPE_CORE) ++ if (hybrid_get_cpu_type(cpu) == INTEL_CPU_TYPE_CORE) + return hybrid_scaling_factor; + +- if (cpu_type == INTEL_CPU_TYPE_ATOM) +- return core_get_scaling(); ++ return core_get_scaling(); + } + + /* Use core scaling on non-hybrid systems. 
*/ diff --git a/queue-6.18/cpufreq-intel_pstate-use-correct-scaling-factor-on-raptor-lake-e.patch b/queue-6.18/cpufreq-intel_pstate-use-correct-scaling-factor-on-raptor-lake-e.patch new file mode 100644 index 0000000000..ec1b69d7f9 --- /dev/null +++ b/queue-6.18/cpufreq-intel_pstate-use-correct-scaling-factor-on-raptor-lake-e.patch @@ -0,0 +1,49 @@ +From stable+bounces-259526-greg=kroah.com@vger.kernel.org Mon Jun 1 13:02:12 2026 +From: Sasha Levin +Date: Mon, 1 Jun 2026 07:02:04 -0400 +Subject: cpufreq: intel_pstate: Use correct scaling factor on Raptor Lake-E +To: stable@vger.kernel.org +Cc: "Rafael J. Wysocki" , Henry Tseng , Sasha Levin +Message-ID: <20260601110204.439565-2-sashal@kernel.org> + +From: "Rafael J. Wysocki" + +[ Upstream commit 0e7c710478b3089cdfe8669347f77b163e836c4f ] + +Raptor Lake-E has the same processor ID as Raptor Lake-S, so there is +an entry in intel_hybrid_scaling_factor[] for it. It does not contain +E-cores though and hybrid_get_cpu_type() returns 0 for its P-cores, so +they get the default "core" scaling factor. However, the original +Raptor Lake scaling factor for P-cores still needs to be used for +mapping the HWP performance levels of the P-cores in Raptor Lake-E to +frequency, as though they were part of a real hybrid system. + +To address this, update hwp_get_cpu_scaling() to return +hybrid_scaling_factor, which is the P-core scaling factor +retrieved from intel_hybrid_scaling_factor[], for all CPUs +that are not enumerated as E-cores. + +Fixes: 9b18d536b124 ("cpufreq: intel_pstate: Use CPPC to get scaling factors") +Link: https://lore.kernel.org/all/20260511235328.2018458-1-srinivas.pandruvada@linux.intel.com/ +Reported-by: Henry Tseng +Closes: https://lore.kernel.org/linux-pm/20260508063032.3248602-1-henrytseng@qnap.com/ +Signed-off-by: Rafael J. 
Wysocki +Cc: All applicable +Link: https://patch.msgid.link/4523296.ejJDZkT8p0@rafael.j.wysocki +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/intel_pstate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -2308,7 +2308,7 @@ static int hwp_get_cpu_scaling(int cpu) + * Return the hybrid scaling factor for P-cores and use the + * default core scaling for E-cores. + */ +- if (hybrid_get_cpu_type(cpu) == INTEL_CPU_TYPE_CORE) ++ if (hybrid_get_cpu_type(cpu) != INTEL_CPU_TYPE_ATOM) + return hybrid_scaling_factor; + + return core_get_scaling(); diff --git a/queue-6.18/mptcp-borrow-forward-memory-from-subflow.patch b/queue-6.18/mptcp-borrow-forward-memory-from-subflow.patch new file mode 100644 index 0000000000..cac098b6de --- /dev/null +++ b/queue-6.18/mptcp-borrow-forward-memory-from-subflow.patch @@ -0,0 +1,200 @@ +From stable+bounces-256881-greg=kroah.com@vger.kernel.org Sat May 30 13:49:30 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 07:48:09 -0400 +Subject: mptcp: borrow forward memory from subflow +To: stable@vger.kernel.org +Cc: Paolo Abeni , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin +Message-ID: <20260530114810.1965750-2-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 9db5b3cec4ec1c0cd3239689f5c8653d691a1754 ] + +In the MPTCP receive path, we release the subflow allocated fwd +memory just to allocate it again shortly after for the msk. + +That could increases the failures chances, especially when we will +add backlog processing, with other actions could consume the just +released memory before the msk socket has a chance to do the +rcv allocation. + +Replace the skb_orphan() call with an open-coded variant that +explicitly borrows, the fwd memory from the subflow socket instead +of releasing it. 
+ +The borrowed memory does not have PAGE_SIZE granularity; rounding to +the page size will make the fwd allocated memory higher than what is +strictly required and could make the incoming subflow fwd mem +consistently negative. Instead, keep track of the accumulated frag and +borrow the full page at subflow close time. + +This allow removing the last drop in the TCP to MPTCP transition and +the associated, now unused, MIB. + +Signed-off-by: Paolo Abeni +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251121-net-next-mptcp-memcg-backlog-imp-v1-12-1f34b6c1e0b1@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 50c2d91c5dfa ("mptcp: do not drop partial packets") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/fastopen.c | 4 +++- + net/mptcp/mib.c | 1 - + net/mptcp/mib.h | 1 - + net/mptcp/protocol.c | 23 +++++++++++++++-------- + net/mptcp/protocol.h | 28 ++++++++++++++++++++++++++++ + 5 files changed, 46 insertions(+), 11 deletions(-) + +--- a/net/mptcp/fastopen.c ++++ b/net/mptcp/fastopen.c +@@ -33,7 +33,8 @@ void mptcp_fastopen_subflow_synack_set_p + /* dequeue the skb from sk receive queue */ + __skb_unlink(skb, &ssk->sk_receive_queue); + skb_ext_reset(skb); +- skb_orphan(skb); ++ ++ mptcp_subflow_lend_fwdmem(subflow, skb); + + /* We copy the fastopen data, but that don't belong to the mptcp sequence + * space, need to offset it in the subflow sequence, see mptcp_subflow_get_map_offset() +@@ -52,6 +53,7 @@ void mptcp_fastopen_subflow_synack_set_p + mptcp_data_lock(sk); + DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk)); + ++ mptcp_borrow_fwdmem(sk, skb); + skb_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); + mptcp_sk(sk)->bytes_received += skb->len; +--- a/net/mptcp/mib.c ++++ b/net/mptcp/mib.c +@@ -71,7 +71,6 @@ static const struct snmp_mib mptcp_snmp_ + SNMP_MIB_ITEM("MPFastcloseRx", MPTCP_MIB_MPFASTCLOSERX), + SNMP_MIB_ITEM("MPRstTx", 
MPTCP_MIB_MPRSTTX), + SNMP_MIB_ITEM("MPRstRx", MPTCP_MIB_MPRSTRX), +- SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), + SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE), + SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER), + SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED), +--- a/net/mptcp/mib.h ++++ b/net/mptcp/mib.h +@@ -70,7 +70,6 @@ enum linux_mptcp_mib_field { + MPTCP_MIB_MPFASTCLOSERX, /* Received a MP_FASTCLOSE */ + MPTCP_MIB_MPRSTTX, /* Transmit a MP_RST */ + MPTCP_MIB_MPRSTRX, /* Received a MP_RST */ +- MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */ + MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */ + MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */ + MPTCP_MIB_SNDWNDSHARED, /* Subflow snd wnd is overridden by msk's one */ +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -352,7 +352,7 @@ end: + static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset, + int copy_len) + { +- const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); ++ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + bool has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + + /* the skb map_seq accounts for the skb offset: +@@ -377,11 +377,7 @@ static bool __mptcp_move_skb(struct sock + struct mptcp_sock *msk = mptcp_sk(sk); + struct sk_buff *tail; + +- /* try to fetch required memory from subflow */ +- if (!sk_rmem_schedule(sk, skb, skb->truesize)) { +- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); +- goto drop; +- } ++ mptcp_borrow_fwdmem(sk, skb); + + if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { + /* in sequence */ +@@ -403,7 +399,6 @@ static bool __mptcp_move_skb(struct sock + * will retransmit as needed, if needed. 
+ */ + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); +-drop: + mptcp_drop(sk, skb); + return false; + } +@@ -704,7 +699,7 @@ static bool __mptcp_move_skbs_from_subfl + size_t len = skb->len - offset; + + mptcp_init_skb(ssk, skb, offset, len); +- skb_orphan(skb); ++ mptcp_subflow_lend_fwdmem(subflow, skb); + ret = __mptcp_move_skb(sk, skb) || ret; + seq += len; + +@@ -2454,6 +2449,7 @@ static void __mptcp_close_ssk(struct soc + { + struct mptcp_sock *msk = mptcp_sk(sk); + bool dispose_it, need_push = false; ++ int fwd_remaining; + + /* Do not pass RX data to the msk, even if the subflow socket is not + * going to be freed (i.e. even for the first subflow on graceful +@@ -2462,6 +2458,17 @@ static void __mptcp_close_ssk(struct soc + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + subflow->closing = 1; + ++ /* Borrow the fwd allocated page left-over; fwd memory for the subflow ++ * could be negative at this point, but will be reach zero soon - when ++ * the data allocated using such fragment will be freed. ++ */ ++ if (subflow->lent_mem_frag) { ++ fwd_remaining = PAGE_SIZE - subflow->lent_mem_frag; ++ sk_forward_alloc_add(sk, fwd_remaining); ++ sk_forward_alloc_add(ssk, -fwd_remaining); ++ subflow->lent_mem_frag = 0; ++ } ++ + /* If the first subflow moved to a close state before accept, e.g. due + * to an incoming reset or listener shutdown, the subflow socket is + * already deleted by inet_child_forget() and the mptcp socket can't +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -548,6 +548,7 @@ struct mptcp_subflow_context { + bool scheduled; + bool pm_listener; /* a listener managed by the kernel PM? 
*/ + bool fully_established; /* path validated */ ++ u32 lent_mem_frag; + u32 remote_nonce; + u64 thmac; + u32 local_nonce; +@@ -647,6 +648,33 @@ mptcp_send_active_reset_reason(struct so + tcp_send_active_reset(sk, GFP_ATOMIC, reason); + } + ++/* Made the fwd mem carried by the given skb available to the msk, ++ * To be paired with a previous mptcp_subflow_lend_fwdmem() before freeing ++ * the skb or setting the skb ownership. ++ */ ++static inline void mptcp_borrow_fwdmem(struct sock *sk, struct sk_buff *skb) ++{ ++ struct sock *ssk = skb->sk; ++ ++ /* The subflow just lend the skb fwd memory, and we know that the skb ++ * is only accounted on the incoming subflow rcvbuf. ++ */ ++ DEBUG_NET_WARN_ON_ONCE(skb->destructor); ++ skb->sk = NULL; ++ sk_forward_alloc_add(sk, skb->truesize); ++ atomic_sub(skb->truesize, &ssk->sk_rmem_alloc); ++} ++ ++static inline void ++mptcp_subflow_lend_fwdmem(struct mptcp_subflow_context *subflow, ++ struct sk_buff *skb) ++{ ++ int frag = (subflow->lent_mem_frag + skb->truesize) & (PAGE_SIZE - 1); ++ ++ skb->destructor = NULL; ++ subflow->lent_mem_frag = frag; ++} ++ + static inline u64 + mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) + { diff --git a/queue-6.18/mptcp-cleanup-fallback-dummy-mapping-generation.patch b/queue-6.18/mptcp-cleanup-fallback-dummy-mapping-generation.patch new file mode 100644 index 0000000000..ec90d8c211 --- /dev/null +++ b/queue-6.18/mptcp-cleanup-fallback-dummy-mapping-generation.patch @@ -0,0 +1,74 @@ +From stable+bounces-256884-greg=kroah.com@vger.kernel.org Sat May 30 13:49:58 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 07:48:23 -0400 +Subject: mptcp: cleanup fallback dummy mapping generation +To: stable@vger.kernel.org +Cc: Paolo Abeni , Geliang Tang , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin +Message-ID: <20260530114824.1966673-1-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 2834f8edd74d5dda368087a654c0e52b141e9893 ] + +MPTCP 
currently access ack_seq outside the msk socket log scope to +generate the dummy mapping for fallback socket. Soon we are going +to introduce backlog usage and even for fallback socket the ack_seq +value will be significantly off outside of the msk socket lock scope. + +Avoid relying on ack_seq for dummy mapping generation, using instead +the subflow sequence number. Note that in case of disconnect() and +(re)connect() we must ensure that any previous state is re-set. + +Signed-off-by: Paolo Abeni +Reviewed-by: Geliang Tang +Tested-by: Geliang Tang +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251121-net-next-mptcp-memcg-backlog-imp-v1-6-1f34b6c1e0b1@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 0981f90e1a05 ("mptcp: reset rcv wnd on disconnect") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 3 +++ + net/mptcp/subflow.c | 8 +++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -3333,6 +3333,9 @@ static int mptcp_disconnect(struct sock + msk->rcvspace_init = 0; + msk->fastclosing = 0; + ++ /* for fallback's sake */ ++ WRITE_ONCE(msk->ack_seq, 0); ++ + WRITE_ONCE(sk->sk_shutdown, 0); + sk_error_report(sk); + return 0; +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -491,6 +491,9 @@ static void subflow_set_remote_key(struc + mptcp_crypto_key_sha(subflow->remote_key, NULL, &subflow->iasn); + subflow->iasn++; + ++ /* for fallback's sake */ ++ subflow->map_seq = subflow->iasn; ++ + WRITE_ONCE(msk->remote_key, subflow->remote_key); + WRITE_ONCE(msk->ack_seq, subflow->iasn); + WRITE_ONCE(msk->can_ack, true); +@@ -1435,9 +1438,12 @@ reset: + + skb = skb_peek(&ssk->sk_receive_queue); + subflow->map_valid = 1; +- subflow->map_seq = READ_ONCE(msk->ack_seq); + subflow->map_data_len = skb->len; + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; ++ subflow->map_seq = 
__mptcp_expand_seq(subflow->map_seq, ++ subflow->iasn + ++ TCP_SKB_CB(skb)->seq - ++ subflow->ssn_offset - 1); + WRITE_ONCE(subflow->data_avail, true); + return true; + } diff --git a/queue-6.18/mptcp-do-not-drop-partial-packets.patch b/queue-6.18/mptcp-do-not-drop-partial-packets.patch new file mode 100644 index 0000000000..63e85f88ec --- /dev/null +++ b/queue-6.18/mptcp-do-not-drop-partial-packets.patch @@ -0,0 +1,78 @@ +From stable+bounces-256882-greg=kroah.com@vger.kernel.org Sat May 30 13:49:37 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 07:48:10 -0400 +Subject: mptcp: do not drop partial packets +To: stable@vger.kernel.org +Cc: Shardul Bankar , Paolo Abeni , "Matthieu Baerts (NGI0)" , Sasha Levin +Message-ID: <20260530114810.1965750-3-sashal@kernel.org> + +From: Shardul Bankar + +[ Upstream commit 50c2d91c5dfa0e465826ec1f8dbad9cdc254bd85 ] + +When a packet arrives with map_seq < ack_seq < end_seq, the beginning +of the packet has already been acknowledged but the end contains new +data. Currently the entire packet is dropped as "old data," forcing +the sender to retransmit. + +Instead, skip the already-acked bytes by adjusting the skb offset and +enqueue only the new portion. Update bytes_received and ack_seq to +reflect the new data consumed. + +A previous attempt at this fix has been sent by Paolo Abeni [1], but had +issues [2]: it also added a zero-window check and changed rcv_wnd_sent +initialization, which caused test regressions. This version addresses +only the partial packet handling without modifying receive window +accounting. 
+ +Fixes: ab174ad8ef76 ("mptcp: move ooo skbs into msk out of order queue.") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/c9b426a4e163aa3c4fe8b80c79f1a610f47ae7d8.1763075056.git.pabeni@redhat.com [1] +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/600 [2] +Signed-off-by: Shardul Bankar +[pabeni@redhat.com: update map] +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-1-701e96419f2f@kernel.org +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -395,12 +395,26 @@ static bool __mptcp_move_skb(struct sock + return false; + } + +- /* old data, keep it simple and drop the whole pkt, sender +- * will retransmit as needed, if needed. ++ /* Completely old data? */ ++ if (!after64(MPTCP_SKB_CB(skb)->end_seq, msk->ack_seq)) { ++ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); ++ mptcp_drop(sk, skb); ++ return false; ++ } ++ ++ /* Partial packet: map_seq < ack_seq < end_seq. ++ * Skip the already-acked bytes and enqueue the new data. 
+ */ +- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); +- mptcp_drop(sk, skb); +- return false; ++ copy_len = MPTCP_SKB_CB(skb)->end_seq - msk->ack_seq; ++ MPTCP_SKB_CB(skb)->offset += msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq; ++ MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq - ++ MPTCP_SKB_CB(skb)->map_seq; ++ msk->bytes_received += copy_len; ++ WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); ++ ++ skb_set_owner_r(skb, sk); ++ __skb_queue_tail(&sk->sk_receive_queue, skb); ++ return true; + } + + static void mptcp_stop_rtx_timer(struct sock *sk) diff --git a/queue-6.18/mptcp-handle-first-subflow-closing-consistently.patch b/queue-6.18/mptcp-handle-first-subflow-closing-consistently.patch new file mode 100644 index 0000000000..645d5424a2 --- /dev/null +++ b/queue-6.18/mptcp-handle-first-subflow-closing-consistently.patch @@ -0,0 +1,104 @@ +From stable+bounces-256880-greg=kroah.com@vger.kernel.org Sat May 30 13:49:16 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 07:48:08 -0400 +Subject: mptcp: handle first subflow closing consistently +To: stable@vger.kernel.org +Cc: Paolo Abeni , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin +Message-ID: <20260530114810.1965750-1-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 0eeb372deebce6c25b9afc09e35d6c75a744299a ] + +Currently, as soon as the PM closes a subflow, the msk stops accepting +data from it, even if the TCP socket could be still formally open in the +incoming direction, with the notable exception of the first subflow. + +The root cause of such behavior is that the code currently piggybacks two +separate semantics on the subflow->disposable bit: the subflow context +must be released, and the subflow must stop accepting incoming +data. + +The first subflow is never disposed, so it also never stops accepting +incoming data. Use a separate bit to mark the latter status and set such +bit in __mptcp_close_ssk() for all subflows.
+ +Beyond making per subflow behaviour more consistent this will also +simplify the next patch. + +Signed-off-by: Paolo Abeni +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251121-net-next-mptcp-memcg-backlog-imp-v1-11-1f34b6c1e0b1@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 50c2d91c5dfa ("mptcp: do not drop partial packets") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 14 +++++++++----- + net/mptcp/protocol.h | 3 ++- + 2 files changed, 11 insertions(+), 6 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -854,10 +854,10 @@ void mptcp_data_ready(struct sock *sk, s + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + /* The peer can send data while we are shutting down this +- * subflow at msk destruction time, but we must avoid enqueuing ++ * subflow at subflow destruction time, but we must avoid enqueuing + * more data to the msk receive queue + */ +- if (unlikely(subflow->disposable)) ++ if (unlikely(subflow->closing)) + return; + + mptcp_data_lock(sk); +@@ -2455,6 +2455,13 @@ static void __mptcp_close_ssk(struct soc + struct mptcp_sock *msk = mptcp_sk(sk); + bool dispose_it, need_push = false; + ++ /* Do not pass RX data to the msk, even if the subflow socket is not ++ * going to be freed (i.e. even for the first subflow on graceful ++ * subflow close. ++ */ ++ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); ++ subflow->closing = 1; ++ + /* If the first subflow moved to a close state before accept, e.g. 
due + * to an incoming reset or listener shutdown, the subflow socket is + * already deleted by inet_child_forget() and the mptcp socket can't +@@ -2465,7 +2472,6 @@ static void __mptcp_close_ssk(struct soc + /* ensure later check in mptcp_worker() will dispose the msk */ + sock_set_flag(sk, SOCK_DEAD); + mptcp_set_close_tout(sk, tcp_jiffies32 - (mptcp_close_timeout(sk) + 1)); +- lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + mptcp_subflow_drop_ctx(ssk); + goto out_release; + } +@@ -2474,8 +2480,6 @@ static void __mptcp_close_ssk(struct soc + if (dispose_it) + list_del(&subflow->node); + +- lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); +- + if (subflow->send_fastclose && ssk->sk_state != TCP_CLOSE) + tcp_set_state(ssk, TCP_CLOSE); + +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -537,12 +537,13 @@ struct mptcp_subflow_context { + send_infinite_map : 1, + remote_key_valid : 1, /* received the peer key from */ + disposable : 1, /* ctx can be free at ulp release time */ ++ closing : 1, /* must not pass rx data to msk anymore */ + stale : 1, /* unable to snd/rcv data, do not use for xmit */ + valid_csum_seen : 1, /* at least one csum validated */ + is_mptfo : 1, /* subflow is doing TFO */ + close_event_done : 1, /* has done the post-closed part */ + mpc_drop : 1, /* the MPC option has been dropped in a rtx */ +- __unused : 9; ++ __unused : 8; + bool data_avail; + bool scheduled; + bool pm_listener; /* a listener managed by the kernel PM? 
*/ diff --git a/queue-6.18/mptcp-reset-rcv-wnd-on-disconnect.patch b/queue-6.18/mptcp-reset-rcv-wnd-on-disconnect.patch new file mode 100644 index 0000000000..5c55a44ff8 --- /dev/null +++ b/queue-6.18/mptcp-reset-rcv-wnd-on-disconnect.patch @@ -0,0 +1,47 @@ +From stable+bounces-256885-greg=kroah.com@vger.kernel.org Sat May 30 13:50:05 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 07:48:24 -0400 +Subject: mptcp: reset rcv wnd on disconnect +To: stable@vger.kernel.org +Cc: Paolo Abeni , "Matthieu Baerts (NGI0)" , Sasha Levin +Message-ID: <20260530114824.1966673-2-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 0981f90e1a05773a4c29c6e720f5ea1e3c8f1876 ] + +If the MPTCP socket falls back to TCP before the MP handshake completion, +the IASN remains 0, and the rcv_wnd_sent field is not explicitly +initialized, just incremented over time with the data transfer. + +At disconnect time such value is not cleared. If the next connection falls +back to TCP before the MP handshake completion, the data transfer will +keep incrementing the receive window end sequence starting from the last +value used in the previous connection: the announced window will be +unrelated to the actual receiver buffer size and likely too big. + +Address the issue by zeroing the field at disconnect time.
+ +Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-4-701e96419f2f@kernel.org +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -3335,6 +3335,7 @@ static int mptcp_disconnect(struct sock + + /* for fallback's sake */ + WRITE_ONCE(msk->ack_seq, 0); ++ atomic64_set(&msk->rcv_wnd_sent, 0); + + WRITE_ONCE(sk->sk_shutdown, 0); + sk_error_report(sk); diff --git a/queue-6.18/net-devmem-reject-dma-buf-bind-with-non-page-aligned-size-or-sg-length.patch b/queue-6.18/net-devmem-reject-dma-buf-bind-with-non-page-aligned-size-or-sg-length.patch new file mode 100644 index 0000000000..f7a2e87aeb --- /dev/null +++ b/queue-6.18/net-devmem-reject-dma-buf-bind-with-non-page-aligned-size-or-sg-length.patch @@ -0,0 +1,73 @@ +From stable+bounces-256807-greg=kroah.com@vger.kernel.org Sat May 30 01:24:58 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 19:22:54 -0400 +Subject: net: devmem: reject dma-buf bind with non-page-aligned size or SG length +To: stable@vger.kernel.org +Cc: David Carlier , Bobby Eshleman , Stanislav Fomichev , Mina Almasry , Jakub Kicinski , Sasha Levin +Message-ID: <20260529232254.1877494-1-sashal@kernel.org> + +From: David Carlier + +[ Upstream commit 4eb82ba543421e9e38cc14e4e82058b78850df50 ] + +net_devmem_bind_dmabuf() trusts dmabuf->size and sg_dma_len() to be +PAGE_SIZE multiples without checking: + + - tx_vec is sized dmabuf->size / PAGE_SIZE, and + net_devmem_get_niov_at() only bounds-checks virt_addr < dmabuf->size + before indexing tx_vec[virt_addr / PAGE_SIZE]. 
With size = + N*PAGE_SIZE + r (1 <= r < PAGE_SIZE), sendmsg() at iov_base = + N*PAGE_SIZE passes the bound check and reads tx_vec[N] -- one past. + + - owner->area.num_niovs = len / PAGE_SIZE while gen_pool_add_owner() + covers the full byte len, so a non-page-multiple non-final sg + desyncs num_niovs from the gen_pool region for every later sg, on + both RX and TX. + +dma-buf does not require page-aligned sizes, so the bind path has to +enforce what its own indexing assumes. Reject both with -EINVAL. + +The size check is TX-only (only tx_vec is sized off dmabuf->size); the +SG-length check covers both directions. + +Fixes: bd61848900bf ("net: devmem: Implement TX path") +Cc: stable@vger.kernel.org +Signed-off-by: David Carlier +Reviewed-by: Bobby Eshleman +Acked-by: Stanislav Fomichev +Reviewed-by: Mina Almasry +Link: https://patch.msgid.link/20260519203530.66310-1-devnexen@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/core/devmem.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/net/core/devmem.c ++++ b/net/core/devmem.c +@@ -232,6 +232,11 @@ net_devmem_bind_dmabuf(struct net_device + } + + if (direction == DMA_TO_DEVICE) { ++ if (!IS_ALIGNED(dmabuf->size, PAGE_SIZE)) { ++ err = -EINVAL; ++ NL_SET_ERR_MSG(extack, "TX dma-buf size must be a multiple of PAGE_SIZE"); ++ goto err_unmap; ++ } + binding->tx_vec = kvmalloc_array(dmabuf->size / PAGE_SIZE, + sizeof(struct net_iov *), + GFP_KERNEL); +@@ -259,6 +264,12 @@ net_devmem_bind_dmabuf(struct net_device + size_t len = sg_dma_len(sg); + struct net_iov *niov; + ++ if (!IS_ALIGNED(len, PAGE_SIZE)) { ++ err = -EINVAL; ++ NL_SET_ERR_MSG(extack, "dma-buf SG length must be PAGE_SIZE aligned"); ++ goto err_free_chunks; ++ } ++ + owner = kzalloc_node(sizeof(*owner), GFP_KERNEL, + dev_to_node(&dev->dev)); + if (!owner) { diff --git a/queue-6.18/octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch 
b/queue-6.18/octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch new file mode 100644 index 0000000000..0c0397e43a --- /dev/null +++ b/queue-6.18/octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch @@ -0,0 +1,62 @@ +From stable+bounces-257134-greg=kroah.com@vger.kernel.org Sat May 30 18:59:50 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 12:52:02 -0400 +Subject: octeontx2-pf: avoid double free of pool->stack on AQ init failure +To: stable@vger.kernel.org +Cc: Dawei Feng , Zilin Guan , Simon Horman , Jakub Kicinski , Sasha Levin +Message-ID: <20260530165202.3003654-1-sashal@kernel.org> + +From: Dawei Feng + +[ Upstream commit 9b244c242bec48b37e82b89787afd6a4c43457e1 ] + +otx2_pool_aq_init() frees pool->stack when mailbox sync or retry +allocation fails, but leaves the pointer unchanged. Later, +otx2_sq_aura_pool_init() unwinds the partial setup through +otx2_aura_pool_free(), which frees pool->stack again. The CN20K-specific +cn20k_pool_aq_init() implementation has the same bug in +its corresponding error path. + +Set pool->stack to NULL immediately after the local free so the shared +cleanup path does not free the same stack again while cleaning up +partially initialized pool state. + +The bug was first flagged by an experimental analysis tool we are +developing for kernel memory-management bugs while analyzing +v6.13-rc1. The tool is still under development and is not yet publicly +available. Manual inspection confirms that the bug is still present in +v7.1-rc3. + +Runtime validation was not performed because reproducing this path +requires OcteonTX2/CN20K hardware. 
+ +Fixes: caa2da34fd25 ("octeontx2-pf: Initialize and config queues") +Fixes: d322fbd17203 ("octeontx2-pf: Initialize cn20k specific aura and pool contexts") +Cc: stable@vger.kernel.org +Signed-off-by: Zilin Guan +Signed-off-by: Dawei Feng +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20260515151826.1005397-1-dawei.feng@seu.edu.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +@@ -1468,11 +1468,13 @@ int otx2_pool_init(struct otx2_nic *pfvf + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + qmem_free(pfvf->dev, pool->stack); ++ pool->stack = NULL; + return err; + } + aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!aq) { + qmem_free(pfvf->dev, pool->stack); ++ pool->stack = NULL; + return -ENOMEM; + } + } diff --git a/queue-6.18/platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch b/queue-6.18/platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch new file mode 100644 index 0000000000..658c1d255d --- /dev/null +++ b/queue-6.18/platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch @@ -0,0 +1,131 @@ +From stable+bounces-256665-greg=kroah.com@vger.kernel.org Fri May 29 19:08:26 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 12:59:26 -0400 +Subject: platform/x86/intel/vsec: Fix enable_cnt imbalance on PCIe error recovery +To: stable@vger.kernel.org +Cc: "Lukas Wunner" , "Ilpo Järvinen" , "Sasha Levin" +Message-ID: <20260529165926.1255525-3-sashal@kernel.org> + +From: Lukas Wunner + +[ Upstream commit 348ccc754d8939e21ca5956ff45720b81d6e407f ] + +After a PCIe Uncorrectable Error has been reported by a device with +Intel Vendor Specific Extended Capabilities and has been recovered +through a 
Secondary Bus Reset, its driver calls intel_vsec_pci_probe() +to rescan and reinitialize VSECs. + +intel_vsec_pci_probe() invokes pcim_enable_device() and thereby adds +another devm action which calls pcim_disable_device() on driver unbind. + +So once the driver unbinds, pcim_disable_device() will be called as many +times as an Uncorrectable Error occurred, plus one. This will lead to +an enable_cnt imbalance on driver unbind. + +Additionally, since commit dc957ab6aa05 ("platform/x86/intel/vsec: Add +private data for per-device data"), a devm_kzalloc() allocation is +leaked on every Uncorrectable Error. + +Avoid by splitting the VSEC rescan out of intel_vsec_pci_probe() into a +separate helper and calling that on PCIe error recovery. + +Fixes: 936874b77dd0 ("platform/x86/intel/vsec: Add PCI error recovery support to Intel PMT") +Signed-off-by: Lukas Wunner +Cc: stable@vger.kernel.org # v6.0+ +Link: https://patch.msgid.link/bd594d09fa866dc51dddc9a447c3b23f9b1402cc.1778736835.git.lukas@wunner.de +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/intel/vsec.c | 54 +++++++++++++++++++++----------------- + 1 file changed, 30 insertions(+), 24 deletions(-) + +--- a/drivers/platform/x86/intel/vsec.c ++++ b/drivers/platform/x86/intel/vsec.c +@@ -620,29 +620,13 @@ static void intel_vsec_skip_missing_depe + } + } + +-static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ++static int intel_vsec_pci_init(struct pci_dev *pdev) + { +- const struct intel_vsec_platform_info *info; +- struct vsec_priv *priv; +- int num_caps, ret; ++ struct vsec_priv *priv = pci_get_drvdata(pdev); ++ const struct intel_vsec_platform_info *info = priv->info; + int run_once = 0; + bool found_any = false; +- +- ret = pcim_enable_device(pdev); +- if (ret) +- return ret; +- +- pci_save_state(pdev); +- info = (const struct intel_vsec_platform_info *)id->driver_data; +- if 
(!info) +- return -EINVAL; +- +- priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); +- if (!priv) +- return -ENOMEM; +- +- priv->info = info; +- pci_set_drvdata(pdev, priv); ++ int num_caps; + + num_caps = hweight_long(info->caps); + while (num_caps--) { +@@ -663,6 +647,31 @@ static int intel_vsec_pci_probe(struct p + return 0; + } + ++static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ++{ ++ const struct intel_vsec_platform_info *info; ++ struct vsec_priv *priv; ++ int ret; ++ ++ ret = pcim_enable_device(pdev); ++ if (ret) ++ return ret; ++ ++ pci_save_state(pdev); ++ info = (const struct intel_vsec_platform_info *)id->driver_data; ++ if (!info) ++ return -EINVAL; ++ ++ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); ++ if (!priv) ++ return -ENOMEM; ++ ++ priv->info = info; ++ pci_set_drvdata(pdev, priv); ++ ++ return intel_vsec_pci_init(pdev); ++} ++ + int intel_vsec_set_mapping(struct oobmsm_plat_info *plat_info, + struct intel_vsec_device *vsec_dev) + { +@@ -796,7 +805,6 @@ static pci_ers_result_t intel_vsec_pci_s + { + struct intel_vsec_device *intel_vsec_dev; + pci_ers_result_t status = PCI_ERS_RESULT_DISCONNECT; +- const struct pci_device_id *pci_dev_id; + unsigned long index; + + dev_info(&pdev->dev, "Resetting PCI slot\n"); +@@ -817,10 +825,8 @@ static pci_ers_result_t intel_vsec_pci_s + devm_release_action(&pdev->dev, intel_vsec_remove_aux, + &intel_vsec_dev->auxdev); + } +- pci_disable_device(pdev); + pci_restore_state(pdev); +- pci_dev_id = pci_match_id(intel_vsec_pci_ids, pdev); +- intel_vsec_pci_probe(pdev, pci_dev_id); ++ intel_vsec_pci_init(pdev); + + out: + return status; diff --git a/queue-6.18/platform-x86-intel-vsec-make-driver_data-info-const.patch b/queue-6.18/platform-x86-intel-vsec-make-driver_data-info-const.patch new file mode 100644 index 0000000000..0f79f890dd --- /dev/null +++ b/queue-6.18/platform-x86-intel-vsec-make-driver_data-info-const.patch @@ -0,0 +1,144 @@ +From 
stable+bounces-256664-greg=kroah.com@vger.kernel.org Fri May 29 19:08:24 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 12:59:25 -0400 +Subject: platform/x86/intel/vsec: Make driver_data info const +To: stable@vger.kernel.org +Cc: "David E. Box" , "Michael J. Ruhl" , "Ilpo Järvinen" , "Sasha Levin" +Message-ID: <20260529165926.1255525-2-sashal@kernel.org> + +From: "David E. Box" + +[ Upstream commit 9577c74c96f88d807d1ba005adbf5952e7127e55 ] + +Treat PCI id->driver_data (intel_vsec_platform_info) as read-only by making +vsec_priv->info a const pointer and updating all function signatures to +accept const intel_vsec_platform_info *. + +This improves const-correctness and clarifies that the platform info data +from the driver_data table is not meant to be modified at runtime. + +No functional changes intended. + +Signed-off-by: David E. Box +Reviewed-by: Michael J. Ruhl +Link: https://patch.msgid.link/20260313015202.3660072-3-david.e.box@linux.intel.com +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Stable-dep-of: 348ccc754d89 ("platform/x86/intel/vsec: Fix enable_cnt imbalance on PCIe error recovery") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/intel/vsec.c | 20 ++++++++++---------- + include/linux/intel_vsec.h | 4 ++-- + 2 files changed, 12 insertions(+), 12 deletions(-) + +--- a/drivers/platform/x86/intel/vsec.c ++++ b/drivers/platform/x86/intel/vsec.c +@@ -42,7 +42,7 @@ enum vsec_device_state { + }; + + struct vsec_priv { +- struct intel_vsec_platform_info *info; ++ const struct intel_vsec_platform_info *info; + struct device *suppliers[VSEC_FEATURE_COUNT]; + struct oobmsm_plat_info plat_info; + enum vsec_device_state state[VSEC_FEATURE_COUNT]; +@@ -270,7 +270,7 @@ cleanup_aux: + EXPORT_SYMBOL_NS_GPL(intel_vsec_add_aux, "INTEL_VSEC"); + + static int intel_vsec_add_dev(struct pci_dev *pdev, struct intel_vsec_header *header, +- struct intel_vsec_platform_info *info, ++ const struct intel_vsec_platform_info 
*info, + unsigned long cap_id, u64 base_addr) + { + struct intel_vsec_device __free(kfree) *intel_vsec_dev = NULL; +@@ -406,7 +406,7 @@ static int get_cap_id(u32 header_id, uns + + static int intel_vsec_register_device(struct pci_dev *pdev, + struct intel_vsec_header *header, +- struct intel_vsec_platform_info *info, ++ const struct intel_vsec_platform_info *info, + u64 base_addr) + { + const struct vsec_feature_dependency *consumer_deps; +@@ -452,7 +452,7 @@ static int intel_vsec_register_device(st + } + + static bool intel_vsec_walk_header(struct pci_dev *pdev, +- struct intel_vsec_platform_info *info) ++ const struct intel_vsec_platform_info *info) + { + struct intel_vsec_header **header = info->headers; + bool have_devices = false; +@@ -468,7 +468,7 @@ static bool intel_vsec_walk_header(struc + } + + static bool intel_vsec_walk_dvsec(struct pci_dev *pdev, +- struct intel_vsec_platform_info *info) ++ const struct intel_vsec_platform_info *info) + { + bool have_devices = false; + int pos = 0; +@@ -519,7 +519,7 @@ static bool intel_vsec_walk_dvsec(struct + } + + static bool intel_vsec_walk_vsec(struct pci_dev *pdev, +- struct intel_vsec_platform_info *info) ++ const struct intel_vsec_platform_info *info) + { + bool have_devices = false; + int pos = 0; +@@ -565,7 +565,7 @@ static bool intel_vsec_walk_vsec(struct + } + + int intel_vsec_register(struct pci_dev *pdev, +- struct intel_vsec_platform_info *info) ++ const struct intel_vsec_platform_info *info) + { + if (!pdev || !info || !info->headers) + return -EINVAL; +@@ -578,7 +578,7 @@ int intel_vsec_register(struct pci_dev * + EXPORT_SYMBOL_NS_GPL(intel_vsec_register, "INTEL_VSEC"); + + static bool intel_vsec_get_features(struct pci_dev *pdev, +- struct intel_vsec_platform_info *info) ++ const struct intel_vsec_platform_info *info) + { + bool found = false; + +@@ -622,7 +622,7 @@ static void intel_vsec_skip_missing_depe + + static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) + { +- 
struct intel_vsec_platform_info *info; ++ const struct intel_vsec_platform_info *info; + struct vsec_priv *priv; + int num_caps, ret; + int run_once = 0; +@@ -633,7 +633,7 @@ static int intel_vsec_pci_probe(struct p + return ret; + + pci_save_state(pdev); +- info = (struct intel_vsec_platform_info *)id->driver_data; ++ info = (const struct intel_vsec_platform_info *)id->driver_data; + if (!info) + return -EINVAL; + +--- a/include/linux/intel_vsec.h ++++ b/include/linux/intel_vsec.h +@@ -199,13 +199,13 @@ static inline struct intel_vsec_device * + + #if IS_ENABLED(CONFIG_INTEL_VSEC) + int intel_vsec_register(struct pci_dev *pdev, +- struct intel_vsec_platform_info *info); ++ const struct intel_vsec_platform_info *info); + int intel_vsec_set_mapping(struct oobmsm_plat_info *plat_info, + struct intel_vsec_device *vsec_dev); + struct oobmsm_plat_info *intel_vsec_get_mapping(struct pci_dev *pdev); + #else + static inline int intel_vsec_register(struct pci_dev *pdev, +- struct intel_vsec_platform_info *info) ++ const struct intel_vsec_platform_info *info) + { + return -ENODEV; + } diff --git a/queue-6.18/platform-x86-intel-vsec-refactor-base_addr-handling.patch b/queue-6.18/platform-x86-intel-vsec-refactor-base_addr-handling.patch new file mode 100644 index 0000000000..ec1f20b85f --- /dev/null +++ b/queue-6.18/platform-x86-intel-vsec-refactor-base_addr-handling.patch @@ -0,0 +1,124 @@ +From stable+bounces-256663-greg=kroah.com@vger.kernel.org Fri May 29 19:34:52 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 12:59:24 -0400 +Subject: platform/x86/intel/vsec: Refactor base_addr handling +To: stable@vger.kernel.org +Cc: "David E. Box" , "Michael J. Ruhl" , "Ilpo Järvinen" , "Sasha Levin" +Message-ID: <20260529165926.1255525-1-sashal@kernel.org> + +From: "David E. Box" + +[ Upstream commit 904b333fc51cc045941df9656302449a0fc9978e ] + +The base_addr field in intel_vsec_platform_info was originally added to +support devices that emulate PCI VSEC capabilities in MMIO. 
Previously, +the code would check at registration time whether base_addr was set, +falling back to the PCI BAR if not. + +Refactor this by making base_addr an explicit function parameter. This +clarifies ownership of the value and removes conditional logic from +intel_vsec_add_dev(). It also enables making intel_vsec_platform_info +const in a later patch, since the function no longer needs to write to +info->base_addr. + +No functional change intended. + +Signed-off-by: David E. Box +Reviewed-by: Michael J. Ruhl +Link: https://patch.msgid.link/20260313015202.3660072-2-david.e.box@linux.intel.com +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Stable-dep-of: 348ccc754d89 ("platform/x86/intel/vsec: Fix enable_cnt imbalance on PCIe error recovery") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/intel/vsec.c | 23 ++++++++++------------- + 1 file changed, 10 insertions(+), 13 deletions(-) + +--- a/drivers/platform/x86/intel/vsec.c ++++ b/drivers/platform/x86/intel/vsec.c +@@ -271,14 +271,13 @@ EXPORT_SYMBOL_NS_GPL(intel_vsec_add_aux, + + static int intel_vsec_add_dev(struct pci_dev *pdev, struct intel_vsec_header *header, + struct intel_vsec_platform_info *info, +- unsigned long cap_id) ++ unsigned long cap_id, u64 base_addr) + { + struct intel_vsec_device __free(kfree) *intel_vsec_dev = NULL; + struct resource __free(kfree) *res = NULL; + struct resource *tmp; + struct device *parent; + unsigned long quirks = info->quirks; +- u64 base_addr; + int i; + + if (info->parent) +@@ -310,11 +309,6 @@ static int intel_vsec_add_dev(struct pci + if (quirks & VSEC_QUIRK_TABLE_SHIFT) + header->offset >>= TABLE_OFFSET_SHIFT; + +- if (info->base_addr) +- base_addr = info->base_addr; +- else +- base_addr = pdev->resource[header->tbir].start; +- + /* + * The DVSEC/VSEC contains the starting offset and count for a block of + * discovery tables. 
Create a resource array of these tables to the +@@ -412,7 +406,8 @@ static int get_cap_id(u32 header_id, uns + + static int intel_vsec_register_device(struct pci_dev *pdev, + struct intel_vsec_header *header, +- struct intel_vsec_platform_info *info) ++ struct intel_vsec_platform_info *info, ++ u64 base_addr) + { + const struct vsec_feature_dependency *consumer_deps; + struct vsec_priv *priv; +@@ -428,7 +423,7 @@ static int intel_vsec_register_device(st + * For others using the exported APIs, add the device directly. + */ + if (!pci_match_id(intel_vsec_pci_ids, pdev)) +- return intel_vsec_add_dev(pdev, header, info, cap_id); ++ return intel_vsec_add_dev(pdev, header, info, cap_id, base_addr); + + priv = pci_get_drvdata(pdev); + if (priv->state[cap_id] == STATE_REGISTERED || +@@ -444,7 +439,7 @@ static int intel_vsec_register_device(st + + consumer_deps = get_consumer_dependencies(priv, cap_id); + if (!consumer_deps || suppliers_ready(priv, consumer_deps, cap_id)) { +- ret = intel_vsec_add_dev(pdev, header, info, cap_id); ++ ret = intel_vsec_add_dev(pdev, header, info, cap_id, base_addr); + if (ret) + priv->state[cap_id] = STATE_SKIP; + else +@@ -464,7 +459,7 @@ static bool intel_vsec_walk_header(struc + int ret; + + for ( ; *header; header++) { +- ret = intel_vsec_register_device(pdev, *header, info); ++ ret = intel_vsec_register_device(pdev, *header, info, info->base_addr); + if (!ret) + have_devices = true; + } +@@ -512,7 +507,8 @@ static bool intel_vsec_walk_dvsec(struct + pci_read_config_dword(pdev, pos + PCI_DVSEC_HEADER2, &hdr); + header.id = PCI_DVSEC_HEADER2_ID(hdr); + +- ret = intel_vsec_register_device(pdev, &header, info); ++ ret = intel_vsec_register_device(pdev, &header, info, ++ pci_resource_start(pdev, header.tbir)); + if (ret) + continue; + +@@ -557,7 +553,8 @@ static bool intel_vsec_walk_vsec(struct + header.tbir = INTEL_DVSEC_TABLE_BAR(table); + header.offset = INTEL_DVSEC_TABLE_OFFSET(table); + +- ret = intel_vsec_register_device(pdev, &header, 
info); ++ ret = intel_vsec_register_device(pdev, &header, info, ++ pci_resource_start(pdev, header.tbir)); + if (ret) + continue; + diff --git a/queue-6.18/rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch b/queue-6.18/rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch new file mode 100644 index 0000000000..274e2ba30e --- /dev/null +++ b/queue-6.18/rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch @@ -0,0 +1,646 @@ +From stable+bounces-256707-greg=kroah.com@vger.kernel.org Fri May 29 20:35:23 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 14:35:07 -0400 +Subject: rxrpc: Fix DATA decrypt vs splice() by copying data to buffer in recvmsg +To: stable@vger.kernel.org +Cc: David Howells , Hyunwoo Kim , Simon Horman , Jiayuan Chen , linux-afs@lists.infradead.org, Jeffrey Altman , Marc Dionne , Jakub Kicinski , Sasha Levin +Message-ID: <20260529183508.1594050-2-sashal@kernel.org> + +From: David Howells + +[ Upstream commit d2bc90cf6c75cb96d2ce549be6c35efa3099d25b ] + +This improves the fix for CVE-2026-43500. + +Fix the pagecache corruption from in-place decryption of a DATA packet +transmitted locally by splice() by getting rid of the packet sharing in the +I/O thread and unconditionally extracting the packet content into a bounce +buffer in which the buffer is decrypted. recvmsg() (or the kernel +equivalent) then copies the data from the bounce buffer to the destination +buffer. The sk_buff then remains unmodified. + +This has an additional advantage in that the packet is then arranged in the +buffer with the correct alignment required for the crypto algorithms to +process directly. The performance of the crypto does seem to be a little +faster and, surprisingly, the unencrypted performance doesn't seem to +change much - possibly due to removing complexity from the I/O thread. 
+ +Yet another advantage is that the I/O thread doesn't have to copy packets, +which would slow down packet distribution, ACK generation, etc. + +The buffer belongs to the call and is allocated initially at 2K, +sufficiently large to hold a whole jumbo subpacket, but the buffer will be +increased in size if needed. However, to make this work, MSG_PEEK may +cause a later packet to be decrypted into the buffer, in which case the +earlier one will need re-decrypting for a subsequent recvmsg(). + +Note that rx_pkt_offset may legitimately see 0 as a valid offset now, so +switch to using USHRT_MAX to indicate an invalid offset. + +Note also that I would generally prefer to replace the buffers of the +current sk_buff with a new kmalloc'd buffer of the right size, ditching the +old data and frags as this makes the handling of MSG_PEEK easier and +removes the re-decryption issue, but this looks like quite a complicated +thing to achieve. skb_morph() looks halfway to what I want, but I don't +want to have to allocate a new sk_buff.
+ +Fixes: d0d5c0cd1e71 ("rxrpc: Use skb_unshare() rather than skb_cow_data()") +Reported-by: Hyunwoo Kim +Closes: https://lore.kernel.org/r/afKV2zGR6rrelPC7@v4bel/ +Signed-off-by: David Howells +cc: Simon Horman +cc: Jiayuan Chen +cc: linux-afs@lists.infradead.org +Reviewed-by: Jeffrey Altman +Tested-by: Marc Dionne +Link: https://patch.msgid.link/20260515230516.2718212-3-dhowells@redhat.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8bfab4b6ffc2 ("rxrpc: Fix RESPONSE packet verification to extract skb to a linear buffer") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/ar-internal.h | 7 ++- + net/rxrpc/call_event.c | 22 ------------ + net/rxrpc/call_object.c | 2 + + net/rxrpc/insecure.c | 3 - + net/rxrpc/recvmsg.c | 68 ++++++++++++++++++++++++++++++------- + net/rxrpc/rxgk.c | 51 +++++++++++++--------------- + net/rxrpc/rxgk_common.h | 82 +++++++++++++++++++++++++++++++++++++++++++++ + net/rxrpc/rxkad.c | 86 +++++++++++++++++------------------------------- + 8 files changed, 201 insertions(+), 120 deletions(-) + +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -213,8 +213,6 @@ struct rxrpc_skb_priv { + struct { + u16 offset; /* Offset of data */ + u16 len; /* Length of data */ +- u8 flags; +-#define RXRPC_RX_VERIFIED 0x01 + }; + struct { + rxrpc_seq_t first_ack; /* First packet in acks table */ +@@ -774,6 +772,11 @@ struct rxrpc_call { + struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */ + struct sk_buff_head rx_queue; /* Queue of packets for this call to receive */ + struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */ ++ void *rx_dec_buffer; /* Decryption buffer */ ++ unsigned short rx_dec_bsize; /* rx_dec_buffer size */ ++ unsigned short rx_dec_offset; /* Decrypted packet data offset */ ++ unsigned short rx_dec_len; /* Decrypted packet data len */ ++ rxrpc_seq_t rx_dec_seq; /* Packet in decryption buffer */ + + rxrpc_seq_t rx_highest_seq; /* Higest sequence 
number received */ + rxrpc_seq_t rx_consumed; /* Highest packet consumed */ +--- a/net/rxrpc/call_event.c ++++ b/net/rxrpc/call_event.c +@@ -332,27 +332,7 @@ bool rxrpc_input_call_event(struct rxrpc + + saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; + +- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && +- sp->hdr.securityIndex != 0 && +- (skb_cloned(skb) || +- skb_has_frag_list(skb) || +- skb_has_shared_frag(skb))) { +- /* Unshare the packet so that it can be +- * modified by in-place decryption. +- */ +- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); +- +- if (nskb) { +- rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); +- rxrpc_input_call_packet(call, nskb); +- rxrpc_free_skb(nskb, rxrpc_skb_put_call_rx); +- } else { +- /* OOM - Drop the packet. */ +- rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); +- } +- } else { +- rxrpc_input_call_packet(call, skb); +- } ++ rxrpc_input_call_packet(call, skb); + rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); + did_receive = true; + } +--- a/net/rxrpc/call_object.c ++++ b/net/rxrpc/call_object.c +@@ -152,6 +152,7 @@ struct rxrpc_call *rxrpc_alloc_call(stru + spin_lock_init(&call->notify_lock); + refcount_set(&call->ref, 1); + call->debug_id = debug_id; ++ call->rx_pkt_offset = USHRT_MAX; + call->tx_total_len = -1; + call->tx_jumbo_max = 1; + call->next_rx_timo = 20 * HZ; +@@ -553,6 +554,7 @@ static void rxrpc_cleanup_rx_buffers(str + rxrpc_purge_queue(&call->recvmsg_queue); + rxrpc_purge_queue(&call->rx_queue); + rxrpc_purge_queue(&call->rx_oos_queue); ++ kfree(call->rx_dec_buffer); + } + + /* +--- a/net/rxrpc/insecure.c ++++ b/net/rxrpc/insecure.c +@@ -32,9 +32,6 @@ static int none_secure_packet(struct rxr + + static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) + { +- struct rxrpc_skb_priv *sp = rxrpc_skb(skb); +- +- sp->flags |= RXRPC_RX_VERIFIED; + return 0; + } + +--- a/net/rxrpc/recvmsg.c ++++ b/net/rxrpc/recvmsg.c +@@ -147,15 +147,52 @@ static void rxrpc_rotate_rx_window(struc + } + + /* +- * Decrypt 
and verify a DATA packet. ++ * Decrypt and verify a DATA packet. The content of the packet is pulled out ++ * into a flat buffer rather than decrypting in place in the skbuff. This also ++ * has the advantage of aligning the buffer correctly for the crypto routines. ++ * ++ * We keep track of the sequence number of the packet currently decrypted into ++ * the buffer in ->rx_dec_seq. If MSG_PEEK is used and steps onto a new ++ * packet, subsequent recvmsg() calls will have to go back and re-decrypt the ++ * current packet. + */ + static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) + { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); ++ int ret; + +- if (sp->flags & RXRPC_RX_VERIFIED) +- return 0; +- return call->security->verify_packet(call, skb); ++ if (sp->len > call->rx_dec_bsize) { ++ /* Make sure we can hold a 1412-byte jumbo subpacket and make ++ * sure that the buffer size is aligned to a crypto blocksize. ++ */ ++ size_t size = clamp(round_up(sp->len, 32), 2048, 65535); ++ void *buffer = krealloc(call->rx_dec_buffer, size, GFP_NOFS); ++ ++ if (!buffer) ++ return -ENOMEM; ++ call->rx_dec_buffer = buffer; ++ call->rx_dec_bsize = size; ++ } ++ ++ ret = -EFAULT; ++ if (skb_copy_bits(skb, sp->offset, call->rx_dec_buffer, sp->len) < 0) ++ goto err; ++ ++ call->rx_dec_offset = 0; ++ call->rx_dec_len = sp->len; ++ call->rx_dec_seq = sp->hdr.seq; ++ ret = call->security->verify_packet(call, skb); ++ if (ret < 0) ++ goto err; ++ return 0; ++ ++err: ++ kfree(call->rx_dec_buffer); ++ call->rx_dec_buffer = NULL; ++ call->rx_dec_bsize = 0; ++ call->rx_dec_offset = 0; ++ call->rx_dec_len = 0; ++ return ret; + } + + /* +@@ -283,16 +320,21 @@ static int rxrpc_recvmsg_data(struct soc + if (msg) + sock_recv_timestamp(msg, sock->sk, skb); + +- if (rx_pkt_offset == 0) { ++ if (call->rx_dec_seq != sp->hdr.seq || ++ !call->rx_dec_buffer) { + ret2 = rxrpc_verify_data(call, skb); + trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq, +- sp->offset, sp->len, ret2); 
++ call->rx_dec_offset, ++ call->rx_dec_len, ret2); + if (ret2 < 0) { + ret = ret2; + goto out; + } +- rx_pkt_offset = sp->offset; +- rx_pkt_len = sp->len; ++ } ++ ++ if (rx_pkt_offset == USHRT_MAX) { ++ rx_pkt_offset = call->rx_dec_offset; ++ rx_pkt_len = call->rx_dec_len; + } else { + trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq, + rx_pkt_offset, rx_pkt_len, 0); +@@ -304,10 +346,10 @@ static int rxrpc_recvmsg_data(struct soc + if (copy > remain) + copy = remain; + if (copy > 0) { +- ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, +- copy); +- if (ret2 < 0) { +- ret = ret2; ++ ret2 = copy_to_iter(call->rx_dec_buffer + rx_pkt_offset, ++ copy, iter); ++ if (ret2 != copy) { ++ ret = -EFAULT; + goto out; + } + +@@ -328,7 +370,7 @@ static int rxrpc_recvmsg_data(struct soc + /* The whole packet has been transferred. */ + if (sp->hdr.flags & RXRPC_LAST_PACKET) + ret = 1; +- rx_pkt_offset = 0; ++ rx_pkt_offset = USHRT_MAX; + rx_pkt_len = 0; + + skb = skb_peek_next(skb, &call->recvmsg_queue); +--- a/net/rxrpc/rxgk.c ++++ b/net/rxrpc/rxgk.c +@@ -473,8 +473,9 @@ static int rxgk_verify_packet_integrity( + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxgk_header *hdr; + struct krb5_buffer metadata; +- unsigned int offset = sp->offset, len = sp->len; ++ unsigned int len = call->rx_dec_len; + size_t data_offset = 0, data_len = len; ++ void *data = call->rx_dec_buffer, *p = data; + u32 ac = 0; + int ret = -ENOMEM; + +@@ -500,16 +501,15 @@ static int rxgk_verify_packet_integrity( + + metadata.len = sizeof(*hdr); + metadata.data = hdr; +- ret = rxgk_verify_mic_skb(gk->krb5, gk->rx_Kc, &metadata, +- skb, &offset, &len, &ac); ++ ret = rxgk_verify_mic(gk->krb5, gk->rx_Kc, &metadata, &p, &len, &ac); + kfree(hdr); + if (ret < 0) { + if (ret != -ENOMEM) + rxrpc_abort_eproto(call, skb, ac, + rxgk_abort_1_verify_mic_eproto); + } else { +- sp->offset = offset; +- sp->len = len; ++ call->rx_dec_offset = p - data; ++ call->rx_dec_len = len; + } + + put_gk: +@@ -526,56 
+526,53 @@ static int rxgk_verify_packet_encrypted( + struct sk_buff *skb) + { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); +- struct rxgk_header hdr; +- unsigned int offset = sp->offset, len = sp->len; ++ struct rxgk_header *hdr; ++ unsigned int offset = 0, len = call->rx_dec_len; ++ void *data = call->rx_dec_buffer, *p = data; + int ret; + u32 ac = 0; + + _enter(""); + + if (crypto_krb5_check_data_len(gk->krb5, KRB5_ENCRYPT_MODE, +- len, sizeof(hdr)) < 0) { ++ len, sizeof(*hdr)) < 0) { + ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, + rxgk_abort_2_short_header); + goto error; + } + +- ret = rxgk_decrypt_skb(gk->krb5, gk->rx_enc, skb, &offset, &len, &ac); ++ ret = rxgk_decrypt(gk->krb5, gk->rx_enc, &p, &len, &ac); + if (ret < 0) { + if (ret != -ENOMEM) + rxrpc_abort_eproto(call, skb, ac, rxgk_abort_2_decrypt_eproto); + goto error; + } ++ offset = p - data; + +- if (len < sizeof(hdr)) { ++ if (len < sizeof(*hdr)) { + ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, + rxgk_abort_2_short_header); + goto error; + } + + /* Extract the header from the skb */ +- ret = skb_copy_bits(skb, offset, &hdr, sizeof(hdr)); +- if (ret < 0) { +- ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, +- rxgk_abort_2_short_encdata); +- goto error; +- } +- offset += sizeof(hdr); +- len -= sizeof(hdr); +- +- if (ntohl(hdr.epoch) != call->conn->proto.epoch || +- ntohl(hdr.cid) != call->cid || +- ntohl(hdr.call_number) != call->call_id || +- ntohl(hdr.seq) != sp->hdr.seq || +- ntohl(hdr.sec_index) != call->security_ix || +- ntohl(hdr.data_len) > len) { ++ hdr = data + offset; ++ offset += sizeof(*hdr); ++ len -= sizeof(*hdr); ++ ++ if (ntohl(hdr->epoch) != call->conn->proto.epoch || ++ ntohl(hdr->cid) != call->cid || ++ ntohl(hdr->call_number) != call->call_id || ++ ntohl(hdr->seq) != sp->hdr.seq || ++ ntohl(hdr->sec_index) != call->security_ix || ++ ntohl(hdr->data_len) > len) { + ret = rxrpc_abort_eproto(call, skb, RXGK_SEALEDINCON, + rxgk_abort_2_short_data); + goto 
error; + } + +- sp->offset = offset; +- sp->len = ntohl(hdr.data_len); ++ call->rx_dec_offset = offset; ++ call->rx_dec_len = ntohl(hdr->data_len); + ret = 0; + error: + rxgk_put(gk); +--- a/net/rxrpc/rxgk_common.h ++++ b/net/rxrpc/rxgk_common.h +@@ -106,6 +106,49 @@ int rxgk_decrypt_skb(const struct krb5_e + } + + /* ++ * Apply decryption and checksumming functions a flat data buffer. The data ++ * point and length are updated to reflect the actual content of the encrypted ++ * region. ++ */ ++static inline int rxgk_decrypt(const struct krb5_enctype *krb5, ++ struct crypto_aead *aead, ++ void **_data, unsigned int *_len, ++ int *_error_code) ++{ ++ struct scatterlist sg[1]; ++ size_t offset = 0, len = *_len; ++ int ret; ++ ++ sg_init_one(sg, *_data, len); ++ ++ ret = crypto_krb5_decrypt(krb5, aead, sg, 1, &offset, &len); ++ switch (ret) { ++ case 0: ++ if (offset & 3) { ++ *_error_code = RXGK_INCONSISTENCY; ++ ret = -EPROTO; ++ break; ++ } ++ *_data += offset; ++ *_len = len; ++ break; ++ case -EBADMSG: /* Checksum mismatch. */ ++ case -EPROTO: ++ *_error_code = RXGK_SEALEDINCON; ++ break; ++ case -EMSGSIZE: ++ *_error_code = RXGK_PACKETSHORT; ++ break; ++ case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */ ++ default: ++ *_error_code = RXGK_INCONSISTENCY; ++ break; ++ } ++ ++ return ret; ++} ++ ++/* + * Check the MIC on a region of an skbuff. The offset and length are updated + * to reflect the actual content of the secure region. + */ +@@ -134,6 +177,45 @@ int rxgk_verify_mic_skb(const struct krb + *_len = len; + break; + case -EBADMSG: /* Checksum mismatch */ ++ case -EPROTO: ++ *_error_code = RXGK_SEALEDINCON; ++ break; ++ case -EMSGSIZE: ++ *_error_code = RXGK_PACKETSHORT; ++ break; ++ case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */ ++ default: ++ *_error_code = RXGK_INCONSISTENCY; ++ break; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Check the MIC on a flat buffer. 
The data pointer and length are updated to ++ * reflect the actual content of the secure region. ++ */ ++static inline ++int rxgk_verify_mic(const struct krb5_enctype *krb5, ++ struct crypto_shash *shash, ++ const struct krb5_buffer *metadata, ++ void **_data, unsigned int *_len, ++ u32 *_error_code) ++{ ++ struct scatterlist sg[1]; ++ size_t offset = 0, len = *_len; ++ int ret; ++ ++ sg_init_one(sg, *_data, len); ++ ++ ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, 1, &offset, &len); ++ switch (ret) { ++ case 0: ++ *_data += offset; ++ *_len = len; ++ break; ++ case -EBADMSG: /* Checksum mismatch */ + case -EPROTO: + *_error_code = RXGK_SEALEDINCON; + break; +--- a/net/rxrpc/rxkad.c ++++ b/net/rxrpc/rxkad.c +@@ -430,27 +430,25 @@ static int rxkad_verify_packet_1(struct + rxrpc_seq_t seq, + struct skcipher_request *req) + { +- struct rxkad_level1_hdr sechdr; ++ struct rxkad_level1_hdr *sechdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_crypt iv; +- struct scatterlist sg[16]; +- u32 data_size, buf; ++ struct scatterlist sg[1]; ++ void *data = call->rx_dec_buffer; ++ u32 len = sp->len, data_size, buf; + u16 check; + int ret; + + _enter(""); + +- if (sp->len < 8) ++ if (len < 8) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_1_short_header); + + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. 
+ */ +- sg_init_table(sg, ARRAY_SIZE(sg)); +- ret = skb_to_sgvec(skb, sg, sp->offset, 8); +- if (unlikely(ret < 0)) +- return ret; ++ sg_init_one(sg, data, len); + + /* start the decryption afresh */ + memset(&iv, 0, sizeof(iv)); +@@ -464,13 +462,11 @@ static int rxkad_verify_packet_1(struct + return ret; + + /* Extract the decrypted packet length */ +- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) +- return rxrpc_abort_eproto(call, skb, RXKADDATALEN, +- rxkad_abort_1_short_encdata); +- sp->offset += sizeof(sechdr); +- sp->len -= sizeof(sechdr); ++ sechdr = data; ++ call->rx_dec_offset = sizeof(*sechdr); ++ len -= sizeof(*sechdr); + +- buf = ntohl(sechdr.data_size); ++ buf = ntohl(sechdr->data_size); + data_size = buf & 0xffff; + + check = buf >> 16; +@@ -479,10 +475,10 @@ static int rxkad_verify_packet_1(struct + if (check != 0) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_1_short_check); +- if (data_size > sp->len) ++ if (data_size > len) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_1_short_data); +- sp->len = data_size; ++ call->rx_dec_len = data_size; + + _leave(" = 0 [dlen=%x]", data_size); + return 0; +@@ -496,43 +492,28 @@ static int rxkad_verify_packet_2(struct + struct skcipher_request *req) + { + const struct rxrpc_key_token *token; +- struct rxkad_level2_hdr sechdr; ++ struct rxkad_level2_hdr *sechdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_crypt iv; +- struct scatterlist _sg[4], *sg; +- u32 data_size, buf; ++ struct scatterlist sg[1]; ++ void *data = call->rx_dec_buffer; ++ u32 len = sp->len, data_size, buf; + u16 check; +- int nsg, ret; ++ int ret; + +- _enter(",{%d}", sp->len); ++ _enter(",{%d}", len); + +- if (sp->len < 8) ++ if (len < 8) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_short_header); + + /* Don't let the crypto algo see a misaligned length. 
*/ +- sp->len = round_down(sp->len, 8); ++ len = round_down(len, 8); + +- /* Decrypt the skbuff in-place. TODO: We really want to decrypt +- * directly into the target buffer. ++ /* Decrypt in place in the call's decryption buffer. TODO: We really ++ * want to decrypt directly into the target buffer. + */ +- sg = _sg; +- nsg = skb_shinfo(skb)->nr_frags + 1; +- if (nsg <= 4) { +- nsg = 4; +- } else { +- sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO); +- if (!sg) +- return -ENOMEM; +- } +- +- sg_init_table(sg, nsg); +- ret = skb_to_sgvec(skb, sg, sp->offset, sp->len); +- if (unlikely(ret < 0)) { +- if (sg != _sg) +- kfree(sg); +- return ret; +- } ++ sg_init_one(sg, data, len); + + /* decrypt from the session key */ + token = call->conn->key->payload.data[0]; +@@ -540,11 +521,9 @@ static int rxkad_verify_packet_2(struct + + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); +- skcipher_request_set_crypt(req, sg, sg, sp->len, iv.x); ++ skcipher_request_set_crypt(req, sg, sg, len, iv.x); + ret = crypto_skcipher_decrypt(req); + skcipher_request_zero(req); +- if (sg != _sg) +- kfree(sg); + if (ret < 0) { + if (ret == -ENOMEM) + return ret; +@@ -553,13 +532,11 @@ static int rxkad_verify_packet_2(struct + } + + /* Extract the decrypted packet length */ +- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) +- return rxrpc_abort_eproto(call, skb, RXKADDATALEN, +- rxkad_abort_2_short_len); +- sp->offset += sizeof(sechdr); +- sp->len -= sizeof(sechdr); ++ sechdr = data; ++ call->rx_dec_offset = sizeof(*sechdr); ++ len -= sizeof(*sechdr); + +- buf = ntohl(sechdr.data_size); ++ buf = ntohl(sechdr->data_size); + data_size = buf & 0xffff; + + check = buf >> 16; +@@ -569,17 +546,18 @@ static int rxkad_verify_packet_2(struct + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_short_check); + +- if (data_size > sp->len) ++ if (data_size > len) + return rxrpc_abort_eproto(call, skb, 
RXKADDATALEN, + rxkad_abort_2_short_data); + +- sp->len = data_size; ++ call->rx_dec_len = data_size; + _leave(" = 0 [dlen=%x]", data_size); + return 0; + } + + /* +- * Verify the security on a received packet and the subpackets therein. ++ * Verify the security on a received (sub)packet. If the packet needs ++ * modifying (e.g. decrypting), it must be copied. + */ + static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) + { diff --git a/queue-6.18/rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch b/queue-6.18/rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch new file mode 100644 index 0000000000..cdc5c2c728 --- /dev/null +++ b/queue-6.18/rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch @@ -0,0 +1,614 @@ +From stable+bounces-256708-greg=kroah.com@vger.kernel.org Fri May 29 20:35:24 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 14:35:08 -0400 +Subject: rxrpc: Fix RESPONSE packet verification to extract skb to a linear buffer +To: stable@vger.kernel.org +Cc: David Howells , Hyunwoo Kim , Simon Horman , Jiayuan Chen , linux-afs@lists.infradead.org, stable@kernel.org, Jeffrey Altman , Marc Dionne , Jakub Kicinski , Sasha Levin +Message-ID: <20260529183508.1594050-3-sashal@kernel.org> + +From: David Howells + +[ Upstream commit 8bfab4b6ffc2fe92da86300728fc8c3c7ebffb56 ] + +This improves the fix for CVE-2026-43500. + +Fix the verification of RESPONSE packets to avoid the problem of +overwriting a RESPONSE packet sent via splice to a local address by +extracting the contents of the UDP packet into a kmalloc'd linear buffer +rather than decrypting the data in place in the sk_buff (which may corrupt +the original buffer). 
+ +Fixes: 24481a7f5733 ("rxrpc: Fix conn-level packet handling to unshare RESPONSE packets") +Reported-by: Hyunwoo Kim +Closes: https://lore.kernel.org/r/afKV2zGR6rrelPC7@v4bel/ +Signed-off-by: David Howells +cc: Simon Horman +cc: Jiayuan Chen +cc: linux-afs@lists.infradead.org +cc: stable@kernel.org +Reviewed-by: Jeffrey Altman +Tested-by: Marc Dionne +Link: https://patch.msgid.link/20260515230516.2718212-4-dhowells@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/ar-internal.h | 7 ++- + net/rxrpc/conn_event.c | 30 ++++++--------- + net/rxrpc/insecure.c | 5 +- + net/rxrpc/rxgk.c | 96 +++++++++++++++--------------------------------- + net/rxrpc/rxgk_app.c | 46 +++++++++-------------- + net/rxrpc/rxgk_common.h | 92 +--------------------------------------------- + net/rxrpc/rxkad.c | 29 +++++--------- + 7 files changed, 81 insertions(+), 224 deletions(-) + +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -307,15 +307,16 @@ struct rxrpc_security { + struct sk_buff *challenge); + + /* verify a response */ +- int (*verify_response)(struct rxrpc_connection *, +- struct sk_buff *); ++ int (*verify_response)(struct rxrpc_connection *conn, ++ struct sk_buff *response_skb, ++ void *response, unsigned int len); + + /* clear connection security */ + void (*clear)(struct rxrpc_connection *); + + /* Default ticket -> key decoder */ + int (*default_decode_ticket)(struct rxrpc_connection *conn, struct sk_buff *skb, +- unsigned int ticket_offset, unsigned int ticket_len, ++ void *ticket, unsigned int ticket_len, + struct key **_key); + }; + +--- a/net/rxrpc/conn_event.c ++++ b/net/rxrpc/conn_event.c +@@ -243,28 +243,22 @@ static void rxrpc_call_is_secure(struct + static int rxrpc_verify_response(struct rxrpc_connection *conn, + struct sk_buff *skb) + { ++ unsigned int len = skb->len - sizeof(struct rxrpc_wire_header); ++ void *buffer; + int ret; + +- if (skb_cloned(skb) || 
skb_has_frag_list(skb) || +- skb_has_shared_frag(skb)) { +- /* Copy the packet if shared so that we can do in-place +- * decryption. +- */ +- struct sk_buff *nskb = skb_copy(skb, GFP_NOFS); ++ buffer = kmalloc(len, GFP_NOFS); ++ if (!buffer) ++ return -ENOMEM; + +- if (nskb) { +- rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); +- ret = conn->security->verify_response(conn, nskb); +- rxrpc_free_skb(nskb, rxrpc_skb_put_response_copy); +- } else { +- /* OOM - Drop the packet. */ +- rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); +- ret = -ENOMEM; +- } +- } else { +- ret = conn->security->verify_response(conn, skb); +- } ++ ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), buffer, len); ++ if (ret < 0) ++ goto out; + ++ ret = conn->security->verify_response(conn, skb, buffer, len); ++ ++out: ++ kfree(buffer); + return ret; + } + +--- a/net/rxrpc/insecure.c ++++ b/net/rxrpc/insecure.c +@@ -54,9 +54,10 @@ static int none_sendmsg_respond_to_chall + } + + static int none_verify_response(struct rxrpc_connection *conn, +- struct sk_buff *skb) ++ struct sk_buff *response_skb, ++ void *response, unsigned int len) + { +- return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, ++ return rxrpc_abort_conn(conn, response_skb, RX_PROTOCOL_ERROR, -EPROTO, + rxrpc_eproto_rxnull_response); + } + +--- a/net/rxrpc/rxgk.c ++++ b/net/rxrpc/rxgk.c +@@ -1084,11 +1084,12 @@ static int rxgk_sendmsg_respond_to_chall + * unsigned int call_numbers<>; + * }; + */ +-static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn, +- const struct krb5_enctype *krb5, +- struct sk_buff *skb, +- __be32 *p, __be32 *end) ++static int rxgk_verify_authenticator(struct rxrpc_connection *conn, ++ const struct krb5_enctype *krb5, ++ struct sk_buff *skb, ++ void *auth, unsigned int auth_len) + { ++ __be32 *p = auth, *end = auth + auth_len; + u32 app_len, call_count, level, epoch, cid, i; + + _enter(""); +@@ -1152,37 +1153,6 @@ static int rxgk_do_verify_authenticator( + } + + /* +- * Extract 
the authenticator and verify it. +- */ +-static int rxgk_verify_authenticator(struct rxrpc_connection *conn, +- const struct krb5_enctype *krb5, +- struct sk_buff *skb, +- unsigned int auth_offset, unsigned int auth_len) +-{ +- void *auth; +- __be32 *p; +- int ret; +- +- auth = kmalloc(auth_len, GFP_NOFS); +- if (!auth) +- return -ENOMEM; +- +- ret = skb_copy_bits(skb, auth_offset, auth, auth_len); +- if (ret < 0) { +- ret = rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, +- rxgk_abort_resp_short_auth); +- goto error; +- } +- +- p = auth; +- ret = rxgk_do_verify_authenticator(conn, krb5, skb, p, +- p + auth_len / sizeof(*p)); +-error: +- kfree(auth); +- return ret; +-} +- +-/* + * Verify a response. + * + * struct RXGK_Response { +@@ -1192,49 +1162,45 @@ error: + * }; + */ + static int rxgk_verify_response(struct rxrpc_connection *conn, +- struct sk_buff *skb) ++ struct sk_buff *skb, ++ void *buffer, unsigned int len) + { + const struct krb5_enctype *krb5; + struct rxrpc_key_token *token; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); +- struct rxgk_response rhdr; ++ struct rxgk_response *rhdr; + struct rxgk_context *gk; + struct key *key = NULL; +- unsigned int offset = sizeof(struct rxrpc_wire_header); +- unsigned int len = skb->len - sizeof(struct rxrpc_wire_header); +- unsigned int token_offset, token_len; +- unsigned int auth_offset, auth_len; ++ unsigned int resp_token_len, auth_len; ++ void *resp_token, *auth; + __be32 xauth_len; + int ret, ec; + + _enter("{%d}", conn->debug_id); + + /* Parse the RXGK_Response object */ +- if (sizeof(rhdr) + sizeof(__be32) > len) ++ if (len < sizeof(*rhdr) + sizeof(__be32)) + goto short_packet; +- +- if (skb_copy_bits(skb, offset, &rhdr, sizeof(rhdr)) < 0) +- goto short_packet; +- offset += sizeof(rhdr); +- len -= sizeof(rhdr); +- +- token_offset = offset; +- token_len = ntohl(rhdr.token_len); +- if (token_len > len || +- xdr_round_up(token_len) + sizeof(__be32) > len) ++ rhdr = buffer; ++ buffer += sizeof(*rhdr); ++ len 
-= sizeof(*rhdr); ++ ++ resp_token = buffer; ++ resp_token_len = ntohl(rhdr->token_len); ++ if (resp_token_len > len || ++ xdr_round_up(resp_token_len) + sizeof(__be32) > len) + goto short_packet; + +- trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, token_len); ++ trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, resp_token_len); + +- offset += xdr_round_up(token_len); +- len -= xdr_round_up(token_len); ++ buffer += xdr_round_up(resp_token_len); ++ len -= xdr_round_up(resp_token_len); + +- if (skb_copy_bits(skb, offset, &xauth_len, sizeof(xauth_len)) < 0) +- goto short_packet; +- offset += sizeof(xauth_len); ++ xauth_len = *(__be32 *)buffer; ++ buffer += sizeof(xauth_len); + len -= sizeof(xauth_len); + +- auth_offset = offset; ++ auth = buffer; + auth_len = ntohl(xauth_len); + if (auth_len > len) + goto short_packet; +@@ -1249,7 +1215,7 @@ static int rxgk_verify_response(struct r + * to the app to deal with - which might mean a round trip to + * userspace. + */ +- ret = rxgk_extract_token(conn, skb, token_offset, token_len, &key); ++ ret = rxgk_extract_token(conn, skb, resp_token, resp_token_len, &key); + if (ret < 0) + goto out; + +@@ -1263,7 +1229,7 @@ static int rxgk_verify_response(struct r + */ + token = key->payload.data[0]; + conn->security_level = token->rxgk->level; +- conn->rxgk.start_time = __be64_to_cpu(rhdr.start_time); ++ conn->rxgk.start_time = __be64_to_cpu(rhdr->start_time); + + gk = rxgk_generate_transport_key(conn, token->rxgk, sp->hdr.cksum, GFP_NOFS); + if (IS_ERR(gk)) { +@@ -1273,18 +1239,18 @@ static int rxgk_verify_response(struct r + + krb5 = gk->krb5; + +- trace_rxrpc_rx_response(conn, sp->hdr.serial, krb5->etype, sp->hdr.cksum, token_len); ++ trace_rxrpc_rx_response(conn, sp->hdr.serial, krb5->etype, sp->hdr.cksum, ++ resp_token_len); + + /* Decrypt, parse and verify the authenticator. 
*/ +- ret = rxgk_decrypt_skb(krb5, gk->resp_enc, skb, +- &auth_offset, &auth_len, &ec); ++ ret = rxgk_decrypt(krb5, gk->resp_enc, &auth, &auth_len, &ec); + if (ret < 0) { + rxrpc_abort_conn(conn, skb, RXGK_SEALEDINCON, ret, + rxgk_abort_resp_auth_dec); + goto out_gk; + } + +- ret = rxgk_verify_authenticator(conn, krb5, skb, auth_offset, auth_len); ++ ret = rxgk_verify_authenticator(conn, krb5, skb, auth, auth_len); + if (ret < 0) + goto out_gk; + +--- a/net/rxrpc/rxgk_app.c ++++ b/net/rxrpc/rxgk_app.c +@@ -40,7 +40,7 @@ + * }; + */ + int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, +- unsigned int ticket_offset, unsigned int ticket_len, ++ void *buffer, unsigned int ticket_len, + struct key **_key) + { + struct rxrpc_key_token *token; +@@ -49,7 +49,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_ + size_t pre_ticket_len, payload_len; + unsigned int klen, enctype; + void *payload, *ticket; +- __be32 *t, *p, *q, tmp[2]; ++ __be32 *t, *p, *q, *tmp; + int ret; + + _enter(""); +@@ -59,10 +59,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_ + rxgk_abort_resp_short_yfs_tkt); + + /* Get the session key length */ +- ret = skb_copy_bits(skb, ticket_offset, tmp, sizeof(tmp)); +- if (ret < 0) +- return rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO, +- rxgk_abort_resp_short_yfs_klen); ++ tmp = buffer; + enctype = ntohl(tmp[0]); + klen = ntohl(tmp[1]); + +@@ -84,12 +81,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_ + * it. + */ + ticket = payload + pre_ticket_len; +- ret = skb_copy_bits(skb, ticket_offset, ticket, ticket_len); +- if (ret < 0) { +- ret = rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO, +- rxgk_abort_resp_short_yfs_tkt); +- goto error; +- } ++ memcpy(ticket, buffer, ticket_len); + + /* Fill out the form header. */ + p = payload; +@@ -131,7 +123,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_ + goto error; + } + +- /* Ticket read in with skb_copy_bits above */ ++ /* Ticket appended above. 
*/ + q += xdr_round_up(ticket_len) / 4; + if (WARN_ON((unsigned long)q - (unsigned long)payload != payload_len)) { + ret = -EIO; +@@ -182,14 +174,15 @@ error: + * [tools.ietf.org/html/draft-wilkinson-afs3-rxgk-afs-08 sec 6.1] + */ + int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, +- unsigned int token_offset, unsigned int token_len, ++ void *token, unsigned int token_len, + struct key **_key) + { + const struct krb5_enctype *krb5; + const struct krb5_buffer *server_secret; + struct crypto_aead *token_enc = NULL; + struct key *server_key; +- unsigned int ticket_offset, ticket_len; ++ unsigned int ticket_len; ++ void *ticket; + u32 kvno, enctype; + int ret, ec = 0; + +@@ -197,24 +190,23 @@ int rxgk_extract_token(struct rxrpc_conn + __be32 kvno; + __be32 enctype; + __be32 token_len; +- } container; ++ } *container; + +- if (token_len < sizeof(container)) ++ if (token_len < sizeof(*container)) + goto short_packet; + + /* Decode the RXGK_TokenContainer object. This tells us which server + * key we should be using. We can then fetch the key, get the secret + * and set up the crypto to extract the token. + */ +- if (skb_copy_bits(skb, token_offset, &container, sizeof(container)) < 0) +- goto short_packet; ++ container = token; ++ token += sizeof(*container); + +- kvno = ntohl(container.kvno); +- enctype = ntohl(container.enctype); +- ticket_len = ntohl(container.token_len); +- ticket_offset = token_offset + sizeof(container); ++ kvno = ntohl(container->kvno); ++ enctype = ntohl(container->enctype); ++ ticket_len = ntohl(container->token_len); + +- if (ticket_len > xdr_round_down(token_len - sizeof(container))) ++ if (ticket_len > xdr_round_down(token_len - sizeof(*container))) + goto short_packet; + + _debug("KVNO %u", kvno); +@@ -237,8 +229,8 @@ int rxgk_extract_token(struct rxrpc_conn + * gain access to K0, from which we can derive the transport key and + * thence decode the authenticator. 
+ */ +- ret = rxgk_decrypt_skb(krb5, token_enc, skb, +- &ticket_offset, &ticket_len, &ec); ++ ticket = token; ++ ret = rxgk_decrypt(krb5, token_enc, &ticket, &ticket_len, &ec); + crypto_free_aead(token_enc); + token_enc = NULL; + if (ret < 0) { +@@ -248,7 +240,7 @@ int rxgk_extract_token(struct rxrpc_conn + return ret; + } + +- ret = conn->security->default_decode_ticket(conn, skb, ticket_offset, ++ ret = conn->security->default_decode_ticket(conn, skb, ticket, + ticket_len, _key); + if (ret < 0) + goto cant_get_token; +--- a/net/rxrpc/rxgk_common.h ++++ b/net/rxrpc/rxgk_common.h +@@ -41,10 +41,10 @@ struct rxgk_context { + * rxgk_app.c + */ + int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, +- unsigned int ticket_offset, unsigned int ticket_len, ++ void *ticket, unsigned int ticket_len, + struct key **_key); + int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, +- unsigned int token_offset, unsigned int token_len, ++ void *token, unsigned int token_len, + struct key **_key); + + /* +@@ -62,50 +62,6 @@ int rxgk_set_up_token_cipher(const struc + gfp_t gfp); + + /* +- * Apply decryption and checksumming functions to part of an skbuff. The +- * offset and length are updated to reflect the actual content of the encrypted +- * region. +- */ +-static inline +-int rxgk_decrypt_skb(const struct krb5_enctype *krb5, +- struct crypto_aead *aead, +- struct sk_buff *skb, +- unsigned int *_offset, unsigned int *_len, +- int *_error_code) +-{ +- struct scatterlist sg[16]; +- size_t offset = 0, len = *_len; +- int nr_sg, ret; +- +- sg_init_table(sg, ARRAY_SIZE(sg)); +- nr_sg = skb_to_sgvec(skb, sg, *_offset, len); +- if (unlikely(nr_sg < 0)) +- return nr_sg; +- +- ret = crypto_krb5_decrypt(krb5, aead, sg, nr_sg, +- &offset, &len); +- switch (ret) { +- case 0: +- *_offset += offset; +- *_len = len; +- break; +- case -EBADMSG: /* Checksum mismatch. 
*/ +- case -EPROTO: +- *_error_code = RXGK_SEALEDINCON; +- break; +- case -EMSGSIZE: +- *_error_code = RXGK_PACKETSHORT; +- break; +- case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */ +- default: +- *_error_code = RXGK_INCONSISTENCY; +- break; +- } +- +- return ret; +-} +- +-/* + * Apply decryption and checksumming functions a flat data buffer. The data + * point and length are updated to reflect the actual content of the encrypted + * region. +@@ -136,50 +92,6 @@ static inline int rxgk_decrypt(const str + case -EPROTO: + *_error_code = RXGK_SEALEDINCON; + break; +- case -EMSGSIZE: +- *_error_code = RXGK_PACKETSHORT; +- break; +- case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */ +- default: +- *_error_code = RXGK_INCONSISTENCY; +- break; +- } +- +- return ret; +-} +- +-/* +- * Check the MIC on a region of an skbuff. The offset and length are updated +- * to reflect the actual content of the secure region. +- */ +-static inline +-int rxgk_verify_mic_skb(const struct krb5_enctype *krb5, +- struct crypto_shash *shash, +- const struct krb5_buffer *metadata, +- struct sk_buff *skb, +- unsigned int *_offset, unsigned int *_len, +- u32 *_error_code) +-{ +- struct scatterlist sg[16]; +- size_t offset = 0, len = *_len; +- int nr_sg, ret; +- +- sg_init_table(sg, ARRAY_SIZE(sg)); +- nr_sg = skb_to_sgvec(skb, sg, *_offset, len); +- if (unlikely(nr_sg < 0)) +- return nr_sg; +- +- ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, nr_sg, +- &offset, &len); +- switch (ret) { +- case 0: +- *_offset += offset; +- *_len = len; +- break; +- case -EBADMSG: /* Checksum mismatch */ +- case -EPROTO: +- *_error_code = RXGK_SEALEDINCON; +- break; + case -EMSGSIZE: + *_error_code = RXGK_PACKETSHORT; + break; +--- a/net/rxrpc/rxkad.c ++++ b/net/rxrpc/rxkad.c +@@ -963,7 +963,6 @@ static int rxkad_decrypt_ticket(struct r + *_expiry = 0; + + ASSERT(server_key->payload.data[0] != NULL); +- ASSERTCMP((unsigned long) ticket & 7UL, ==, 0); + + 
memcpy(&iv, &server_key->payload.data[2], sizeof(iv)); + +@@ -1112,14 +1111,15 @@ unlock: + * verify a response + */ + static int rxkad_verify_response(struct rxrpc_connection *conn, +- struct sk_buff *skb) ++ struct sk_buff *skb, ++ void *buffer, unsigned int len) + { + struct rxkad_response *response; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_crypt session_key; + struct key *server_key; + time64_t expiry; +- void *ticket = NULL; ++ void *ticket; + u32 version, kvno, ticket_len, level; + __be32 csum; + int ret, i; +@@ -1142,13 +1142,8 @@ static int rxkad_verify_response(struct + } + } + +- ret = -ENOMEM; +- response = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); +- if (!response) +- goto error; +- +- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), +- response, sizeof(*response)) < 0) { ++ response = buffer; ++ if (len < sizeof(*response)) { + ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short); + goto error; +@@ -1160,6 +1155,9 @@ static int rxkad_verify_response(struct + + trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); + ++ buffer += sizeof(*response); ++ len -= sizeof(*response); ++ + if (version != RXKAD_VERSION) { + ret = rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_resp_version); +@@ -1179,13 +1177,8 @@ static int rxkad_verify_response(struct + } + + /* extract the kerberos ticket and decrypt and decode it */ +- ret = -ENOMEM; +- ticket = kmalloc(ticket_len, GFP_NOFS); +- if (!ticket) +- goto error; +- +- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), +- ticket, ticket_len) < 0) { ++ ticket = buffer; ++ if (ticket_len > len) { + ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short_tkt); + goto error; +@@ -1265,8 +1258,6 @@ static int rxkad_verify_response(struct + ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); + + error: +- kfree(ticket); +- kfree(response); 
+ key_put(server_key); + _leave(" = %d", ret); + return ret; diff --git a/queue-6.18/selftests-mptcp-drop-nanoseconds-width-specifier.patch b/queue-6.18/selftests-mptcp-drop-nanoseconds-width-specifier.patch new file mode 100644 index 0000000000..ffde2ea700 --- /dev/null +++ b/queue-6.18/selftests-mptcp-drop-nanoseconds-width-specifier.patch @@ -0,0 +1,112 @@ +From stable+bounces-256838-greg=kroah.com@vger.kernel.org Sat May 30 02:21:17 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 20:20:38 -0400 +Subject: selftests: mptcp: drop nanoseconds width specifier +To: stable@vger.kernel.org +Cc: "Matthieu Baerts (NGI0)" , Paolo Abeni , Sasha Levin +Message-ID: <20260530002038.2170683-1-sashal@kernel.org> + +From: "Matthieu Baerts (NGI0)" + +[ Upstream commit 01ff78e4b3d98689184c52d97f9575dfbdc3b10f ] + +Using the format specifier +%s%3N with GNU date is honoured, and only +prints 3 digits of the nanoseconds portion of the seconds since epoch, +which corresponds to the milliseconds. + +The uutils implementation of date currently does not honour this, and +always prints all 9 digits. This is a known issue [1], but can be worked +around by adapting this test to use nanoseconds instead of microseconds, +and then divide it by 1e6. + +This fix is similar to what has been done on systemd side [2], and it is +needed to run the selftests on Ubuntu 26.04, containing uutils 0.8.0. + +Note that the Fixes tag is there even if this patch doesn't fix an issue +in the kernel selftests, but it is useful for those using uutils 0.8.0. 
+ +Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") +Cc: stable@vger.kernel.org +Link: https://github.com/uutils/coreutils/issues/11658 [1] +Link: https://github.com/systemd/systemd/pull/41627 [2] +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-6-701e96419f2f@kernel.org +Signed-off-by: Paolo Abeni +[ kept `timeout ${timeout_test}` wrapper in do_transfer() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/mptcp_connect.sh | 6 +++--- + tools/testing/selftests/net/mptcp/mptcp_lib.sh | 10 +++++----- + 2 files changed, 8 insertions(+), 8 deletions(-) + +--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh +@@ -415,7 +415,7 @@ do_transfer() + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" + + local start +- start=$(date +%s%3N) ++ start=$(date +%s%N) + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ +@@ -428,7 +428,7 @@ do_transfer() + local rets=$? 
+ + local stop +- stop=$(date +%s%3N) ++ stop=$(date +%s%N) + + if $capture; then + sleep 1 +@@ -444,7 +444,7 @@ do_transfer() + fi + + local duration +- duration=$((stop-start)) ++ duration=$(((stop-start) / 1000000)) + printf "(duration %05sms) " "${duration}" + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + mptcp_lib_pr_fail "client exit code $retc, server $rets" +--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh +@@ -28,7 +28,7 @@ declare -rx MPTCP_LIB_AF_INET6=10 + MPTCP_LIB_SUBTESTS=() + MPTCP_LIB_SUBTESTS_DUPLICATED=0 + MPTCP_LIB_SUBTEST_FLAKY=0 +-MPTCP_LIB_SUBTESTS_LAST_TS_MS= ++MPTCP_LIB_SUBTESTS_LAST_TS_NS= + MPTCP_LIB_TEST_COUNTER=0 + MPTCP_LIB_TEST_FORMAT="%02u %-50s" + MPTCP_LIB_IP_MPTCP=0 +@@ -227,7 +227,7 @@ mptcp_lib_kversion_ge() { + } + + mptcp_lib_subtests_last_ts_reset() { +- MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)" ++ MPTCP_LIB_SUBTESTS_LAST_TS_NS="$(date +%s%N)" + } + mptcp_lib_subtests_last_ts_reset + +@@ -246,7 +246,7 @@ __mptcp_lib_result_check_duplicated() { + __mptcp_lib_result_add() { + local result="${1}" + local time="time=" +- local ts_prev_ms ++ local ts_prev_ns + shift + + local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) +@@ -256,9 +256,9 @@ __mptcp_lib_result_add() { + # not to add two '#' + [[ "${*}" != *"#"* ]] && time="# ${time}" + +- ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}" ++ ts_prev_ns="${MPTCP_LIB_SUBTESTS_LAST_TS_NS}" + mptcp_lib_subtests_last_ts_reset +- time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms" ++ time+="$(((MPTCP_LIB_SUBTESTS_LAST_TS_NS - ts_prev_ns) / 1000000))ms" + + MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}") + } diff --git a/queue-6.18/serdev-provide-a-bustype-shutdown-function.patch b/queue-6.18/serdev-provide-a-bustype-shutdown-function.patch new file mode 100644 index 0000000000..072f06b743 --- /dev/null +++ b/queue-6.18/serdev-provide-a-bustype-shutdown-function.patch @@ -0,0 +1,88 @@ +From sashal@kernel.org 
Fri May 29 21:23:53 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 15:23:48 -0400 +Subject: serdev: Provide a bustype shutdown function +To: stable@vger.kernel.org +Cc: "Uwe Kleine-König" , "Greg Kroah-Hartman" , "Sasha Levin" +Message-ID: <20260529192351.1696591-1-sashal@kernel.org> + +From: Uwe Kleine-König + +[ Upstream commit 6d71c62b13c33ea858ab298fe20beaec5736edc7 ] + +To prepare serdev driver to migrate away from struct device_driver::shutdown +(and then eventually remove that callback) create a serdev driver shutdown +callback and migration code to keep the existing behaviour. Note this +introduces a warning for each driver at register time that isn't converted +yet to that callback. + +Signed-off-by: Uwe Kleine-König +Link: https://patch.msgid.link/ab518883e3ed0976a19cb5b5b5faf42bd3a655b7.1765526117.git.u.kleine-koenig@baylibre.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 375ba7484132 ("Bluetooth: hci_qca: Convert timeout from jiffies to ms") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serdev/core.c | 21 +++++++++++++++++++++ + include/linux/serdev.h | 1 + + 2 files changed, 22 insertions(+) + +--- a/drivers/tty/serdev/core.c ++++ b/drivers/tty/serdev/core.c +@@ -414,11 +414,21 @@ static void serdev_drv_remove(struct dev + sdrv->remove(to_serdev_device(dev)); + } + ++static void serdev_drv_shutdown(struct device *dev) ++{ ++ const struct serdev_device_driver *sdrv = ++ to_serdev_device_driver(dev->driver); ++ ++ if (dev->driver && sdrv->shutdown) ++ sdrv->shutdown(to_serdev_device(dev)); ++} ++ + static const struct bus_type serdev_bus_type = { + .name = "serial", + .match = serdev_device_match, + .probe = serdev_drv_probe, + .remove = serdev_drv_remove, ++ .shutdown = serdev_drv_shutdown, + }; + + /** +@@ -814,6 +824,14 @@ void serdev_controller_remove(struct ser + } + EXPORT_SYMBOL_GPL(serdev_controller_remove); + ++static void serdev_legacy_shutdown(struct serdev_device *serdev) ++{ ++ struct device *dev 
= &serdev->dev; ++ struct device_driver *driver = dev->driver; ++ ++ driver->shutdown(dev); ++} ++ + /** + * __serdev_device_driver_register() - Register client driver with serdev core + * @sdrv: client driver to be associated with client-device. +@@ -830,6 +848,9 @@ int __serdev_device_driver_register(stru + /* force drivers to async probe so I/O is possible in probe */ + sdrv->driver.probe_type = PROBE_PREFER_ASYNCHRONOUS; + ++ if (!sdrv->shutdown && sdrv->driver.shutdown) ++ sdrv->shutdown = serdev_legacy_shutdown; ++ + return driver_register(&sdrv->driver); + } + EXPORT_SYMBOL_GPL(__serdev_device_driver_register); +--- a/include/linux/serdev.h ++++ b/include/linux/serdev.h +@@ -65,6 +65,7 @@ struct serdev_device_driver { + struct device_driver driver; + int (*probe)(struct serdev_device *); + void (*remove)(struct serdev_device *); ++ void (*shutdown)(struct serdev_device *); + }; + + static inline struct serdev_device_driver *to_serdev_device_driver(struct device_driver *d) diff --git a/queue-6.18/series b/queue-6.18/series index 411ce56f3f..a08cb75664 100644 --- a/queue-6.18/series +++ b/queue-6.18/series @@ -279,3 +279,25 @@ serial-zs-convert-to-use-a-platform-device.patch serial-core-introduce-guard-uart_port_lock_check_sysrq_irqsave.patch serial-8250-dispatch-sysrq-character-in-serial8250_handle_irq.patch serial-8250_dw-dispatch-sysrq-character-in-dw8250_handle_irq.patch +platform-x86-intel-vsec-refactor-base_addr-handling.patch +platform-x86-intel-vsec-make-driver_data-info-const.patch +platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch +x86-mm-disable-broadcast-tlb-flush-when-pcid-is-disabled.patch +rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch +rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch +serdev-provide-a-bustype-shutdown-function.patch +bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch 
+bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch +selftests-mptcp-drop-nanoseconds-width-specifier.patch +net-devmem-reject-dma-buf-bind-with-non-page-aligned-size-or-sg-length.patch +mptcp-handle-first-subflow-closing-consistently.patch +mptcp-borrow-forward-memory-from-subflow.patch +mptcp-do-not-drop-partial-packets.patch +arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch +octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch +mptcp-cleanup-fallback-dummy-mapping-generation.patch +mptcp-reset-rcv-wnd-on-disconnect.patch +cpufreq-intel_pstate-add-and-use-hybrid_get_cpu_type.patch +cpufreq-intel_pstate-use-correct-scaling-factor-on-raptor-lake-e.patch +xfrm-iptfs-reset-runtime-state-when-cloning-sas.patch +usb-dwc3-xilinx-fix-error-handling-in-zynqmp-init-error-paths.patch diff --git a/queue-6.18/usb-dwc3-xilinx-fix-error-handling-in-zynqmp-init-error-paths.patch b/queue-6.18/usb-dwc3-xilinx-fix-error-handling-in-zynqmp-init-error-paths.patch new file mode 100644 index 0000000000..af8fba250f --- /dev/null +++ b/queue-6.18/usb-dwc3-xilinx-fix-error-handling-in-zynqmp-init-error-paths.patch @@ -0,0 +1,96 @@ +From sashal@kernel.org Fri Jun 5 15:40:03 2026 +From: Sasha Levin +Date: Fri, 5 Jun 2026 09:39:59 -0400 +Subject: usb: dwc3: xilinx: fix error handling in zynqmp init error paths +To: stable@vger.kernel.org +Cc: Radhey Shyam Pandey , Thinh Nguyen , Greg Kroah-Hartman , Sasha Levin +Message-ID: <20260605133959.760692-1-sashal@kernel.org> + +From: Radhey Shyam Pandey + +[ Upstream commit c1a0ecbf32c4b397353204e2ec94c5bb9f3300ed ] + +Fix error handling and resource cleanup i.e remove invalid +phy_exit() after failed phy_init(), route failures through +proper cleanup paths and return 0 explicitly on success. 
+ +Fixes: 84770f028fab ("usb: dwc3: Add driver for Xilinx platforms") +Cc: stable@vger.kernel.org +Acked-by: Thinh Nguyen +Signed-off-by: Radhey Shyam Pandey +Link: https://patch.msgid.link/20260519115529.2980421-1-radhey.shyam.pandey@amd.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/dwc3/dwc3-xilinx.c | 27 +++++++++++++++------------ + 1 file changed, 15 insertions(+), 12 deletions(-) + +--- a/drivers/usb/dwc3/dwc3-xilinx.c ++++ b/drivers/usb/dwc3/dwc3-xilinx.c +@@ -190,15 +190,13 @@ static int dwc3_xlnx_init_zynqmp(struct + } + + ret = phy_init(priv_data->usb3_phy); +- if (ret < 0) { +- phy_exit(priv_data->usb3_phy); ++ if (ret < 0) + goto err; +- } + + ret = reset_control_deassert(apbrst); + if (ret < 0) { + dev_err(dev, "Failed to release APB reset\n"); +- goto err; ++ goto err_phy_exit; + } + + /* Set PIPE Power Present signal in FPD Power Present Register*/ +@@ -210,27 +208,25 @@ static int dwc3_xlnx_init_zynqmp(struct + ret = reset_control_deassert(crst); + if (ret < 0) { + dev_err(dev, "Failed to release core reset\n"); +- goto err; ++ goto err_phy_exit; + } + + ret = reset_control_deassert(hibrst); + if (ret < 0) { + dev_err(dev, "Failed to release hibernation reset\n"); +- goto err; ++ goto err_phy_exit; + } + + ret = phy_power_on(priv_data->usb3_phy); +- if (ret < 0) { +- phy_exit(priv_data->usb3_phy); +- goto err; +- } ++ if (ret < 0) ++ goto err_phy_exit; + + skip_usb3_phy: + /* ulpi reset via gpio-modepin or gpio-framework driver */ + reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(reset_gpio)) { +- return dev_err_probe(dev, PTR_ERR(reset_gpio), +- "Failed to request reset GPIO\n"); ++ ret = PTR_ERR(reset_gpio); ++ goto err_phy_power_off; + } + + if (reset_gpio) { +@@ -240,6 +236,13 @@ skip_usb3_phy: + } + + dwc3_xlnx_set_coherency(priv_data, XLNX_USB_TRAFFIC_ROUTE_CONFIG); ++ ++ return 0; ++ ++err_phy_power_off: ++ 
phy_power_off(priv_data->usb3_phy); ++err_phy_exit: ++ phy_exit(priv_data->usb3_phy); + err: + return ret; + } diff --git a/queue-6.18/x86-mm-disable-broadcast-tlb-flush-when-pcid-is-disabled.patch b/queue-6.18/x86-mm-disable-broadcast-tlb-flush-when-pcid-is-disabled.patch new file mode 100644 index 0000000000..4c3ec34c85 --- /dev/null +++ b/queue-6.18/x86-mm-disable-broadcast-tlb-flush-when-pcid-is-disabled.patch @@ -0,0 +1,73 @@ +From stable+bounces-256690-greg=kroah.com@vger.kernel.org Fri May 29 20:08:49 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 14:07:46 -0400 +Subject: x86/mm: Disable broadcast TLB flush when PCID is disabled +To: stable@vger.kernel.org +Cc: Tom Lendacky , Dave Hansen , "Borislav Petkov (AMD)" , Rik van Riel , stable@kernel.org, Sasha Levin +Message-ID: <20260529180746.1509509-1-sashal@kernel.org> + +From: Tom Lendacky + +[ Upstream commit 44126343d58c68adaa8343fbf1c07dd20078c35e ] + +Booting with "nopcid" clears X86_FEATURE_PCID and keeps CR4.PCIDE from being +set to one. On AMD CPUs that support INVLPGB, broadcast TLB flushing remains +enabled. + +There are two checks that decide whether the global ASID code runs, +mm_global_asid() and consider_global_asid(), that key off of the +X86_FEATURE_INVLPGB feature. Once an mm becomes active on more than three +CPUs, consider_global_asid() assigns it a global ASID, after which +flush_tlb_mm_range() takes the broadcast_tlb_flush() path using a non-zero +PCID. Issuing an INVLPGB with a non-zero PCID while CR4.PCIDE is not set +results in a #GP: + + Oops: general protection fault, kernel NULL pointer dereference 0x1: 0000 [#1] SMP NOPTI + CPU: 158 UID: 0 PID: 3119 Comm: snap Not tainted 7.1.0-rc3 #1 PREEMPT(full) + Hardware name: ... + RIP: 0010:broadcast_tlb_flush + Code: ... 89 da 48 83 c8 07 <0f> 01 fe eb 08 cc cc cc ... + Call Trace: + + flush_tlb_mm_range + ptep_clear_flush + wp_page_copy + ? 
_raw_spin_unlock + __handle_mm_fault + handle_mm_fault + do_user_addr_fault + exc_page_fault + asm_exc_page_fault + +All processors that support broadcast TLB invalidation also have PCID support, +so it is only the "nopcid" scenario that is of concern. In this situation just +disable the broadcast TLB support using the CPUID dependency support by making +X86_FEATURE_INVLPGB dependent on X86_FEATURE_PCID. + + [ bp: Massage commit message. ] + +Fixes: 4afeb0ed1753 ("x86/mm: Enable broadcast TLB invalidation for multi-threaded processes") +Suggested-by: Dave Hansen +Assisted-by: Claude:claude-opus-4.7 +Signed-off-by: Tom Lendacky +Signed-off-by: Borislav Petkov (AMD) +Acked-by: Rik van Riel +Cc: +Link: https://patch.msgid.link/b915acfd63e8b2a094fdeb8dc608738072518764.1779296450.git.thomas.lendacky@amd.com +[ adjusted insertion point to after X86_FEATURE_SPEC_CTRL_SSBD ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/cpuid-deps.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kernel/cpu/cpuid-deps.c ++++ b/arch/x86/kernel/cpu/cpuid-deps.c +@@ -89,6 +89,7 @@ static const struct cpuid_dep cpuid_deps + { X86_FEATURE_SHSTK, X86_FEATURE_XSAVES }, + { X86_FEATURE_FRED, X86_FEATURE_LKGS }, + { X86_FEATURE_SPEC_CTRL_SSBD, X86_FEATURE_SPEC_CTRL }, ++ { X86_FEATURE_INVLPGB, X86_FEATURE_PCID }, + {} + }; + diff --git a/queue-6.18/xfrm-iptfs-reset-runtime-state-when-cloning-sas.patch b/queue-6.18/xfrm-iptfs-reset-runtime-state-when-cloning-sas.patch new file mode 100644 index 0000000000..34977fb05a --- /dev/null +++ b/queue-6.18/xfrm-iptfs-reset-runtime-state-when-cloning-sas.patch @@ -0,0 +1,96 @@ +From stable+bounces-260689-greg=kroah.com@vger.kernel.org Fri Jun 5 15:01:30 2026 +From: Sasha Levin +Date: Fri, 5 Jun 2026 08:59:53 -0400 +Subject: xfrm: iptfs: reset runtime state when cloning SAs +To: stable@vger.kernel.org +Cc: Shaomin Chen , Steffen Klassert , Sasha Levin +Message-ID: <20260605125953.366286-1-sashal@kernel.org> 
+ +From: Shaomin Chen + +[ Upstream commit 7f83d174073234839aea176f265e517e0d50a1d2 ] + +iptfs_clone_state() clones the IPTFS mode data with kmemdup(). This +copies runtime objects which must not be shared with the original SA, +including the embedded sk_buff_head, hrtimers, spinlock, and in-flight +reassembly/reorder state. + +If xfrm_state_migrate() fails after clone_state() but before the later +init_state() call has reinitialized those fields, the cloned state can be +destroyed by xfrm_state_gc_task() with list and timer state copied from the +original SA. With queued packets this lets the clone splice and free skbs +owned by the original IPTFS queue, leading to use-after-free and +double-free reports in iptfs_destroy_state() and skb release paths. + +Reinitialize the clone's runtime state before publishing it through +x->mode_data. Because clone_state() now publishes a destroyable mode_data +object before init_state(), take the mode callback module reference there. +Avoid taking it again from __iptfs_init_state() for the same object. 
+ +Fixes: 0e4fbf013fa5 ("xfrm: iptfs: add user packet (tunnel ingress) handling") +Cc: stable@vger.kernel.org +Signed-off-by: Shaomin Chen +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/xfrm/xfrm_iptfs.c | 29 ++++++++++++++++++++++++----- + 1 file changed, 24 insertions(+), 5 deletions(-) + +--- a/net/xfrm/xfrm_iptfs.c ++++ b/net/xfrm/xfrm_iptfs.c +@@ -2650,7 +2650,8 @@ static void __iptfs_init_state(struct xf + x->props.enc_hdr_len = sizeof(struct ip_iptfs_hdr); + + /* Always keep a module reference when x->mode_data is set */ +- __module_get(x->mode_cbs->owner); ++ if (x->mode_data != xtfs) ++ __module_get(x->mode_cbs->owner); + + x->mode_data = xtfs; + xtfs->x = x; +@@ -2658,22 +2659,40 @@ static void __iptfs_init_state(struct xf + + static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) + { ++ struct skb_wseq *w_saved = NULL; + struct xfrm_iptfs_data *xtfs; + + xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL); + if (!xtfs) + return -ENOMEM; + +- xtfs->ra_newskb = NULL; + if (xtfs->cfg.reorder_win_size) { +- xtfs->w_saved = kcalloc(xtfs->cfg.reorder_win_size, +- sizeof(*xtfs->w_saved), GFP_KERNEL); +- if (!xtfs->w_saved) { ++ w_saved = kcalloc(xtfs->cfg.reorder_win_size, ++ sizeof(*w_saved), GFP_KERNEL); ++ if (!w_saved) { + kfree_sensitive(xtfs); + return -ENOMEM; + } + } ++ xtfs->w_saved = w_saved; + ++ __skb_queue_head_init(&xtfs->queue); ++ xtfs->queue_size = 0; ++ hrtimer_setup(&xtfs->iptfs_timer, iptfs_delay_timer, CLOCK_MONOTONIC, ++ IPTFS_HRTIMER_MODE); ++ ++ spin_lock_init(&xtfs->drop_lock); ++ hrtimer_setup(&xtfs->drop_timer, iptfs_drop_timer, CLOCK_MONOTONIC, ++ IPTFS_HRTIMER_MODE); ++ ++ xtfs->w_seq_set = false; ++ xtfs->w_wantseq = 0; ++ xtfs->w_savedlen = 0; ++ xtfs->ra_newskb = NULL; ++ xtfs->ra_wantseq = 0; ++ xtfs->ra_runtlen = 0; ++ ++ __module_get(x->mode_cbs->owner); + x->mode_data = xtfs; + xtfs->x = x; + -- 2.47.3