From: Greg Kroah-Hartman Date: Tue, 16 Jun 2026 05:01:48 +0000 (+0530) Subject: 6.6-stable patches X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=30fbab7f29e54d10f720bfdffc01bde5866a92bf;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: af_unix-cache-state-msg-in-unix_stream_read_generic.patch af_unix-fix-uaf-read-of-tail-len-in-unix_stream_data_wait.patch arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch ice-fix-vf-queue-configuration-with-low-mtu-values.patch iio-adc-npcm-convert-to-platform-remove-callback-returning-void.patch iio-adc-npcm-fix-unbalanced-clk_disable_unprepare.patch ipv6-addrconf-annotate-data-races-around-devconf-fields-ii.patch ipv6-ioam-add-null-check-for-idev-in-ipv6_hop_ioam.patch mm-damon-sysfs-schemes-delete-tried-region-in-regions_rmdirs.patch mm-memory-fix-spurious-warning-when-unmapping-device-private-exclusive-pages.patch mptcp-cleanup-fallback-dummy-mapping-generation.patch mptcp-do-not-drop-partial-packets.patch mptcp-handle-first-subflow-closing-consistently.patch mptcp-introduce-the-mptcp_init_skb-helper.patch mptcp-pm-fix-add_addr-timer-infinite-retry-on-option-space-insufficient.patch mptcp-reset-rcv-wnd-on-disconnect.patch mptcp-use-plain-bool-instead-of-custom-binary-enum.patch net-hsr-defer-node-table-free-until-after-rcu-readers.patch octeontx2-af-cgx-add-bounds-check-to-cgx_speed_mbps-index.patch octeontx2-af-replace-deprecated-strncpy-with-strscpy.patch octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch selftests-mptcp-drop-nanoseconds-width-specifier.patch serdev-make-serdev_bus_type-const.patch 
serdev-provide-a-bustype-shutdown-function.patch --- diff --git a/queue-6.6/af_unix-cache-state-msg-in-unix_stream_read_generic.patch b/queue-6.6/af_unix-cache-state-msg-in-unix_stream_read_generic.patch new file mode 100644 index 0000000000..c1c2e9976b --- /dev/null +++ b/queue-6.6/af_unix-cache-state-msg-in-unix_stream_read_generic.patch @@ -0,0 +1,92 @@ +From stable+bounces-256927-greg=kroah.com@vger.kernel.org Sat May 30 21:40:52 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 12:09:21 -0400 +Subject: af_unix: Cache state->msg in unix_stream_read_generic(). +To: stable@vger.kernel.org +Cc: Kuniyuki Iwashima , Jakub Kicinski , Sasha Levin +Message-ID: <20260530160922.2835319-1-sashal@kernel.org> + +From: Kuniyuki Iwashima + +[ Upstream commit 8b77338eb2af74bb93986e4a8cfd86724168fe39 ] + +In unix_stream_read_generic(), state->msg is fetched multiple times. + +Let's cache it in a local variable. + +Signed-off-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20250702223606.1054680-6-kuniyu@google.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: be309f8eae8b ("af_unix: Fix UAF read of tail->len in unix_stream_data_wait()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 29 ++++++++++++++++------------- + 1 file changed, 16 insertions(+), 13 deletions(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2714,20 +2714,21 @@ static int unix_stream_read_skb(struct s + static int unix_stream_read_generic(struct unix_stream_read_state *state, + bool freezable) + { +- struct scm_cookie scm; ++ int noblock = state->flags & MSG_DONTWAIT; + struct socket *sock = state->socket; ++ struct msghdr *msg = state->msg; + struct sock *sk = sock->sk; +- struct unix_sock *u = unix_sk(sk); +- int copied = 0; ++ size_t size = state->size; + int flags = state->flags; +- int noblock = flags & MSG_DONTWAIT; + bool check_creds = false; +- int target; ++ struct scm_cookie scm; ++ unsigned int last_len; ++ struct unix_sock *u; ++ int 
copied = 0; + int err = 0; + long timeo; ++ int target; + int skip; +- size_t size = state->size; +- unsigned int last_len; + + if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) { + err = -EINVAL; +@@ -2747,6 +2748,8 @@ static int unix_stream_read_generic(stru + + memset(&scm, 0, sizeof(scm)); + ++ u = unix_sk(sk); ++ + /* Lock the socket to prevent queue disordering + * while sleeps in memcpy_tomsg + */ +@@ -2840,10 +2843,10 @@ unlock: + } + + /* Copy address just once */ +- if (state->msg && state->msg->msg_name) { +- DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, +- state->msg->msg_name); +- unix_copy_addr(state->msg, skb->sk); ++ if (msg && msg->msg_name) { ++ DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); ++ ++ unix_copy_addr(msg, skb->sk); + sunaddr = NULL; + } + +@@ -2916,8 +2919,8 @@ unlock: + } while (size); + + mutex_unlock(&u->iolock); +- if (state->msg) +- scm_recv_unix(sock, state->msg, &scm, flags); ++ if (msg) ++ scm_recv_unix(sock, msg, &scm, flags); + else + scm_destroy(&scm); + out: diff --git a/queue-6.6/af_unix-fix-uaf-read-of-tail-len-in-unix_stream_data_wait.patch b/queue-6.6/af_unix-fix-uaf-read-of-tail-len-in-unix_stream_data_wait.patch new file mode 100644 index 0000000000..c03742dae6 --- /dev/null +++ b/queue-6.6/af_unix-fix-uaf-read-of-tail-len-in-unix_stream_data_wait.patch @@ -0,0 +1,167 @@ +From stable+bounces-256928-greg=kroah.com@vger.kernel.org Sat May 30 21:46:28 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 12:09:22 -0400 +Subject: af_unix: Fix UAF read of tail->len in unix_stream_data_wait() +To: stable@vger.kernel.org +Cc: Jann Horn , Kuniyuki Iwashima , Jakub Kicinski , Sasha Levin +Message-ID: <20260530160922.2835319-2-sashal@kernel.org> + +From: Jann Horn + +[ Upstream commit be309f8eae8b474a4a617eaae01324da996fc719 ] + +unix_stream_data_wait() does skb_peek_tail(&sk->sk_receive_queue) without +holding any lock that prevents SKBs on that queue from being dequeued and +freed. 
+This has been the case since commit 79f632c71bea ("unix/stream: fix +peeking with an offset larger than data in queue"). +The first consequence of this is that the pointer comparison +`tail != last` can be false even if `last` semantically refers to an +already-freed SKB while `tail` is a new SKB allocated at the same address; +which can cause unix_stream_data_wait() to wrongly keep blocking after new +data has arrived, but only in a weird scenario where a peeking recv() and +a normal recv() on the same socket are racing, which is probably not a +real problem. + +But since commit 2b514574f7e8 ("net: af_unix: implement splice for stream +af_unix sockets"), `tail` is actually dereferenced, which can cause UAF in +the following race scenario (where test_setup() runs single-threaded, +and afterwards, test_thread1() and test_thread2() run concurrently in +two threads): +``` +static int socks[2]; +void test_setup(void) { + socketpair(AF_UNIX, SOCK_STREAM, 0, socks); + send(socks[1], "A", 1, 0); + int peekoff = 1; + setsockopt(socks[0], SOL_SOCKET, SO_PEEK_OFF, &peekoff, sizeof(peekoff)); +} +void test_thread1(void) { + char dummy; + recv(socks[0], &dummy, 1, MSG_PEEK); +} +void test_thread2(void) { + char dummy; + recv(socks[0], &dummy, 1, 0); + shutdown(socks[1], SHUT_WR); +} +``` + +when racing like this: +``` +thread1 thread2 +unix_stream_read_generic + mutex_lock(&u->iolock) + skb_peek(&sk->sk_receive_queue) + skb_peek_next(skb, &sk->sk_receive_queue) + mutex_unlock(&u->iolock) + unix_stream_read_generic + unix_state_lock(sk) + skb_peek(&sk->sk_receive_queue) + unix_state_unlock(sk) + unix_stream_data_wait + unix_state_lock(sk) + tail = skb_peek_tail(&sk->sk_receive_queue) + spin_lock(&sk->sk_receive_queue.lock) + __skb_unlink(skb, &sk->sk_receive_queue) + spin_unlock(&sk->sk_receive_queue.lock) + consume_skb(skb) [frees the SKB] + `tail != last`: false + `tail`: true + `tail->len != last_len` ***UAF*** +``` + +Fix the UAF by removing the read of tail->len; checking 
tail->len would +only make sense if SKBs in the receive queue of a UNIX socket could grow, +which can no longer happen. + +Kuniyuki explained: + +> When commit 869e7c62486e ("net: af_unix: implement stream sendpage +> support") added sendpage() support, data could be appended to the last +> skb in the receiver's queue. +> +> That's why we needed to check if the length of the last skb was changed +> while waiting for new data in unix_stream_data_wait(). +> +> However, commit a0dbf5f818f9 ("af_unix: Support MSG_SPLICE_PAGES") and +> commit 57d44a354a43 ("unix: Convert unix_stream_sendpage() to use +> MSG_SPLICE_PAGES") refactored sendmsg(), and now data is always added +> to a new skb. + +That means this fix is not suitable for kernels before 6.5. + +Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets") +Cc: stable@vger.kernel.org # 6.5.x +Signed-off-by: Jann Horn +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20260518-b4-unix-recv-wait-hotfix-v2-1-83e29ce8ad31@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 11 ++--------- + 1 file changed, 2 insertions(+), 9 deletions(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2528,8 +2528,7 @@ static int unix_read_skb(struct sock *sk + * Sleep until more data has arrived. But check for races.. 
+ */ + static long unix_stream_data_wait(struct sock *sk, long timeo, +- struct sk_buff *last, unsigned int last_len, +- bool freezable) ++ struct sk_buff *last, bool freezable) + { + unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; + struct sk_buff *tail; +@@ -2542,7 +2541,6 @@ static long unix_stream_data_wait(struct + + tail = skb_peek_tail(&sk->sk_receive_queue); + if (tail != last || +- (tail && tail->len != last_len) || + sk->sk_err || + (sk->sk_shutdown & RCV_SHUTDOWN) || + signal_pending(current) || +@@ -2722,7 +2720,6 @@ static int unix_stream_read_generic(stru + int flags = state->flags; + bool check_creds = false; + struct scm_cookie scm; +- unsigned int last_len; + struct unix_sock *u; + int copied = 0; + int err = 0; +@@ -2769,7 +2766,6 @@ redo: + goto unlock; + } + last = skb = skb_peek(&sk->sk_receive_queue); +- last_len = last ? last->len : 0; + + again: + #if IS_ENABLED(CONFIG_AF_UNIX_OOB) +@@ -2803,8 +2799,7 @@ again: + + mutex_unlock(&u->iolock); + +- timeo = unix_stream_data_wait(sk, timeo, last, +- last_len, freezable); ++ timeo = unix_stream_data_wait(sk, timeo, last, freezable); + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); +@@ -2822,7 +2817,6 @@ unlock: + while (skip >= unix_skb_len(skb)) { + skip -= unix_skb_len(skb); + last = skb; +- last_len = skb->len; + skb = skb_peek_next(skb, &sk->sk_receive_queue); + if (!skb) + goto again; +@@ -2908,7 +2902,6 @@ unlock: + + skip = 0; + last = skb; +- last_len = skb->len; + unix_state_lock(sk); + skb = skb_peek_next(skb, &sk->sk_receive_queue); + if (skb) diff --git a/queue-6.6/arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch b/queue-6.6/arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch new file mode 100644 index 0000000000..497c70a26c --- /dev/null +++ b/queue-6.6/arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch @@ -0,0 +1,55 @@ +From stable+bounces-259400-greg=kroah.com@vger.kernel.org Mon Jun 1 06:23:08 2026 +From: Sasha Levin 
+Date: Sun, 31 May 2026 20:52:59 -0400 +Subject: arm64: tlb: Flush walk cache when unsharing PMD tables +To: stable@vger.kernel.org +Cc: Zeng Heng , Catalin Marinas , Sasha Levin +Message-ID: <20260601005259.101534-1-sashal@kernel.org> + +From: Zeng Heng + +[ Upstream commit c2ff4764e03e7a8d758352f4aceb8fe1be6ac971 ] + +When huge_pmd_unshare() is called to unshare a PMD table, the +tlb_unshare_pmd_ptdesc() function sets tlb->unshared_tables=true +but the aarch64 tlb_flush() only checked tlb->freed_tables to +determine whether to use TLBF_NONE (vae1is, invalidates walk +cache) or TLBF_NOWALKCACHE (vale1is, leaf-only). + +This caused the stale PMD page table entry to remain in the walk cache +after unshare, potentially leading to incorrect page table walks. + +Fix by including unshared_tables in the check, so that when +unsharing tables, TLBF_NONE is used and the walk cache is properly +invalidated. + +Here is the detailed distinction between vae1is and vale1is: + +| Instruction Combination | Actual Invalidation Scope | +| ------------------------ | --------------------------------------------------| +| `VAE1IS` + TTL=`0` | All entries at all levels (full invalidation) | +| `VAE1IS` + TTL=`2` (L2) | Non-leaf at Level 0/1 + leaf at Level 2 | +| `VALE1IS` + TTL=`0` | Leaf entries at all levels (non-leaf not cleared) | +| `VALE1IS` + TTL=`2` (L2) | Leaf entry at Level 2 only | + +Signed-off-by: Zeng Heng +Fixes: 8ce720d5bd91 ("mm/hugetlb: fix excessive IPI broadcasts when unsharing PMD tables using mmu_gather") +Cc: +Signed-off-by: Catalin Marinas +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/tlb.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm64/include/asm/tlb.h ++++ b/arch/arm64/include/asm/tlb.h +@@ -53,7 +53,7 @@ static inline int tlb_get_level(struct m + static inline void tlb_flush(struct mmu_gather *tlb) + { + struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); +- bool last_level = 
!tlb->freed_tables; ++ bool last_level = !(tlb->freed_tables || tlb->unshared_tables); + unsigned long stride = tlb_get_unmap_size(tlb); + int tlb_level = tlb_get_level(tlb); + diff --git a/queue-6.6/bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch b/queue-6.6/bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch new file mode 100644 index 0000000000..252fd04cf3 --- /dev/null +++ b/queue-6.6/bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch @@ -0,0 +1,154 @@ +From stable+bounces-256746-greg=kroah.com@vger.kernel.org Sat May 30 01:03:13 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 15:33:03 -0400 +Subject: Bluetooth: hci_qca: Convert timeout from jiffies to ms +To: stable@vger.kernel.org +Cc: Shuai Zhang , Paul Menzel , Bartosz Golaszewski , Luiz Augusto von Dentz , Sasha Levin +Message-ID: <20260529193303.1704693-4-sashal@kernel.org> + +From: Shuai Zhang + +[ Upstream commit 375ba7484132662a4a8c7547d088fb6275c00282 ] + +Since the timer uses jiffies as its unit rather than ms, the timeout value +must be converted from ms to jiffies when configuring the timer. Otherwise, +the intended 8s timeout is incorrectly set to approximately 33s. + +To improve readability, embed msecs_to_jiffies() directly in the macro +definitions and drop the _MS suffix from macros that now yield jiffies +values: MEMDUMP_TIMEOUT, FW_DOWNLOAD_TIMEOUT, IBS_DISABLE_SSR_TIMEOUT, +CMD_TRANS_TIMEOUT, and IBS_BTSOC_TX_IDLE_TIMEOUT. + +IBS_WAKE_RETRANS_TIMEOUT_MS and IBS_HOST_TX_IDLE_TIMEOUT_MS are +intentionally left unchanged. Their values are stored in the struct fields +wake_retrans and tx_idle_delay, which hold ms values at runtime and can be +modified via debugfs. The msecs_to_jiffies() conversion happens at each +call site against the field value, so it cannot be embedded in the macro. 
+ +Wake timer depends on commit c347ca17d62a + +Cc: stable@vger.kernel.org +Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR") +Reviewed-by: Paul Menzel +Acked-by: Bartosz Golaszewski +Signed-off-by: Shuai Zhang +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bluetooth/hci_qca.c | 33 ++++++++++++++++----------------- + 1 file changed, 16 insertions(+), 17 deletions(-) + +--- a/drivers/bluetooth/hci_qca.c ++++ b/drivers/bluetooth/hci_qca.c +@@ -46,13 +46,12 @@ + #define HCI_MAX_IBS_SIZE 10 + + #define IBS_WAKE_RETRANS_TIMEOUT_MS 100 +-#define IBS_BTSOC_TX_IDLE_TIMEOUT_MS 200 ++#define IBS_BTSOC_TX_IDLE_TIMEOUT msecs_to_jiffies(200) + #define IBS_HOST_TX_IDLE_TIMEOUT_MS 2000 +-#define CMD_TRANS_TIMEOUT_MS 100 +-#define MEMDUMP_TIMEOUT_MS 8000 +-#define IBS_DISABLE_SSR_TIMEOUT_MS \ +- (MEMDUMP_TIMEOUT_MS + FW_DOWNLOAD_TIMEOUT_MS) +-#define FW_DOWNLOAD_TIMEOUT_MS 3000 ++#define CMD_TRANS_TIMEOUT msecs_to_jiffies(100) ++#define MEMDUMP_TIMEOUT msecs_to_jiffies(8000) ++#define FW_DOWNLOAD_TIMEOUT msecs_to_jiffies(3000) ++#define IBS_DISABLE_SSR_TIMEOUT (MEMDUMP_TIMEOUT + FW_DOWNLOAD_TIMEOUT) + + /* susclk rate */ + #define SUSCLK_RATE_32KHZ 32768 +@@ -1077,7 +1076,7 @@ static void qca_controller_memdump(struc + + queue_delayed_work(qca->workqueue, + &qca->ctrl_memdump_timeout, +- msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)); ++ MEMDUMP_TIMEOUT); + skb_pull(skb, sizeof(qca_memdump->ram_dump_size)); + qca_memdump->current_seq_no = 0; + qca_memdump->received_dump = 0; +@@ -1349,7 +1348,7 @@ static int qca_set_baudrate(struct hci_d + + if (hu->serdev) + serdev_device_wait_until_sent(hu->serdev, +- msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); ++ CMD_TRANS_TIMEOUT); + + /* Give the controller time to process the request */ + switch (qca_soc_type(hu)) { +@@ -1380,8 +1379,8 @@ static inline void host_set_baudrate(str + + static int qca_send_power_pulse(struct hci_uart *hu, bool on) + { 
++ int timeout = CMD_TRANS_TIMEOUT; + int ret; +- int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS); + u8 cmd = on ? QCA_WCN3990_POWERON_PULSE : QCA_WCN3990_POWEROFF_PULSE; + + /* These power pulses are single byte command which are sent +@@ -1583,7 +1582,7 @@ static void qca_wait_for_dump_collection + struct qca_data *qca = hu->priv; + + wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION, +- TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS); ++ TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT); + + clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); + } +@@ -2428,7 +2427,7 @@ static void qca_serdev_remove(struct ser + static void qca_serdev_shutdown(struct serdev_device *serdev) + { + int ret; +- int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS); ++ int timeout = CMD_TRANS_TIMEOUT; + struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); + struct hci_uart *hu = &qcadev->serdev_hu; + struct hci_dev *hdev = hu->hdev; +@@ -2485,7 +2484,7 @@ static int __maybe_unused qca_suspend(st + bool tx_pending = false; + int ret = 0; + u8 cmd; +- u32 wait_timeout = 0; ++ unsigned long wait_timeout = 0; + + set_bit(QCA_SUSPENDING, &qca->flags); + +@@ -2506,15 +2505,15 @@ static int __maybe_unused qca_suspend(st + if (test_bit(QCA_IBS_DISABLED, &qca->flags) || + test_bit(QCA_SSR_TRIGGERED, &qca->flags)) { + wait_timeout = test_bit(QCA_SSR_TRIGGERED, &qca->flags) ? +- IBS_DISABLE_SSR_TIMEOUT_MS : +- FW_DOWNLOAD_TIMEOUT_MS; ++ IBS_DISABLE_SSR_TIMEOUT : ++ FW_DOWNLOAD_TIMEOUT; + + /* QCA_IBS_DISABLED flag is set to true, During FW download + * and during memory dump collection. It is reset to false, + * After FW download complete. 
+ */ + wait_on_bit_timeout(&qca->flags, QCA_IBS_DISABLED, +- TASK_UNINTERRUPTIBLE, msecs_to_jiffies(wait_timeout)); ++ TASK_UNINTERRUPTIBLE, wait_timeout); + + if (test_bit(QCA_IBS_DISABLED, &qca->flags)) { + bt_dev_err(hu->hdev, "SSR or FW download time out"); +@@ -2566,7 +2565,7 @@ static int __maybe_unused qca_suspend(st + + if (tx_pending) { + serdev_device_wait_until_sent(hu->serdev, +- msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); ++ CMD_TRANS_TIMEOUT); + serial_clock_vote(HCI_IBS_TX_VOTE_CLOCK_OFF, hu); + } + +@@ -2575,7 +2574,7 @@ static int __maybe_unused qca_suspend(st + */ + ret = wait_event_interruptible_timeout(qca->suspend_wait_q, + qca->rx_ibs_state == HCI_IBS_RX_ASLEEP, +- msecs_to_jiffies(IBS_BTSOC_TX_IDLE_TIMEOUT_MS)); ++ IBS_BTSOC_TX_IDLE_TIMEOUT); + if (ret == 0) { + ret = -ETIMEDOUT; + goto error; diff --git a/queue-6.6/bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch b/queue-6.6/bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch new file mode 100644 index 0000000000..4c19270c74 --- /dev/null +++ b/queue-6.6/bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch @@ -0,0 +1,53 @@ +From stable+bounces-256745-greg=kroah.com@vger.kernel.org Sat May 30 01:04:57 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 15:33:02 -0400 +Subject: Bluetooth: hci_qca: Migrate to serdev specific shutdown function +To: stable@vger.kernel.org +Cc: "Uwe Kleine-König" , "Greg Kroah-Hartman" , "Sasha Levin" +Message-ID: <20260529193303.1704693-3-sashal@kernel.org> + +From: Uwe Kleine-König + +[ Upstream commit 12a6a5726c515455935982429ac35dee2307233d ] + +This saves a cast in the driver. The motivation is stop using the callback +.shutdown in qca_serdev_driver.driver to make it possible to drop that. 
+ +Signed-off-by: Uwe Kleine-König +Link: https://patch.msgid.link/261a3384e25c4837d4efee87958805f15d7d4e3c.1765526117.git.u.kleine-koenig@baylibre.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 375ba7484132 ("Bluetooth: hci_qca: Convert timeout from jiffies to ms") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bluetooth/hci_qca.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/bluetooth/hci_qca.c ++++ b/drivers/bluetooth/hci_qca.c +@@ -2425,11 +2425,10 @@ static void qca_serdev_remove(struct ser + hci_uart_unregister_device(&qcadev->serdev_hu); + } + +-static void qca_serdev_shutdown(struct device *dev) ++static void qca_serdev_shutdown(struct serdev_device *serdev) + { + int ret; + int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS); +- struct serdev_device *serdev = to_serdev_device(dev); + struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); + struct hci_uart *hu = &qcadev->serdev_hu; + struct hci_dev *hdev = hu->hdev; +@@ -2650,11 +2649,11 @@ static void hciqca_coredump(struct devic + static struct serdev_device_driver qca_serdev_driver = { + .probe = qca_serdev_probe, + .remove = qca_serdev_remove, ++ .shutdown = qca_serdev_shutdown, + .driver = { + .name = "hci_uart_qca", + .of_match_table = of_match_ptr(qca_bluetooth_of_match), + .acpi_match_table = ACPI_PTR(qca_bluetooth_acpi_match), +- .shutdown = qca_serdev_shutdown, + .pm = &qca_pm_ops, + #ifdef CONFIG_DEV_COREDUMP + .coredump = hciqca_coredump, diff --git a/queue-6.6/ice-fix-vf-queue-configuration-with-low-mtu-values.patch b/queue-6.6/ice-fix-vf-queue-configuration-with-low-mtu-values.patch new file mode 100644 index 0000000000..0317c38fe9 --- /dev/null +++ b/queue-6.6/ice-fix-vf-queue-configuration-with-low-mtu-values.patch @@ -0,0 +1,63 @@ +From stable+bounces-256849-greg=kroah.com@vger.kernel.org Sat May 30 06:54:08 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 21:20:52 -0400 +Subject: ice: fix VF queue configuration 
with low MTU values +To: stable@vger.kernel.org +Cc: Jose Ignacio Tornos Martinez , Jacob Keller , Michal Swiatkowski , Paul Menzel , Rafal Romanowski , Tony Nguyen , Jakub Kicinski , Sasha Levin +Message-ID: <20260530012052.2574044-1-sashal@kernel.org> + +From: Jose Ignacio Tornos Martinez + +[ Upstream commit 3ba4dd024d26372733d1c02e13e076c6016e3320 ] + +The ice driver's VF queue configuration validation rejects +databuffer_size values below 1024 bytes, which prevents VFs from +using MTU values below 871 bytes. + +The iavf driver calculates databuffer_size based on the MTU using: + databuffer_size = ALIGN(MTU + LIBETH_RX_LL_LEN, 128) + +where LIBETH_RX_LL_LEN = 26 (ETH_HLEN + 2*VLAN_HLEN + ETH_FCS_LEN). + +For MTU values below 871: + MTU 870: 870 + 26 = 896, aligned to 128 = 896 (< 1024, rejected) + MTU 871: 871 + 26 = 897, aligned to 128 = 1024 (>= 1024, accepted) + +The 1024-byte minimum seems unnecessarily restrictive, because the hardware +supports databuffer_size as low as 128 bytes (the alignment boundary), +which should allow MTU values down to the standard minimum of 68 bytes. + +I haven't found the reason why the limit was configured in the commit +9c7dd7566d18 ("ice: add validation in OP_CONFIG_VSI_QUEUES VF message"), so +with no more information and since it is working, change the minimum +databuffer_size validation from 1024 to 128 bytes to allow standard low +MTU values while still preventing invalid configurations. 
+ +Fixes: 9c7dd7566d18 ("ice: add validation in OP_CONFIG_VSI_QUEUES VF message") +cc: stable@vger.kernel.org +Signed-off-by: Jose Ignacio Tornos Martinez +Reviewed-by: Jacob Keller +Reviewed-by: Michal Swiatkowski +Reviewed-by: Paul Menzel +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Link: https://patch.msgid.link/20260515182419.1597859-3-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +[ applied the change to ice_virtchnl.c ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ice/ice_virtchnl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c +@@ -1681,7 +1681,7 @@ static int ice_vc_cfg_qs_msg(struct ice_ + + if (qpi->rxq.databuffer_size != 0 && + (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || +- qpi->rxq.databuffer_size < 1024)) ++ qpi->rxq.databuffer_size < 128)) + goto error_param; + vsi->rx_buf_len = qpi->rxq.databuffer_size; + ring->rx_buf_len = vsi->rx_buf_len; diff --git a/queue-6.6/iio-adc-npcm-convert-to-platform-remove-callback-returning-void.patch b/queue-6.6/iio-adc-npcm-convert-to-platform-remove-callback-returning-void.patch new file mode 100644 index 0000000000..ac524a8b47 --- /dev/null +++ b/queue-6.6/iio-adc-npcm-convert-to-platform-remove-callback-returning-void.patch @@ -0,0 +1,60 @@ +From stable+bounces-260463-greg=kroah.com@vger.kernel.org Thu Jun 4 17:10:09 2026 +From: Sasha Levin +Date: Thu, 4 Jun 2026 07:25:55 -0400 +Subject: iio: adc: npcm: Convert to platform remove callback returning void +To: stable@vger.kernel.org +Cc: "Uwe Kleine-König" , "Jonathan Cameron" , "Sasha Levin" +Message-ID: <20260604112556.3253850-1-sashal@kernel.org> + +From: Uwe Kleine-König + +[ Upstream commit 5253a5cc7709688b9a000f7928bfaa3366d0af98 ] + +The .remove() callback for a platform driver returns an int which makes +many driver authors wrongly assume it's possible to do 
error handling by +returning an error code. However the value returned is ignored (apart +from emitting a warning) and this typically results in resource leaks. +To improve here there is a quest to make the remove callback return +void. In the first step of this quest all drivers are converted to +.remove_new() which already returns void. Eventually after all drivers +are converted, .remove_new() will be renamed to .remove(). + +Trivially convert this driver from always returning zero in the remove +callback to the void returning variant. + +Signed-off-by: Uwe Kleine-König +Link: https://lore.kernel.org/r/20230919174931.1417681-18-u.kleine-koenig@pengutronix.de +Signed-off-by: Jonathan Cameron +Stable-dep-of: 0d42e2c0bd6c ("iio: adc: npcm: fix unbalanced clk_disable_unprepare()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/adc/npcm_adc.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/drivers/iio/adc/npcm_adc.c ++++ b/drivers/iio/adc/npcm_adc.c +@@ -320,7 +320,7 @@ err_disable_clk: + return ret; + } + +-static int npcm_adc_remove(struct platform_device *pdev) ++static void npcm_adc_remove(struct platform_device *pdev) + { + struct iio_dev *indio_dev = platform_get_drvdata(pdev); + struct npcm_adc *info = iio_priv(indio_dev); +@@ -333,13 +333,11 @@ static int npcm_adc_remove(struct platfo + if (!IS_ERR(info->vref)) + regulator_disable(info->vref); + clk_disable_unprepare(info->adc_clk); +- +- return 0; + } + + static struct platform_driver npcm_adc_driver = { + .probe = npcm_adc_probe, +- .remove = npcm_adc_remove, ++ .remove_new = npcm_adc_remove, + .driver = { + .name = "npcm_adc", + .of_match_table = npcm_adc_match, diff --git a/queue-6.6/iio-adc-npcm-fix-unbalanced-clk_disable_unprepare.patch b/queue-6.6/iio-adc-npcm-fix-unbalanced-clk_disable_unprepare.patch new file mode 100644 index 0000000000..eccef873a2 --- /dev/null +++ b/queue-6.6/iio-adc-npcm-fix-unbalanced-clk_disable_unprepare.patch @@ -0,0 
+1,112 @@ +From stable+bounces-260464-greg=kroah.com@vger.kernel.org Thu Jun 4 16:57:06 2026 +From: Sasha Levin +Date: Thu, 4 Jun 2026 07:25:56 -0400 +Subject: iio: adc: npcm: fix unbalanced clk_disable_unprepare() +To: stable@vger.kernel.org +Cc: David Carlier , Andy Shevchenko , Stable@vger.kernel.org, Jonathan Cameron , Sasha Levin +Message-ID: <20260604112556.3253850-2-sashal@kernel.org> + +From: David Carlier + +[ Upstream commit 0d42e2c0bd6ceb89e44c6e065f9bdf9b1df3ef0c ] + +The driver acquired the ADC clock with devm_clk_get() and read its +rate, but never called clk_prepare_enable(). The probe error path and +npcm_adc_remove() both called clk_disable_unprepare() unconditionally, +causing the clk framework's enable/prepare counts to underflow on +probe failure or module unbind. + +The issue went unnoticed because NPCM BMC firmware leaves the ADC +clock enabled at boot, so the driver happened to work in practice. + +Switch to devm_clk_get_enabled() so the clock is properly enabled +during probe and automatically released by the device-managed +cleanup, and drop the now-redundant clk_disable_unprepare() from +both the probe error path and remove(). + +While at it, drop the duplicate error message on devm_request_irq() +failure since the IRQ core already logs it. 
+ +Fixes: 9bf85fbc9d8f ("iio: adc: add NPCM ADC driver") +Signed-off-by: David Carlier +Reviewed-by: Andy Shevchenko +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/adc/npcm_adc.c | 25 ++++++++----------------- + 1 file changed, 8 insertions(+), 17 deletions(-) + +--- a/drivers/iio/adc/npcm_adc.c ++++ b/drivers/iio/adc/npcm_adc.c +@@ -231,7 +231,7 @@ static int npcm_adc_probe(struct platfor + if (IS_ERR(info->reset)) + return PTR_ERR(info->reset); + +- info->adc_clk = devm_clk_get(&pdev->dev, NULL); ++ info->adc_clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(info->adc_clk)) { + dev_warn(&pdev->dev, "ADC clock failed: can't read clk\n"); + return PTR_ERR(info->adc_clk); +@@ -244,17 +244,13 @@ static int npcm_adc_probe(struct platfor + info->adc_sample_hz = clk_get_rate(info->adc_clk) / ((div + 1) * 2); + + irq = platform_get_irq(pdev, 0); +- if (irq < 0) { +- ret = irq; +- goto err_disable_clk; +- } ++ if (irq < 0) ++ return irq; + + ret = devm_request_irq(&pdev->dev, irq, npcm_adc_isr, 0, + "NPCM_ADC", indio_dev); +- if (ret < 0) { +- dev_err(dev, "failed requesting interrupt\n"); +- goto err_disable_clk; +- } ++ if (ret < 0) ++ return ret; + + reg_con = ioread32(info->regs + NPCM_ADCCON); + info->vref = devm_regulator_get_optional(&pdev->dev, "vref"); +@@ -262,7 +258,7 @@ static int npcm_adc_probe(struct platfor + ret = regulator_enable(info->vref); + if (ret) { + dev_err(&pdev->dev, "Can't enable ADC reference voltage\n"); +- goto err_disable_clk; ++ return ret; + } + + iowrite32(reg_con & ~NPCM_ADCCON_REFSEL, +@@ -272,10 +268,8 @@ static int npcm_adc_probe(struct platfor + * Any error which is not ENODEV indicates the regulator + * has been specified and so is a failure case. 
+ */ +- if (PTR_ERR(info->vref) != -ENODEV) { +- ret = PTR_ERR(info->vref); +- goto err_disable_clk; +- } ++ if (PTR_ERR(info->vref) != -ENODEV) ++ return PTR_ERR(info->vref); + + /* Use internal reference */ + iowrite32(reg_con | NPCM_ADCCON_REFSEL, +@@ -314,8 +308,6 @@ err_iio_register: + iowrite32(reg_con & ~NPCM_ADCCON_ADC_EN, info->regs + NPCM_ADCCON); + if (!IS_ERR(info->vref)) + regulator_disable(info->vref); +-err_disable_clk: +- clk_disable_unprepare(info->adc_clk); + + return ret; + } +@@ -332,7 +324,6 @@ static void npcm_adc_remove(struct platf + iowrite32(regtemp & ~NPCM_ADCCON_ADC_EN, info->regs + NPCM_ADCCON); + if (!IS_ERR(info->vref)) + regulator_disable(info->vref); +- clk_disable_unprepare(info->adc_clk); + } + + static struct platform_driver npcm_adc_driver = { diff --git a/queue-6.6/ipv6-addrconf-annotate-data-races-around-devconf-fields-ii.patch b/queue-6.6/ipv6-addrconf-annotate-data-races-around-devconf-fields-ii.patch new file mode 100644 index 0000000000..ad855c7a0c --- /dev/null +++ b/queue-6.6/ipv6-addrconf-annotate-data-races-around-devconf-fields-ii.patch @@ -0,0 +1,416 @@ +From stable+bounces-256831-greg=kroah.com@vger.kernel.org Sat May 30 05:23:26 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 19:53:15 -0400 +Subject: ipv6/addrconf: annotate data-races around devconf fields (II) +To: stable@vger.kernel.org +Cc: Eric Dumazet , Jiri Pirko , "David S. Miller" , Sasha Levin +Message-ID: <20260529235316.1963855-1-sashal@kernel.org> + +From: Eric Dumazet + +[ Upstream commit 2f0ff05a44302c91af54a5f9efe1b65b7681540e ] + +Final (?) round of this series. + +Annotate lockless reads on following devconf fields, +because they can be changed concurrently from /proc/net/ipv6/conf. 
+ +- accept_dad +- optimistic_dad +- use_optimistic +- use_oif_addrs_only +- ra_honor_pio_life +- keep_addr_on_down +- ndisc_notify +- ndisc_evict_nocarrier +- suppress_frag_ndisc +- addr_gen_mode +- seg6_enabled +- ioam6_enabled +- ioam6_id +- ioam6_id_wide +- drop_unicast_in_l2_multicast +- mldv[12]_unsolicited_report_interval +- force_mld_version +- force_tllao +- accept_untracked_na +- drop_unsolicited_na +- accept_source_route + +Signed-off-by: Eric Dumazet +Reviewed-by: Jiri Pirko +Signed-off-by: David S. Miller +Stable-dep-of: d4ea0dfd7501 ("ipv6: ioam: add NULL check for idev in ipv6_hop_ioam()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 47 +++++++++++++++++++++++++---------------------- + net/ipv6/exthdrs.c | 16 +++++++++------- + net/ipv6/ioam6.c | 8 ++++---- + net/ipv6/ip6_input.c | 2 +- + net/ipv6/mcast.c | 14 +++++++------- + net/ipv6/ndisc.c | 18 +++++++++--------- + net/ipv6/seg6_hmac.c | 8 +++++--- + 7 files changed, 60 insertions(+), 53 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -1520,15 +1520,17 @@ static inline int ipv6_saddr_preferred(i + return 0; + } + +-static bool ipv6_use_optimistic_addr(struct net *net, +- struct inet6_dev *idev) ++static bool ipv6_use_optimistic_addr(const struct net *net, ++ const struct inet6_dev *idev) + { + #ifdef CONFIG_IPV6_OPTIMISTIC_DAD + if (!idev) + return false; +- if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) ++ if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && ++ !READ_ONCE(idev->cnf.optimistic_dad)) + return false; +- if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic) ++ if (!READ_ONCE(net->ipv6.devconf_all->use_optimistic) && ++ !READ_ONCE(idev->cnf.use_optimistic)) + return false; + + return true; +@@ -1537,13 +1539,14 @@ static bool ipv6_use_optimistic_addr(str + #endif + } + +-static bool ipv6_allow_optimistic_dad(struct net *net, +- struct inet6_dev *idev) ++static bool 
ipv6_allow_optimistic_dad(const struct net *net, ++ const struct inet6_dev *idev) + { + #ifdef CONFIG_IPV6_OPTIMISTIC_DAD + if (!idev) + return false; +- if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) ++ if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && ++ !READ_ONCE(idev->cnf.optimistic_dad)) + return false; + + return true; +@@ -1825,7 +1828,7 @@ int ipv6_dev_get_saddr(struct net *net, + idev = __in6_dev_get(dst_dev); + if ((dst_type & IPV6_ADDR_MULTICAST) || + dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL || +- (idev && idev->cnf.use_oif_addrs_only)) { ++ (idev && READ_ONCE(idev->cnf.use_oif_addrs_only))) { + use_oif_addr = true; + } + } +@@ -2664,8 +2667,8 @@ int addrconf_prefix_rcv_add_addr(struct + }; + + #ifdef CONFIG_IPV6_OPTIMISTIC_DAD +- if ((net->ipv6.devconf_all->optimistic_dad || +- in6_dev->cnf.optimistic_dad) && ++ if ((READ_ONCE(net->ipv6.devconf_all->optimistic_dad) || ++ READ_ONCE(in6_dev->cnf.optimistic_dad)) && + !net->ipv6.devconf_all->forwarding && sllao) + cfg.ifa_flags |= IFA_F_OPTIMISTIC; + #endif +@@ -3295,8 +3298,8 @@ void addrconf_add_linklocal(struct inet6 + struct inet6_ifaddr *ifp; + + #ifdef CONFIG_IPV6_OPTIMISTIC_DAD +- if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad || +- idev->cnf.optimistic_dad) && ++ if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad) || ++ READ_ONCE(idev->cnf.optimistic_dad)) && + !dev_net(idev->dev)->ipv6.devconf_all->forwarding) + cfg.ifa_flags |= IFA_F_OPTIMISTIC; + #endif +@@ -3868,10 +3871,10 @@ static int addrconf_ifdown(struct net_de + */ + if (!unregister && !idev->cnf.disable_ipv6) { + /* aggregate the system setting and interface setting */ +- int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; ++ int _keep_addr = READ_ONCE(net->ipv6.devconf_all->keep_addr_on_down); + + if (!_keep_addr) +- _keep_addr = idev->cnf.keep_addr_on_down; ++ _keep_addr = READ_ONCE(idev->cnf.keep_addr_on_down); + + keep_addr = (_keep_addr > 0); + } +@@ -4092,8 +4095,8 
@@ static void addrconf_dad_begin(struct in + + net = dev_net(dev); + if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || +- (net->ipv6.devconf_all->accept_dad < 1 && +- idev->cnf.accept_dad < 1) || ++ (READ_ONCE(net->ipv6.devconf_all->accept_dad) < 1 && ++ READ_ONCE(idev->cnf.accept_dad) < 1) || + !(ifp->flags&IFA_F_TENTATIVE) || + ifp->flags & IFA_F_NODAD) { + bool send_na = false; +@@ -4185,8 +4188,8 @@ static void addrconf_dad_work(struct wor + action = DAD_ABORT; + ifp->state = INET6_IFADDR_STATE_POSTDAD; + +- if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 || +- idev->cnf.accept_dad > 1) && ++ if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 || ++ READ_ONCE(idev->cnf.accept_dad) > 1) && + !idev->cnf.disable_ipv6 && + !(ifp->flags & IFA_F_STABLE_PRIVACY)) { + struct in6_addr addr; +@@ -4325,8 +4328,8 @@ static void addrconf_dad_completed(struc + + /* send unsolicited NA if enabled */ + if (send_na && +- (ifp->idev->cnf.ndisc_notify || +- dev_net(dev)->ipv6.devconf_all->ndisc_notify)) { ++ (READ_ONCE(ifp->idev->cnf.ndisc_notify) || ++ READ_ONCE(dev_net(dev)->ipv6.devconf_all->ndisc_notify))) { + ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr, + /*router=*/ !!ifp->idev->cnf.forwarding, + /*solicited=*/ false, /*override=*/ true, +@@ -6522,7 +6525,7 @@ static int addrconf_sysctl_addr_gen_mode + } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { + struct net_device *dev; + +- net->ipv6.devconf_dflt->addr_gen_mode = new_val; ++ WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val); + for_each_netdev(net, dev) { + idev = __in6_dev_get(dev); + if (idev && +@@ -6533,7 +6536,7 @@ static int addrconf_sysctl_addr_gen_mode + } + } + +- *((u32 *)ctl->data) = new_val; ++ WRITE_ONCE(*((u32 *)ctl->data), new_val); + } + + out: +--- a/net/ipv6/exthdrs.c ++++ b/net/ipv6/exthdrs.c +@@ -387,9 +387,8 @@ static int ipv6_srh_rcv(struct sk_buff * + return -1; + } + +- accept_seg6 = net->ipv6.devconf_all->seg6_enabled; +- if 
(accept_seg6 > idev->cnf.seg6_enabled) +- accept_seg6 = idev->cnf.seg6_enabled; ++ accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled), ++ READ_ONCE(idev->cnf.seg6_enabled)); + + if (!accept_seg6) { + kfree_skb(skb); +@@ -666,10 +665,13 @@ static int ipv6_rthdr_rcv(struct sk_buff + struct ipv6_rt_hdr *hdr; + struct rt0_hdr *rthdr; + struct net *net = dev_net(skb->dev); +- int accept_source_route = net->ipv6.devconf_all->accept_source_route; ++ int accept_source_route; + +- if (idev && accept_source_route > idev->cnf.accept_source_route) +- accept_source_route = idev->cnf.accept_source_route; ++ accept_source_route = READ_ONCE(net->ipv6.devconf_all->accept_source_route); ++ ++ if (idev) ++ accept_source_route = min(accept_source_route, ++ READ_ONCE(idev->cnf.accept_source_route)); + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || + !pskb_may_pull(skb, (skb_transport_offset(skb) + +@@ -930,7 +932,7 @@ static bool ipv6_hop_ioam(struct sk_buff + goto drop; + + /* Ignore if IOAM is not enabled on ingress */ +- if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled) ++ if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled)) + goto ignore; + + /* Truncated Option header */ +--- a/net/ipv6/ioam6.c ++++ b/net/ipv6/ioam6.c +@@ -677,7 +677,7 @@ static void __ioam6_fill_trace_data(stru + if (!skb->dev) + raw16 = IOAM6_U16_UNAVAILABLE; + else +- raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id; ++ raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id); + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); +@@ -685,7 +685,7 @@ static void __ioam6_fill_trace_data(stru + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw16 = IOAM6_U16_UNAVAILABLE; + else +- raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id; ++ raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id); + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); +@@ -772,7 +772,7 @@ static void 
__ioam6_fill_trace_data(stru + if (!skb->dev) + raw32 = IOAM6_U32_UNAVAILABLE; + else +- raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide; ++ raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide); + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); +@@ -780,7 +780,7 @@ static void __ioam6_fill_trace_data(stru + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw32 = IOAM6_U32_UNAVAILABLE; + else +- raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide; ++ raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide); + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); +--- a/net/ipv6/ip6_input.c ++++ b/net/ipv6/ip6_input.c +@@ -236,7 +236,7 @@ static struct sk_buff *ip6_rcv_core(stru + if (!ipv6_addr_is_multicast(&hdr->daddr) && + (skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && +- idev->cnf.drop_unicast_in_l2_multicast) { ++ READ_ONCE(idev->cnf.drop_unicast_in_l2_multicast)) { + SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST); + goto err; + } +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -159,9 +159,9 @@ static int unsolicited_report_interval(s + int iv; + + if (mld_in_v1_mode(idev)) +- iv = idev->cnf.mldv1_unsolicited_report_interval; ++ iv = READ_ONCE(idev->cnf.mldv1_unsolicited_report_interval); + else +- iv = idev->cnf.mldv2_unsolicited_report_interval; ++ iv = READ_ONCE(idev->cnf.mldv2_unsolicited_report_interval); + + return iv > 0 ? iv : 1; + } +@@ -1201,15 +1201,15 @@ static bool mld_marksources(struct ifmca + + static int mld_force_mld_version(const struct inet6_dev *idev) + { ++ const struct net *net = dev_net(idev->dev); ++ int all_force; ++ ++ all_force = READ_ONCE(net->ipv6.devconf_all->force_mld_version); + /* Normally, both are 0 here. If enforcement to a particular is + * being used, individual device enforcement will have a lower + * precedence over 'all' device (.../conf/all/force_mld_version). 
+ */ +- +- if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0) +- return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version; +- else +- return idev->cnf.force_mld_version; ++ return all_force ?: READ_ONCE(idev->cnf.force_mld_version); + } + + static bool mld_in_v2_mode_only(const struct inet6_dev *idev) +--- a/net/ipv6/ndisc.c ++++ b/net/ipv6/ndisc.c +@@ -451,7 +451,7 @@ static void ip6_nd_hdr(struct sk_buff *s + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); +- tclass = idev ? idev->cnf.ndisc_tclass : 0; ++ tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0; + rcu_read_unlock(); + + skb_push(skb, sizeof(*hdr)); +@@ -539,7 +539,7 @@ void ndisc_send_na(struct net_device *de + src_addr = solicited_addr; + if (ifp->flags & IFA_F_OPTIMISTIC) + override = false; +- inc_opt |= ifp->idev->cnf.force_tllao; ++ inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao); + in6_ifa_put(ifp); + } else { + if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, +@@ -977,7 +977,7 @@ static int accept_untracked_na(struct ne + { + struct inet6_dev *idev = __in6_dev_get(dev); + +- switch (idev->cnf.accept_untracked_na) { ++ switch (READ_ONCE(idev->cnf.accept_untracked_na)) { + case 0: /* Don't accept untracked na (absent in neighbor cache) */ + return 0; + case 1: /* Create new entries from na if currently untracked */ +@@ -1028,7 +1028,7 @@ static enum skb_drop_reason ndisc_recv_n + * drop_unsolicited_na takes precedence over accept_untracked_na + */ + if (!msg->icmph.icmp6_solicited && idev && +- idev->cnf.drop_unsolicited_na) ++ READ_ONCE(idev->cnf.drop_unsolicited_na)) + return reason; + + if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) +@@ -1821,7 +1821,7 @@ static bool ndisc_suppress_frag_ndisc(st + if (!idev) + return true; + if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED && +- idev->cnf.suppress_frag_ndisc) { ++ READ_ONCE(idev->cnf.suppress_frag_ndisc)) { + net_warn_ratelimited("Received fragmented ndisc packet. 
Carefully consider disabling suppress_frag_ndisc.\n"); + return true; + } +@@ -1898,8 +1898,8 @@ static int ndisc_netdev_event(struct not + idev = in6_dev_get(dev); + if (!idev) + break; +- if (idev->cnf.ndisc_notify || +- net->ipv6.devconf_all->ndisc_notify) ++ if (READ_ONCE(idev->cnf.ndisc_notify) || ++ READ_ONCE(net->ipv6.devconf_all->ndisc_notify)) + ndisc_send_unsol_na(dev); + in6_dev_put(idev); + break; +@@ -1908,8 +1908,8 @@ static int ndisc_netdev_event(struct not + if (!idev) + evict_nocarrier = true; + else { +- evict_nocarrier = idev->cnf.ndisc_evict_nocarrier && +- net->ipv6.devconf_all->ndisc_evict_nocarrier; ++ evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) && ++ READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier); + in6_dev_put(idev); + } + +--- a/net/ipv6/seg6_hmac.c ++++ b/net/ipv6/seg6_hmac.c +@@ -242,6 +242,7 @@ bool seg6_hmac_validate_skb(struct sk_bu + struct sr6_tlv_hmac *tlv; + struct ipv6_sr_hdr *srh; + struct inet6_dev *idev; ++ int require_hmac; + + idev = __in6_dev_get(skb->dev); + if (!idev) +@@ -251,16 +252,17 @@ bool seg6_hmac_validate_skb(struct sk_bu + + tlv = seg6_get_tlv_hmac(srh); + ++ require_hmac = READ_ONCE(idev->cnf.seg6_require_hmac); + /* mandatory check but no tlv */ +- if (idev->cnf.seg6_require_hmac > 0 && !tlv) ++ if (require_hmac > 0 && !tlv) + return false; + + /* no check */ +- if (idev->cnf.seg6_require_hmac < 0) ++ if (require_hmac < 0) + return true; + + /* check only if present */ +- if (idev->cnf.seg6_require_hmac == 0 && !tlv) ++ if (require_hmac == 0 && !tlv) + return true; + + /* now, seg6_require_hmac >= 0 && tlv */ diff --git a/queue-6.6/ipv6-ioam-add-null-check-for-idev-in-ipv6_hop_ioam.patch b/queue-6.6/ipv6-ioam-add-null-check-for-idev-in-ipv6_hop_ioam.patch new file mode 100644 index 0000000000..e2fe3d3b88 --- /dev/null +++ b/queue-6.6/ipv6-ioam-add-null-check-for-idev-in-ipv6_hop_ioam.patch @@ -0,0 +1,75 @@ +From stable+bounces-256832-greg=kroah.com@vger.kernel.org Sat May 30 05:23:26 
2026 +From: Sasha Levin +Date: Fri, 29 May 2026 19:53:16 -0400 +Subject: ipv6: ioam: add NULL check for idev in ipv6_hop_ioam() +To: stable@vger.kernel.org +Cc: Justin Iurman , Ido Schimmel , Jakub Kicinski , Sasha Levin +Message-ID: <20260529235316.1963855-2-sashal@kernel.org> + +From: Justin Iurman + +[ Upstream commit d4ea0dfd75011b78cebf3808f98ac4c4f51a6fb9 ] + +Reported by Sashiko: + +The function ipv6_hop_ioam() accesses +__in6_dev_get(skb->dev)->cnf.ioam6_enabled without validating the returned +idev pointer. Because addrconf_ifdown() can concurrently clear dev->ip6_ptr +via RCU, __in6_dev_get() can return NULL during interface teardown, which +could cause a NULL pointer dereference when processing an IOAM Hop-by-Hop +option. + +Let's add a check and use SKB_DROP_REASON_IPV6DISABLED accordingly. + +Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") +Cc: stable@vger.kernel.org +Signed-off-by: Justin Iurman +Reviewed-by: Ido Schimmel +Link: https://patch.msgid.link/20260517183059.29140-1-justin.iurman@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/exthdrs.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +--- a/net/ipv6/exthdrs.c ++++ b/net/ipv6/exthdrs.c +@@ -923,16 +923,27 @@ static bool ipv6_hop_ra(struct sk_buff * + + static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) + { ++ enum skb_drop_reason drop_reason; + struct ioam6_trace_hdr *trace; + struct ioam6_namespace *ns; ++ struct inet6_dev *idev; + struct ioam6_hdr *hdr; + ++ drop_reason = SKB_DROP_REASON_IP_INHDR; ++ + /* Bad alignment (must be 4n-aligned) */ + if (optoff & 3) + goto drop; + ++ /* Does the device still have IPv6 configuration? 
*/ ++ idev = __in6_dev_get(skb->dev); ++ if (!idev) { ++ drop_reason = SKB_DROP_REASON_IPV6DISABLED; ++ goto drop; ++ } ++ + /* Ignore if IOAM is not enabled on ingress */ +- if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled)) ++ if (!READ_ONCE(idev->cnf.ioam6_enabled)) + goto ignore; + + /* Truncated Option header */ +@@ -982,7 +993,7 @@ ignore: + return true; + + drop: +- kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); ++ kfree_skb_reason(skb, drop_reason); + return false; + } + diff --git a/queue-6.6/mm-damon-sysfs-schemes-delete-tried-region-in-regions_rmdirs.patch b/queue-6.6/mm-damon-sysfs-schemes-delete-tried-region-in-regions_rmdirs.patch new file mode 100644 index 0000000000..4e43c78428 --- /dev/null +++ b/queue-6.6/mm-damon-sysfs-schemes-delete-tried-region-in-regions_rmdirs.patch @@ -0,0 +1,94 @@ +From stable+bounces-260511-greg=kroah.com@vger.kernel.org Thu Jun 4 19:49:45 2026 +From: Sasha Levin +Date: Thu, 4 Jun 2026 10:13:08 -0400 +Subject: mm/damon/sysfs-schemes: delete tried region in regions_rmdirs() +To: stable@vger.kernel.org +Cc: SeongJae Park , Andrew Morton , Sasha Levin +Message-ID: <20260604141308.3549703-1-sashal@kernel.org> + +From: SeongJae Park + +[ Upstream commit 441f92f7d386b85bad16de49db95a307cba048a2 ] + +DAMON sysfs maintains the DAMOS tried region directory objects via a +linked list. When the user requests refresh of the directories, DAMON +sysfs removes all the region directories first, and then generates updated +regions directory on the empty space. The removal function +(damon_sysfs_scheme_regions_rm_dirs()) only puts the kobj objects. +Deletion of the container region object from the linked list is done +inside the kobj release callback function. + +If somehow the callback invocation is delayed, the list will contain +regions that are going to be freed. If the updated region directories +creation is started in this situation, the list can be corrupted and +use-after-free can happen. 
+ +Because the kobj objects are managed by only DAMON sysfs, the issue cannot +happen in normal situation. But, such delays can be made on kernels that +built with CONFIG_DEBUG_KOBJECT_RELEASE. On the kernel, the issue can +indeed be reproduced like below. + + # damo start --damos_action stat + # cd /sys/kernel/mm/damon/admin/kdamonds/0/ + # for i in {1..10}; do echo update_schemes_tried_regions > state; done + # dmesg | grep underflow + [ 89.296152] refcount_t: underflow; use-after-free. + +Fix the issue by removing the region object from the list when +decrementing the reference count. + +Also update damos_sysfs_populate_region_dir() to add the region object to +the list only after the kobject_init_and_add() is success, so that fail of +kobject_init_and_add() is not leaving the deallocated object on the list. + +The issue was discovered [1] by Sashiko. + +Link: https://lore.kernel.org/20260518152559.93038-1-sj@kernel.org +Link: https://lore.kernel.org/20260513011920.119183-1-sj@kernel.org [1] +Fixes: 9277d0367ba1 ("mm/damon/sysfs-schemes: implement scheme region directory") +Signed-off-by: SeongJae Park +Cc: # 6.2.x +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/sysfs-schemes.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/mm/damon/sysfs-schemes.c ++++ b/mm/damon/sysfs-schemes.c +@@ -78,7 +78,6 @@ static void damon_sysfs_scheme_region_re + struct damon_sysfs_scheme_region *region = container_of(kobj, + struct damon_sysfs_scheme_region, kobj); + +- list_del(&region->list); + kfree(region); + } + +@@ -151,7 +150,7 @@ static void damon_sysfs_scheme_regions_r + struct damon_sysfs_scheme_region *r, *next; + + list_for_each_entry_safe(r, next, &regions->regions_list, list) { +- /* release function deletes it from the list */ ++ list_del(&r->list); + kobject_put(&r->kobj); + regions->nr_regions--; + } +@@ -1772,14 +1771,15 @@ static int damon_sysfs_before_damos_appl + region = 
damon_sysfs_scheme_region_alloc(r); + if (!region) + return 0; +- list_add_tail(&region->list, &sysfs_regions->regions_list); +- sysfs_regions->nr_regions++; + if (kobject_init_and_add(&region->kobj, + &damon_sysfs_scheme_region_ktype, + &sysfs_regions->kobj, "%d", + damon_sysfs_schemes_region_idx++)) { + kobject_put(&region->kobj); ++ return 0; + } ++ list_add_tail(&region->list, &sysfs_regions->regions_list); ++ sysfs_regions->nr_regions++; + return 0; + } + diff --git a/queue-6.6/mm-memory-fix-spurious-warning-when-unmapping-device-private-exclusive-pages.patch b/queue-6.6/mm-memory-fix-spurious-warning-when-unmapping-device-private-exclusive-pages.patch new file mode 100644 index 0000000000..1b54edbd4d --- /dev/null +++ b/queue-6.6/mm-memory-fix-spurious-warning-when-unmapping-device-private-exclusive-pages.patch @@ -0,0 +1,192 @@ +From stable+bounces-256787-greg=kroah.com@vger.kernel.org Sat May 30 03:12:07 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 17:41:58 -0400 +Subject: mm/memory: fix spurious warning when unmapping device-private/exclusive pages +To: stable@vger.kernel.org +Cc: "Alistair Popple" , "Arsen Arsenović" , "Balbir Singh" , "David Hildenbrand" , "Jason Gunthorpe" , "John Hubbard" , "Leon Romanovsky" , "Liam R. Howlett" , "Lorenzo Stoakes" , "Peter Xu" , "Matthew Brost" , "Michal Hocko" , "Mike Rapoport" , "Shuah Khan" , "Suren Baghdasaryan" , "Thomas Hellström" , "Vlastimil Babka" , "Andrew Morton" , "Sasha Levin" +Message-ID: <20260529214158.1792761-1-sashal@kernel.org> + +From: Alistair Popple + +[ Upstream commit be3f38d05cc5a7c3f13e51994c5dd043ab604d28 ] + +Device private and exclusive entries are only supported for anonymous +folios. This condition is tested in __migrate_device_pages() and +make_device_exclusive() using folio_test_anon(). However the unmap path +tests this assumption using vma_is_anonymous(). + +This is wrong because whilst anonymous VMAs can only contain folios where +folio_test_anon() is true the opposite relation does not hold. 
A folio +for which folio_test_anon() is true does not imply vma_is_anonymous() is +true. Such a condition can occur if for example a folio is part of a +private filebacked mapping. + +In this case vma_is_anonymous() is false as the mapping is filebacked, but +folio_test_anon() may be true, thus permitting devices to migrate the +folio to device private memory. This can lead to the following spurious +warnings during process teardown: + +[ 772.737706] ------------[ cut here ]------------ +[ 772.739201] WARNING: mm/memory.c:1754 at unmap_page_range.cold+0x26/0x18a, CPU#17: hmm-tests/2041 +[ 772.742050] Modules linked in: test_hmm nvidia_uvm(O) nvidia(O) +[ 772.743959] CPU: 17 UID: 0 PID: 2041 Comm: hmm-tests Tainted: G W O 7.0.0+ #387 PREEMPT(full) +[ 772.747104] Tainted: [W]=WARN, [O]=OOT_MODULE +[ 772.748509] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 +[ 772.752117] RIP: 0010:unmap_page_range.cold+0x26/0x18a +[ 772.753780] Code: 7e fe ff ff 48 89 4c 24 78 4c 89 44 24 38 e8 f2 ff b1 00 48 8b 4c 24 78 4c 8b 44 24 38 48 8b 44 24 18 48 83 78 48 00 74 04 90 <0f> 0b 90 48 89 ca b8 ff ff 37 00 48 c1 ea 03 48 c1 e0 2a 80 3c 02 +[ 772.759602] RSP: 0018:ffff888112607550 EFLAGS: 00010286 +[ 772.761310] RAX: ffff88811bbf4dc0 RBX: dffffc0000000000 RCX: ffffea03e9bfffd8 +[ 772.763583] RDX: 1ffff1102377e9c1 RSI: 0000000000000008 RDI: ffff88811bbf4e08 +[ 772.765914] RBP: 0000000000000006 R08: ffff8881059f7448 R09: ffffed10224c0e68 +[ 772.768184] R10: ffff888112607347 R11: 0000000000000001 R12: 0000000000000001 +[ 772.770461] R13: ffffea03e9bfffc0 R14: ffff888112607908 R15: ffffea03e9bfffc0 +[ 772.772782] FS: 00007f327caa2780(0000) GS:ffff888427b7d000(0000) knlGS:0000000000000000 +[ 772.775328] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 772.777187] CR2: 00007f327ca89000 CR3: 00000001994d5000 CR4: 00000000000006f0 +[ 772.779135] Call Trace: +[ 772.779792] +[ 772.780317] ? 
dmirror_interval_invalidate+0x1a3/0x290 [test_hmm] +[ 772.781873] ? vm_normal_page_pud+0x2b0/0x2b0 +[ 772.782992] ? __rwlock_init+0x150/0x150 +[ 772.784006] ? lock_release+0x216/0x2b0 +[ 772.785008] ? __mmu_notifier_invalidate_range_start+0x505/0x6e0 +[ 772.786522] ? lock_release+0x216/0x2b0 +[ 772.787498] ? unmap_single_vma+0xb6/0x210 +[ 772.788573] unmap_vmas+0x27d/0x520 +[ 772.789506] ? unmap_single_vma+0x210/0x210 +[ 772.790607] ? mas_update_gap.part.0+0x620/0x620 +[ 772.791834] unmap_region+0x19e/0x350 +[ 772.792769] ? remove_vma+0x130/0x130 +[ 772.793684] ? mas_alloc_nodes+0x1f2/0x300 +[ 772.794730] vms_complete_munmap_vmas+0x8c1/0xe20 +[ 772.795926] ? unmap_region+0x350/0x350 +[ 772.796917] do_vmi_align_munmap+0x36a/0x4e0 +[ 772.798018] ? lock_release+0x216/0x2b0 +[ 772.799024] ? vma_shrink+0x620/0x620 +[ 772.799983] do_vmi_munmap+0x150/0x2c0 +[ 772.800939] __vm_munmap+0x161/0x2c0 +[ 772.801872] ? expand_downwards+0xd60/0xd60 +[ 772.802948] ? clockevents_program_event+0x1ef/0x540 +[ 772.804217] ? lock_release+0x216/0x2b0 +[ 772.805158] __x64_sys_munmap+0x59/0x80 +[ 772.805776] do_syscall_64+0xfc/0x670 +[ 772.806336] ? 
irqentry_exit+0xda/0x580 +[ 772.806976] entry_SYSCALL_64_after_hwframe+0x4b/0x53 +[ 772.807772] RIP: 0033:0x7f327cbb2717 +[ 772.808323] Code: 73 01 c3 48 8b 0d f9 76 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c9 76 0d 00 f7 d8 64 89 01 48 +[ 772.811337] RSP: 002b:00007ffde7f57d38 EFLAGS: 00000202 ORIG_RAX: 000000000000000b +[ 772.812564] RAX: ffffffffffffffda RBX: 00007f327cc9c000 RCX: 00007f327cbb2717 +[ 772.813733] RDX: 0000000000000000 RSI: 0000000000400000 RDI: 00007f327c289000 +[ 772.814867] RBP: 0000000000421360 R08: 000000000000001a R09: 0000000000000000 +[ 772.815991] R10: 0000000000000003 R11: 0000000000000202 R12: 00007ffde7f57d74 +[ 772.817121] R13: 00007f327c689010 R14: 0000000000100000 R15: 00007f327c289000 +[ 772.818272] +[ 772.818614] irq event stamp: 0 +[ 772.819159] hardirqs last enabled at (0): [<0000000000000000>] 0x0 +[ 772.820174] hardirqs last disabled at (0): [] copy_process+0x19f3/0x6440 +[ 772.821511] softirqs last enabled at (0): [] copy_process+0x1a40/0x6440 +[ 772.822869] softirqs last disabled at (0): [<0000000000000000>] 0x0 +[ 772.823871] ---[ end trace 0000000000000000 ]--- + +Fix this by using the same check for folio_test_anon() in +zap_nonpresent_ptes(). Also add a hmm-test case for this. + +Link: https://lore.kernel.org/20260501065116.2057242-1-apopple@nvidia.com +Fixes: 999dad824c39 ("mm/shmem: persist uffd-wp bit across zapping for file-backed") +Signed-off-by: Alistair Popple +Reported-by: Arsen Arsenović +Reviewed-by: Balbir Singh +Cc: David Hildenbrand +Cc: Jason Gunthorpe +Cc: John Hubbard +Cc: Leon Romanovsky +Cc: Liam R. 
Howlett +Cc: Lorenzo Stoakes +Cc: Peter Xu +Cc: Matthew Brost +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Shuah Khan +Cc: Suren Baghdasaryan +Cc: Thomas Hellström +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +[ adapted `folio_test_anon(folio)` to `PageAnon(page)` ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory.c | 2 - + tools/testing/selftests/mm/hmm-tests.c | 50 +++++++++++++++++++++++++++++++++ + 2 files changed, 51 insertions(+), 1 deletion(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1475,7 +1475,7 @@ static unsigned long zap_pte_range(struc + * consider uffd-wp bit when zap. For more information, + * see zap_install_uffd_wp_if_needed(). + */ +- WARN_ON_ONCE(!vma_is_anonymous(vma)); ++ WARN_ON_ONCE(!PageAnon(page)); + rss[mm_counter(page)]--; + if (is_device_private_entry(entry)) + page_remove_rmap(page, vma, false); +--- a/tools/testing/selftests/mm/hmm-tests.c ++++ b/tools/testing/selftests/mm/hmm-tests.c +@@ -999,6 +999,56 @@ TEST_F(hmm, migrate) + } + + /* ++ * Migrate private file memory to device private memory. ++ */ ++TEST_F(hmm, migrate_file_private) ++{ ++ struct hmm_buffer *buffer; ++ unsigned long npages; ++ unsigned long size; ++ unsigned long i; ++ int *ptr; ++ int ret; ++ int fd; ++ ++ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; ++ ASSERT_NE(npages, 0); ++ size = npages << self->page_shift; ++ ++ fd = hmm_create_file(size); ++ ASSERT_GE(fd, 0); ++ ++ buffer = malloc(sizeof(*buffer)); ++ ASSERT_NE(buffer, NULL); ++ ++ buffer->fd = fd; ++ buffer->size = size; ++ buffer->mirror = malloc(size); ++ ASSERT_NE(buffer->mirror, NULL); ++ ++ buffer->ptr = mmap(NULL, size, ++ PROT_READ | PROT_WRITE, ++ MAP_PRIVATE, ++ buffer->fd, 0); ++ ASSERT_NE(buffer->ptr, MAP_FAILED); ++ ++ /* Initialize buffer in system memory. */ ++ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ++ ptr[i] = i; ++ ++ /* Migrate memory to device. 
*/ ++ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); ++ ASSERT_EQ(ret, 0); ++ ASSERT_EQ(buffer->cpages, npages); ++ ++ /* Check what the device read. */ ++ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ++ ASSERT_EQ(ptr[i], i); ++ ++ hmm_buffer_free(buffer); ++} ++ ++/* + * Migrate anonymous memory to device private memory and fault some of it back + * to system memory, then try migrating the resulting mix of system and device + * private memory to the device. diff --git a/queue-6.6/mptcp-cleanup-fallback-dummy-mapping-generation.patch b/queue-6.6/mptcp-cleanup-fallback-dummy-mapping-generation.patch new file mode 100644 index 0000000000..9443bb9282 --- /dev/null +++ b/queue-6.6/mptcp-cleanup-fallback-dummy-mapping-generation.patch @@ -0,0 +1,74 @@ +From stable+bounces-256900-greg=kroah.com@vger.kernel.org Sat May 30 19:51:10 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 10:17:27 -0400 +Subject: mptcp: cleanup fallback dummy mapping generation +To: stable@vger.kernel.org +Cc: Paolo Abeni , Geliang Tang , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin +Message-ID: <20260530141728.2398427-2-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 2834f8edd74d5dda368087a654c0e52b141e9893 ] + +MPTCP currently accesses ack_seq outside the msk socket lock scope to +generate the dummy mapping for fallback socket. Soon we are going +to introduce backlog usage and even for fallback socket the ack_seq +value will be significantly off outside of the msk socket lock scope. + +Avoid relying on ack_seq for dummy mapping generation, using instead +the subflow sequence number. Note that in case of disconnect() and +(re)connect() we must ensure that any previous state is re-set. 
+ +Signed-off-by: Paolo Abeni +Reviewed-by: Geliang Tang +Tested-by: Geliang Tang +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251121-net-next-mptcp-memcg-backlog-imp-v1-6-1f34b6c1e0b1@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 0981f90e1a05 ("mptcp: reset rcv wnd on disconnect") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 3 +++ + net/mptcp/subflow.c | 8 +++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -3331,6 +3331,9 @@ static int mptcp_disconnect(struct sock + msk->rcvspace_init = 0; + msk->fastclosing = 0; + ++ /* for fallback's sake */ ++ WRITE_ONCE(msk->ack_seq, 0); ++ + WRITE_ONCE(sk->sk_shutdown, 0); + sk_error_report(sk); + return 0; +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -471,6 +471,9 @@ static void subflow_set_remote_key(struc + mptcp_crypto_key_sha(subflow->remote_key, NULL, &subflow->iasn); + subflow->iasn++; + ++ /* for fallback's sake */ ++ subflow->map_seq = subflow->iasn; ++ + WRITE_ONCE(msk->remote_key, subflow->remote_key); + WRITE_ONCE(msk->ack_seq, subflow->iasn); + WRITE_ONCE(msk->can_ack, true); +@@ -1382,9 +1385,12 @@ reset: + + skb = skb_peek(&ssk->sk_receive_queue); + subflow->map_valid = 1; +- subflow->map_seq = READ_ONCE(msk->ack_seq); + subflow->map_data_len = skb->len; + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; ++ subflow->map_seq = __mptcp_expand_seq(subflow->map_seq, ++ subflow->iasn + ++ TCP_SKB_CB(skb)->seq - ++ subflow->ssn_offset - 1); + WRITE_ONCE(subflow->data_avail, true); + return true; + } diff --git a/queue-6.6/mptcp-do-not-drop-partial-packets.patch b/queue-6.6/mptcp-do-not-drop-partial-packets.patch new file mode 100644 index 0000000000..8485185dbc --- /dev/null +++ b/queue-6.6/mptcp-do-not-drop-partial-packets.patch @@ -0,0 +1,78 @@ +From 
stable+bounces-256920-greg=kroah.com@vger.kernel.org Sat May 30 20:37:54 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 11:07:40 -0400 +Subject: mptcp: do not drop partial packets +To: stable@vger.kernel.org +Cc: Shardul Bankar , Paolo Abeni , "Matthieu Baerts (NGI0)" , Sasha Levin +Message-ID: <20260530150740.2598142-3-sashal@kernel.org> + +From: Shardul Bankar + +[ Upstream commit 50c2d91c5dfa0e465826ec1f8dbad9cdc254bd85 ] + +When a packet arrives with map_seq < ack_seq < end_seq, the beginning +of the packet has already been acknowledged but the end contains new +data. Currently the entire packet is dropped as "old data," forcing +the sender to retransmit. + +Instead, skip the already-acked bytes by adjusting the skb offset and +enqueue only the new portion. Update bytes_received and ack_seq to +reflect the new data consumed. + +A previous attempt at this fix has been sent by Paolo Abeni [1], but had +issues [2]: it also added a zero-window check and changed rcv_wnd_sent +initialization, which caused test regressions. This version addresses +only the partial packet handling without modifying receive window +accounting. 
+ +Fixes: ab174ad8ef76 ("mptcp: move ooo skbs into msk out of order queue.") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/c9b426a4e163aa3c4fe8b80c79f1a610f47ae7d8.1763075056.git.pabeni@redhat.com [1] +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/600 [2] +Signed-off-by: Shardul Bankar +[pabeni@redhat.com: update map] +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-1-701e96419f2f@kernel.org +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -392,12 +392,26 @@ static bool __mptcp_move_skb(struct sock + return false; + } + +- /* old data, keep it simple and drop the whole pkt, sender +- * will retransmit as needed, if needed. ++ /* Completely old data? */ ++ if (!after64(MPTCP_SKB_CB(skb)->end_seq, msk->ack_seq)) { ++ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); ++ mptcp_drop(sk, skb); ++ return false; ++ } ++ ++ /* Partial packet: map_seq < ack_seq < end_seq. ++ * Skip the already-acked bytes and enqueue the new data. 
+ */ +- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); +- mptcp_drop(sk, skb); +- return false; ++ copy_len = MPTCP_SKB_CB(skb)->end_seq - msk->ack_seq; ++ MPTCP_SKB_CB(skb)->offset += msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq; ++ MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq - ++ MPTCP_SKB_CB(skb)->map_seq; ++ msk->bytes_received += copy_len; ++ WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); ++ ++ skb_set_owner_r(skb, sk); ++ __skb_queue_tail(&sk->sk_receive_queue, skb); ++ return true; + } + + static void mptcp_stop_rtx_timer(struct sock *sk) diff --git a/queue-6.6/mptcp-handle-first-subflow-closing-consistently.patch b/queue-6.6/mptcp-handle-first-subflow-closing-consistently.patch new file mode 100644 index 0000000000..c2b56f7dd5 --- /dev/null +++ b/queue-6.6/mptcp-handle-first-subflow-closing-consistently.patch @@ -0,0 +1,103 @@ +From stable+bounces-256919-greg=kroah.com@vger.kernel.org Sat May 30 20:37:49 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 11:07:39 -0400 +Subject: mptcp: handle first subflow closing consistently +To: stable@vger.kernel.org +Cc: Paolo Abeni , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin +Message-ID: <20260530150740.2598142-2-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 0eeb372deebce6c25b9afc09e35d6c75a744299a ] + +Currently, as soon as the PM closes a subflow, the msk stops accepting +data from it, even if the TCP socket could be still formally open in the +incoming direction, with the notable exception of the first subflow. + +The root cause of such behavior is that code currently piggy back two +separate semantic on the subflow->disposable bit: the subflow context +must be released and that the subflow must stop accepting incoming +data. + +The first subflow is never disposed, so it also never stop accepting +incoming data. Use a separate bit to mark the latter status and set such +bit in __mptcp_close_ssk() for all subflows. 
+ +Beyond making per subflow behaviour more consistent this will also +simplify the next patch. + +Signed-off-by: Paolo Abeni +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251121-net-next-mptcp-memcg-backlog-imp-v1-11-1f34b6c1e0b1@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 50c2d91c5dfa ("mptcp: do not drop partial packets") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 14 +++++++++----- + net/mptcp/protocol.h | 3 ++- + 2 files changed, 11 insertions(+), 6 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -889,10 +889,10 @@ void mptcp_data_ready(struct sock *sk, s + int sk_rbuf, ssk_rbuf; + + /* The peer can send data while we are shutting down this +- * subflow at msk destruction time, but we must avoid enqueuing ++ * subflow at subflow destruction time, but we must avoid enqueuing + * more data to the msk receive queue + */ +- if (unlikely(subflow->disposable)) ++ if (unlikely(subflow->closing)) + return; + + ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); +@@ -2491,6 +2491,13 @@ static void __mptcp_close_ssk(struct soc + struct mptcp_sock *msk = mptcp_sk(sk); + bool dispose_it, need_push = false; + ++ /* Do not pass RX data to the msk, even if the subflow socket is not ++ * going to be freed (i.e. even for the first subflow on graceful ++ * subflow close. ++ */ ++ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); ++ subflow->closing = 1; ++ + /* If the first subflow moved to a close state before accept, e.g. 
due + * to an incoming reset or listener shutdown, the subflow socket is + * already deleted by inet_child_forget() and the mptcp socket can't +@@ -2501,7 +2508,6 @@ static void __mptcp_close_ssk(struct soc + /* ensure later check in mptcp_worker() will dispose the msk */ + mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1)); + sock_set_flag(sk, SOCK_DEAD); +- lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + mptcp_subflow_drop_ctx(ssk); + goto out_release; + } +@@ -2510,8 +2516,6 @@ static void __mptcp_close_ssk(struct soc + if (dispose_it) + list_del(&subflow->node); + +- lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); +- + if (subflow->send_fastclose && ssk->sk_state != TCP_CLOSE) + tcp_set_state(ssk, TCP_CLOSE); + +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -503,11 +503,12 @@ struct mptcp_subflow_context { + send_infinite_map : 1, + remote_key_valid : 1, /* received the peer key from */ + disposable : 1, /* ctx can be free at ulp release time */ ++ closing : 1, /* must not pass rx data to msk anymore */ + stale : 1, /* unable to snd/rcv data, do not use for xmit */ + valid_csum_seen : 1, /* at least one csum validated */ + is_mptfo : 1, /* subflow is doing TFO */ + close_event_done : 1, /* has done the post-closed part */ +- __unused : 9; ++ __unused : 8; + bool data_avail; + bool scheduled; + bool pm_listener; /* a listener managed by the kernel PM? 
*/ diff --git a/queue-6.6/mptcp-introduce-the-mptcp_init_skb-helper.patch b/queue-6.6/mptcp-introduce-the-mptcp_init_skb-helper.patch new file mode 100644 index 0000000000..50555a89a8 --- /dev/null +++ b/queue-6.6/mptcp-introduce-the-mptcp_init_skb-helper.patch @@ -0,0 +1,119 @@ +From stable+bounces-256918-greg=kroah.com@vger.kernel.org Sat May 30 20:37:47 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 11:07:38 -0400 +Subject: mptcp: introduce the mptcp_init_skb helper +To: stable@vger.kernel.org +Cc: Paolo Abeni , "Matthieu Baerts (NGI0)" , Geliang Tang , Jakub Kicinski , Sasha Levin +Message-ID: <20260530150740.2598142-1-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 9a0afe0db46720ce1a009c7dac168aa0584bd732 ] + +Factor out all the skb initialization step in a new helper and +use it. Note that this change moves the MPTCP CB initialization +earlier: we can do such step as soon as the skb leaves the +subflow socket receive queues. + +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Paolo Abeni +Reviewed-by: Geliang Tang +Tested-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250927-net-next-mptcp-rcv-path-imp-v1-4-5da266aa9c1a@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 50c2d91c5dfa ("mptcp: do not drop partial packets") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 50 +++++++++++++++++++++++++++----------------------- + 1 file changed, 27 insertions(+), 23 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -323,7 +323,7 @@ end: + mptcp_set_owner_r(skb, sk); + } + +-static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) ++static bool mptcp_rmem_schedule(struct sock *sk, int size) + { + struct mptcp_sock *msk = mptcp_sk(sk); + int amt, amount; +@@ -341,27 +341,11 @@ static bool mptcp_rmem_schedule(struct s + return true; + } + +-static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, +- 
struct sk_buff *skb, unsigned int offset, +- size_t copy_len) ++static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset, ++ int copy_len) + { +- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); +- struct sock *sk = (struct sock *)msk; +- struct sk_buff *tail; +- bool has_rxtstamp; +- +- __skb_unlink(skb, &ssk->sk_receive_queue); +- +- skb_ext_reset(skb); +- skb_orphan(skb); +- +- /* try to fetch required memory from subflow */ +- if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { +- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); +- goto drop; +- } +- +- has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; ++ const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); ++ bool has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + + /* the skb map_seq accounts for the skb offset: + * mptcp_subflow_get_mapped_dsn() is based on the current tp->copied_seq +@@ -373,6 +357,25 @@ static bool __mptcp_move_skb(struct mptc + MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; + MPTCP_SKB_CB(skb)->cant_coalesce = 0; + ++ __skb_unlink(skb, &ssk->sk_receive_queue); ++ ++ skb_ext_reset(skb); ++ skb_dst_drop(skb); ++} ++ ++static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb) ++{ ++ u64 copy_len = MPTCP_SKB_CB(skb)->end_seq - MPTCP_SKB_CB(skb)->map_seq; ++ struct mptcp_sock *msk = mptcp_sk(sk); ++ struct sk_buff *tail; ++ ++ /* try to fetch required memory from subflow */ ++ if (!mptcp_rmem_schedule(sk, skb->truesize)) { ++ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); ++ mptcp_drop(sk, skb); ++ return false; ++ } ++ + if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { + /* in sequence */ + msk->bytes_received += copy_len; +@@ -393,7 +396,6 @@ static bool __mptcp_move_skb(struct mptc + * will retransmit as needed, if needed. 
+ */ + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); +-drop: + mptcp_drop(sk, skb); + return false; + } +@@ -720,7 +722,9 @@ static bool __mptcp_move_skbs_from_subfl + if (tp->urg_data) + done = true; + +- if (__mptcp_move_skb(msk, ssk, skb, offset, len)) ++ mptcp_init_skb(ssk, skb, offset, len); ++ skb_orphan(skb); ++ if (__mptcp_move_skb(sk, skb)) + moved += len; + seq += len; + diff --git a/queue-6.6/mptcp-pm-fix-add_addr-timer-infinite-retry-on-option-space-insufficient.patch b/queue-6.6/mptcp-pm-fix-add_addr-timer-infinite-retry-on-option-space-insufficient.patch new file mode 100644 index 0000000000..e0b80448ab --- /dev/null +++ b/queue-6.6/mptcp-pm-fix-add_addr-timer-infinite-retry-on-option-space-insufficient.patch @@ -0,0 +1,150 @@ +From stable+bounces-256845-greg=kroah.com@vger.kernel.org Sat May 30 06:29:38 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 20:59:27 -0400 +Subject: mptcp: pm: fix ADD_ADDR timer infinite retry on option space insufficient +To: stable@vger.kernel.org +Cc: Li Xiasong , "Matthieu Baerts (NGI0)" , Paolo Abeni , Sasha Levin +Message-ID: <20260530005928.2440591-1-sashal@kernel.org> + +From: Li Xiasong + +[ Upstream commit 51e398a3b8961b26a8c0a4ba9a777c5339791707 ] + +When TCP option space is insufficient (e.g., when sending ADD_ADDR with an +IPv6 address and port while tcp_timestamps is enabled), the original code +jumped to out_unlock without clearing the addr_signal flag. This caused +mptcp_pm_add_timer to keep rescheduling indefinitely, not sending ADD_ADDR, +preventing subsequent addresses in the endpoint list from being announced. + +Handle this case by clearing the ADD_ADDR signal and skipping the matching +ADD_ADDR retransmission entry. The skip path cancels the matching timer +(with id check) and advances PM state progression, preserving forward +progress to subsequent PM work. + +This cancellation is inherently best-effort. 
A concurrent add_timer +callback may already be running and may acquire pm.lock before the +cancel path updates entry state. In that case, one final ADD_ADDR +transmit attempt can still be executed. + +Once the cancel path sets entry->retrans_times to ADD_ADDR_RETRANS_MAX, +the callback-side retrans_times check suppresses further ADD_ADDR +retransmissions. + +Note that when an ADD_ADDR is being prepared, a pure-ACK is queued. On +the output side, it means that it is fine to skip non-pure-ACK packets, +when drop_other_suboptions is set: a pure-ACK will be processed soon +after. + +Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") +Cc: stable@vger.kernel.org +Signed-off-by: Li Xiasong +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-2-701e96419f2f@kernel.org +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm.c | 40 +++++++++++++++++++++++++++++++++------- + net/mptcp/pm_netlink.c | 16 +++++++++++++--- + 2 files changed, 46 insertions(+), 10 deletions(-) + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -335,6 +335,7 @@ bool mptcp_pm_add_addr_signal(struct mpt + struct mptcp_addr_info *addr, bool *echo, + bool *drop_other_suboptions) + { ++ bool skip_add_addr = false; + int ret = false; + u8 add_addr; + u8 family; +@@ -356,24 +357,49 @@ bool mptcp_pm_add_addr_signal(struct mpt + } + + *echo = mptcp_pm_should_add_signal_echo(msk); +- port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); +- +- family = *echo ? 
msk->pm.remote.family : msk->pm.local.family; +- if (remaining < mptcp_add_addr_len(family, *echo, port)) +- goto out_unlock; +- + if (*echo) { + *addr = msk->pm.remote; + add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); ++ port = !!msk->pm.remote.port; ++ family = msk->pm.remote.family; + } else { + *addr = msk->pm.local; + add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); ++ port = !!msk->pm.local.port; ++ family = msk->pm.local.family; + } +- WRITE_ONCE(msk->pm.addr_signal, add_addr); ++ ++ if (remaining < mptcp_add_addr_len(family, *echo, port)) { ++ struct net *net = sock_net((struct sock *)msk); ++ ++ if (!*drop_other_suboptions) ++ goto out_unlock; ++ ++ if (*echo) { ++ MPTCP_INC_STATS(net, MPTCP_MIB_ECHOADDTXDROP); ++ } else { ++ skip_add_addr = true; ++ MPTCP_INC_STATS(net, MPTCP_MIB_ADDADDRTXDROP); ++ } ++ goto drop_signal_mark; ++ } ++ + ret = true; + ++drop_signal_mark: ++ WRITE_ONCE(msk->pm.addr_signal, add_addr); ++ + out_unlock: + spin_unlock_bh(&msk->pm.lock); ++ ++ /* On pure-ACK option-space exhaustion, stop retrying this ADD_ADDR: ++ * clear the signal bit, cancel the matching retransmission timer, and ++ * let the PM state machine progress. ++ */ ++ if (skip_add_addr) { ++ mptcp_pm_del_add_timer(msk, addr, true); ++ mptcp_pm_subflow_established(msk); ++ } + return ret; + } + +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -321,7 +321,13 @@ static void mptcp_pm_add_timer(struct ti + + spin_lock_bh(&msk->pm.lock); + +- if (!mptcp_pm_should_add_signal_addr(msk)) { ++ /* The cancel path (mptcp_pm_del_add_timer()) can race with this ++ * callback. Once cancel updates retrans_times to MAX, suppress further ++ * retransmissions here. If this callback acquires pm.lock first, one ++ * final transmit attempt is still possible. 
++ */ ++ if (entry->retrans_times < ADD_ADDR_RETRANS_MAX && ++ !mptcp_pm_should_add_signal_addr(msk)) { + pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); + mptcp_pm_announce_addr(msk, &entry->addr, false); + mptcp_pm_add_addr_send_ack(msk); +@@ -369,8 +375,12 @@ mptcp_pm_del_add_timer(struct mptcp_sock + /* Note: entry might have been removed by another thread. + * We hold rcu_read_lock() to ensure it is not freed under us. + */ +- if (stop_timer) +- sk_stop_timer_sync(sk, &entry->add_timer); ++ if (stop_timer) { ++ if (check_id) ++ sk_stop_timer(sk, &entry->add_timer); ++ else ++ sk_stop_timer_sync(sk, &entry->add_timer); ++ } + + rcu_read_unlock(); + return entry; diff --git a/queue-6.6/mptcp-reset-rcv-wnd-on-disconnect.patch b/queue-6.6/mptcp-reset-rcv-wnd-on-disconnect.patch new file mode 100644 index 0000000000..74cf253ee0 --- /dev/null +++ b/queue-6.6/mptcp-reset-rcv-wnd-on-disconnect.patch @@ -0,0 +1,47 @@ +From stable+bounces-256901-greg=kroah.com@vger.kernel.org Sat May 30 19:50:14 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 10:17:28 -0400 +Subject: mptcp: reset rcv wnd on disconnect +To: stable@vger.kernel.org +Cc: Paolo Abeni , "Matthieu Baerts (NGI0)" , Sasha Levin +Message-ID: <20260530141728.2398427-3-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 0981f90e1a05773a4c29c6e720f5ea1e3c8f1876 ] + +If the MPTCP socket falls back to TCP before the MP handshake completion, +the IASN remains 0, and the rcv_wnd_sent field is not explicitly +initialized, just incremented over time with the data transfer. + +At disconnect time such value is not cleared. If the next connection falls +back to TCP before the MP handshake completion, the data transfer will +keep incrementing the receive window end sequence starting from the last +value used in the previous connection: the announced window will be +unrelated to the actual receiver buffer size and likely too big. + +Address the issue by zeroing the field at disconnect time.
+ +Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-4-701e96419f2f@kernel.org +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -3333,6 +3333,7 @@ static int mptcp_disconnect(struct sock + + /* for fallback's sake */ + WRITE_ONCE(msk->ack_seq, 0); ++ atomic64_set(&msk->rcv_wnd_sent, 0); + + WRITE_ONCE(sk->sk_shutdown, 0); + sk_error_report(sk); diff --git a/queue-6.6/mptcp-use-plain-bool-instead-of-custom-binary-enum.patch b/queue-6.6/mptcp-use-plain-bool-instead-of-custom-binary-enum.patch new file mode 100644 index 0000000000..b73e329e38 --- /dev/null +++ b/queue-6.6/mptcp-use-plain-bool-instead-of-custom-binary-enum.patch @@ -0,0 +1,109 @@ +From stable+bounces-256899-greg=kroah.com@vger.kernel.org Sat May 30 19:51:07 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 10:17:26 -0400 +Subject: mptcp: use plain bool instead of custom binary enum +To: stable@vger.kernel.org +Cc: Paolo Abeni , Mat Martineau , Jakub Kicinski , Sasha Levin +Message-ID: <20260530141728.2398427-1-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit f1f26512a9bf18f7a4c0d59df113a49f39d7d4b6 ] + +The 'data_avail' subflow field is already used as plain boolean, +drop the custom binary enum type and switch to bool. + +No functional change intended.
+ +Reviewed-by: Mat Martineau +Signed-off-by: Paolo Abeni +Signed-off-by: Mat Martineau +Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-3-9dc60939d371@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 0981f90e1a05 ("mptcp: reset rcv wnd on disconnect") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.h | 7 +------ + net/mptcp/subflow.c | 12 ++++++------ + 2 files changed, 7 insertions(+), 12 deletions(-) + +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -451,11 +451,6 @@ mptcp_subflow_rsk(const struct request_s + return (struct mptcp_subflow_request_sock *)rsk; + } + +-enum mptcp_data_avail { +- MPTCP_SUBFLOW_NODATA, +- MPTCP_SUBFLOW_DATA_AVAIL, +-}; +- + struct mptcp_delegated_action { + struct napi_struct napi; + struct list_head head; +@@ -513,7 +508,7 @@ struct mptcp_subflow_context { + is_mptfo : 1, /* subflow is doing TFO */ + close_event_done : 1, /* has done the post-closed part */ + __unused : 9; +- enum mptcp_data_avail data_avail; ++ bool data_avail; + bool scheduled; + bool pm_listener; /* a listener managed by the kernel PM? 
*/ + u32 remote_nonce; +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1303,7 +1303,7 @@ static bool subflow_check_data_avail(str + struct sk_buff *skb; + + if (!skb_peek(&ssk->sk_receive_queue)) +- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); ++ WRITE_ONCE(subflow->data_avail, false); + if (subflow->data_avail) + return true; + +@@ -1337,7 +1337,7 @@ static bool subflow_check_data_avail(str + continue; + } + +- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); ++ WRITE_ONCE(subflow->data_avail, true); + break; + } + return true; +@@ -1358,7 +1358,7 @@ fallback: + subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; + goto reset; + } +- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); ++ WRITE_ONCE(subflow->data_avail, true); + return true; + } + +@@ -1375,7 +1375,7 @@ reset: + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + tcp_send_active_reset(ssk, GFP_ATOMIC); +- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); ++ WRITE_ONCE(subflow->data_avail, false); + return false; + } + } +@@ -1385,7 +1385,7 @@ reset: + subflow->map_seq = READ_ONCE(msk->ack_seq); + subflow->map_data_len = skb->len; + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; +- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); ++ WRITE_ONCE(subflow->data_avail, true); + return true; + } + +@@ -1397,7 +1397,7 @@ bool mptcp_subflow_data_available(struct + if (subflow->map_valid && + mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { + subflow->map_valid = 0; +- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); ++ WRITE_ONCE(subflow->data_avail, false); + + pr_debug("Done with mapping: seq=%u data_len=%u\n", + subflow->map_subflow_seq, diff --git a/queue-6.6/net-hsr-defer-node-table-free-until-after-rcu-readers.patch b/queue-6.6/net-hsr-defer-node-table-free-until-after-rcu-readers.patch new file mode 100644 index 0000000000..a10c3e1dce --- /dev/null +++ 
b/queue-6.6/net-hsr-defer-node-table-free-until-after-rcu-readers.patch @@ -0,0 +1,55 @@ +From stable+bounces-256830-greg=kroah.com@vger.kernel.org Sat May 30 05:19:53 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 19:49:45 -0400 +Subject: net: hsr: defer node table free until after RCU readers +To: stable@vger.kernel.org +Cc: Michael Bommarito , Jakub Kicinski , Sasha Levin +Message-ID: <20260529234945.1939832-1-sashal@kernel.org> + +From: Michael Bommarito + +[ Upstream commit aaec7096f9961eb223b5b149abe9495525c205d9 ] + +HSR node-list and node-status generic-netlink operations run under +rcu_read_lock(). They walk hsr->node_db through hsr_get_next_node() and +hsr_get_node_data(), but RTM_DELLINK teardown removes the same node table +with plain list_del() and frees each node immediately. + +That lets a generic-netlink reader hold a struct hsr_node pointer across +hsr_dellink(). In a KASAN build, widening the reader window after +hsr_get_next_node() obtains the node reproduces a slab-use-after-free +when the reader copies node->macaddress_A; the freeing stack is +hsr_del_nodes() from hsr_dellink(). + +Use list_del_rcu() and defer the free through the existing +hsr_free_node_rcu() callback. This matches the lifetime rule used by the +HSR prune paths, which already delete nodes with list_del_rcu() and +call_rcu(). 
+ +Fixes: b9a1e627405d ("hsr: implement dellink to clean up resources") +Cc: stable@vger.kernel.org # v5.3+ +Signed-off-by: Michael Bommarito +Link: https://patch.msgid.link/20260513233838.3064715-2-michael.bommarito@gmail.com +Signed-off-by: Jakub Kicinski +[ replaced `list_del`+`call_rcu(hsr_free_node_rcu)` with `list_del_rcu`+`kfree_rcu(node, rcu_head)` ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/hsr/hsr_framereg.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/hsr/hsr_framereg.c ++++ b/net/hsr/hsr_framereg.c +@@ -112,8 +112,10 @@ void hsr_del_nodes(struct list_head *nod + struct hsr_node *node; + struct hsr_node *tmp; + +- list_for_each_entry_safe(node, tmp, node_db, mac_list) +- kfree(node); ++ list_for_each_entry_safe(node, tmp, node_db, mac_list) { ++ list_del_rcu(&node->mac_list); ++ kfree_rcu(node, rcu_head); ++ } + } + + void prp_handle_san_frame(bool san, enum hsr_port_type port, diff --git a/queue-6.6/octeontx2-af-cgx-add-bounds-check-to-cgx_speed_mbps-index.patch b/queue-6.6/octeontx2-af-cgx-add-bounds-check-to-cgx_speed_mbps-index.patch new file mode 100644 index 0000000000..438d163bef --- /dev/null +++ b/queue-6.6/octeontx2-af-cgx-add-bounds-check-to-cgx_speed_mbps-index.patch @@ -0,0 +1,57 @@ +From stable+bounces-256910-greg=kroah.com@vger.kernel.org Sat May 30 19:56:58 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 10:26:46 -0400 +Subject: octeontx2-af: CGX: add bounds check to cgx_speed_mbps index +To: stable@vger.kernel.org +Cc: Sam Daly , Sunil Goutham , Linu Cherian , Geetha sowjanya , hariprasad , Subbaraya Sundeep , Andrew Lunn , stable , Greg Kroah-Hartman , Jakub Kicinski , Sasha Levin +Message-ID: <20260530142646.2429080-2-sashal@kernel.org> + +From: Sam Daly + +[ Upstream commit c0bf0a4f3f1f5f57aa83e1400ba4f56f0abfd542 ] + +cgx_speed_mbps has 13 elements but RESP_LINKSTAT_SPEED can yield values +0-15. If it returns a value >= 13, this causes an out-of-bounds array +access. 
Add a bounds check and default to speed 0 if the index is out of +range. + +Fixes: 61071a871ea6 ("octeontx2-af: Forward CGX link notifications to PFs") +Cc: Sunil Goutham +Cc: Linu Cherian +Cc: Geetha sowjanya +Cc: hariprasad +Cc: Subbaraya Sundeep +Cc: Andrew Lunn +Cc: stable +Signed-off-by: Sam Daly +Signed-off-by: Greg Kroah-Hartman +Link: https://patch.msgid.link/2026051352-refined-demise-e88d@gregkh +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +@@ -1286,13 +1286,18 @@ static inline void link_status_user_form + struct cgx_link_user_info *linfo, + struct cgx *cgx, u8 lmac_id) + { ++ unsigned int speed; ++ + linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); + linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); +- linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)]; + linfo->an = FIELD_GET(RESP_LINKSTAT_AN, lstat); + linfo->fec = FIELD_GET(RESP_LINKSTAT_FEC, lstat); + linfo->lmac_type_id = FIELD_GET(RESP_LINKSTAT_LMAC_TYPE, lstat); + ++ speed = FIELD_GET(RESP_LINKSTAT_SPEED, lstat); ++ linfo->speed = speed < ARRAY_SIZE(cgx_speed_mbps) ? 
++ cgx_speed_mbps[speed] : 0; ++ + if (linfo->lmac_type_id >= LMAC_MODE_MAX) { + dev_err(&cgx->pdev->dev, "Unknown lmac_type_id %d reported by firmware on cgx port%d:%d", + linfo->lmac_type_id, cgx->cgx_id, lmac_id); diff --git a/queue-6.6/octeontx2-af-replace-deprecated-strncpy-with-strscpy.patch b/queue-6.6/octeontx2-af-replace-deprecated-strncpy-with-strscpy.patch new file mode 100644 index 0000000000..4fa9cf2095 --- /dev/null +++ b/queue-6.6/octeontx2-af-replace-deprecated-strncpy-with-strscpy.patch @@ -0,0 +1,68 @@ +From stable+bounces-256909-greg=kroah.com@vger.kernel.org Sat May 30 19:56:55 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 10:26:45 -0400 +Subject: octeontx2-af: replace deprecated strncpy with strscpy +To: stable@vger.kernel.org +Cc: Justin Stitt , Kees Cook , Jakub Kicinski , Sasha Levin +Message-ID: <20260530142646.2429080-1-sashal@kernel.org> + +From: Justin Stitt + +[ Upstream commit 473f8f2d1bfe1103f20140fdc80cad406b4d68c0 ] + +`strncpy` is deprecated for use on NUL-terminated destination strings +[1] and as such we should prefer more robust and less ambiguous string +interfaces. + +We can see that linfo->lmac_type is expected to be NUL-terminated based +on the `... - 1`'s present in the current code. Presumably making room +for a NUL-byte at the end of the buffer. + +Considering the above, a suitable replacement is `strscpy` [2] due to +the fact that it guarantees NUL-termination on the destination buffer +without unnecessarily NUL-padding. + +Let's also prefer the more idiomatic strscpy usage of (dest, src, +sizeof(dest)) rather than (dest, src, SOME_LEN). 
+ +Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1] +Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2] +Link: https://github.com/KSPP/linux/issues/90 +Signed-off-by: Justin Stitt +Reviewed-by: Kees Cook +Link: https://lore.kernel.org/r/20231010-strncpy-drivers-net-ethernet-marvell-octeontx2-af-cgx-c-v1-1-a443e18f9de8@google.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: c0bf0a4f3f1f ("octeontx2-af: CGX: add bounds check to cgx_speed_mbps index") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +@@ -1286,8 +1286,6 @@ static inline void link_status_user_form + struct cgx_link_user_info *linfo, + struct cgx *cgx, u8 lmac_id) + { +- const char *lmac_string; +- + linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); + linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); + linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)]; +@@ -1298,12 +1296,12 @@ static inline void link_status_user_form + if (linfo->lmac_type_id >= LMAC_MODE_MAX) { + dev_err(&cgx->pdev->dev, "Unknown lmac_type_id %d reported by firmware on cgx port%d:%d", + linfo->lmac_type_id, cgx->cgx_id, lmac_id); +- strncpy(linfo->lmac_type, "Unknown", LMACTYPE_STR_LEN - 1); ++ strscpy(linfo->lmac_type, "Unknown", sizeof(linfo->lmac_type)); + return; + } + +- lmac_string = cgx_lmactype_string[linfo->lmac_type_id]; +- strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1); ++ strscpy(linfo->lmac_type, cgx_lmactype_string[linfo->lmac_type_id], ++ sizeof(linfo->lmac_type)); + } + + /* Hardware event handlers */ diff --git a/queue-6.6/octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch 
b/queue-6.6/octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch new file mode 100644 index 0000000000..6bb6031656 --- /dev/null +++ b/queue-6.6/octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch @@ -0,0 +1,62 @@ +From stable+bounces-259287-greg=kroah.com@vger.kernel.org Sun May 31 01:18:03 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 15:47:56 -0400 +Subject: octeontx2-pf: avoid double free of pool->stack on AQ init failure +To: stable@vger.kernel.org +Cc: Dawei Feng , Zilin Guan , Simon Horman , Jakub Kicinski , Sasha Levin +Message-ID: <20260530194756.3258783-1-sashal@kernel.org> + +From: Dawei Feng + +[ Upstream commit 9b244c242bec48b37e82b89787afd6a4c43457e1 ] + +otx2_pool_aq_init() frees pool->stack when mailbox sync or retry +allocation fails, but leaves the pointer unchanged. Later, +otx2_sq_aura_pool_init() unwinds the partial setup through +otx2_aura_pool_free(), which frees pool->stack again. The CN20K-specific +cn20k_pool_aq_init() implementation has the same bug in +its corresponding error path. + +Set pool->stack to NULL immediately after the local free so the shared +cleanup path does not free the same stack again while cleaning up +partially initialized pool state. + +The bug was first flagged by an experimental analysis tool we are +developing for kernel memory-management bugs while analyzing +v6.13-rc1. The tool is still under development and is not yet publicly +available. Manual inspection confirms that the bug is still present in +v7.1-rc3. + +Runtime validation was not performed because reproducing this path +requires OcteonTX2/CN20K hardware. 
+ +Fixes: caa2da34fd25 ("octeontx2-pf: Initialize and config queues") +Fixes: d322fbd17203 ("octeontx2-pf: Initialize cn20k specific aura and pool contexts") +Cc: stable@vger.kernel.org +Signed-off-by: Zilin Guan +Signed-off-by: Dawei Feng +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20260515151826.1005397-1-dawei.feng@seu.edu.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +@@ -1386,11 +1386,13 @@ int otx2_pool_init(struct otx2_nic *pfvf + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + qmem_free(pfvf->dev, pool->stack); ++ pool->stack = NULL; + return err; + } + aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!aq) { + qmem_free(pfvf->dev, pool->stack); ++ pool->stack = NULL; + return -ENOMEM; + } + } diff --git a/queue-6.6/platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch b/queue-6.6/platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch new file mode 100644 index 0000000000..51473ebd38 --- /dev/null +++ b/queue-6.6/platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch @@ -0,0 +1,104 @@ +From stable+bounces-256916-greg=kroah.com@vger.kernel.org Sat May 30 20:11:28 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 10:37:55 -0400 +Subject: platform/x86/intel/vsec: Fix enable_cnt imbalance on PCIe error recovery +To: stable@vger.kernel.org +Cc: "Lukas Wunner" , "Ilpo Järvinen" , "Sasha Levin" +Message-ID: <20260530143755.2478131-1-sashal@kernel.org> + +From: Lukas Wunner + +[ Upstream commit 348ccc754d8939e21ca5956ff45720b81d6e407f ] + +After a PCIe Uncorrectable Error has been reported by a device with +Intel Vendor Specific Extended Capabilities and has been recovered +through a 
Secondary Bus Reset, its driver calls intel_vsec_pci_probe() +to rescan and reinitialize VSECs. + +intel_vsec_pci_probe() invokes pcim_enable_device() and thereby adds +another devm action which calls pcim_disable_device() on driver unbind. + +So once the driver unbinds, pcim_disable_device() will be called as many +times as an Uncorrectable Error occurred, plus one. This will lead to +an enable_cnt imbalance on driver unbind. + +Additionally, since commit dc957ab6aa05 ("platform/x86/intel/vsec: Add +private data for per-device data"), a devm_kzalloc() allocation is +leaked on every Uncorrectable Error. + +Avoid by splitting the VSEC rescan out of intel_vsec_pci_probe() into a +separate helper and calling that on PCIe error recovery. + +Fixes: 936874b77dd0 ("platform/x86/intel/vsec: Add PCI error recovery support to Intel PMT") +Signed-off-by: Lukas Wunner +Cc: stable@vger.kernel.org # v6.0+ +Link: https://patch.msgid.link/bd594d09fa866dc51dddc9a447c3b23f9b1402cc.1778736835.git.lukas@wunner.de +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/intel/vsec.c | 34 ++++++++++++++++++++-------------- + 1 file changed, 20 insertions(+), 14 deletions(-) + +--- a/drivers/platform/x86/intel/vsec.c ++++ b/drivers/platform/x86/intel/vsec.c +@@ -358,20 +358,10 @@ static bool intel_vsec_walk_vsec(struct + return have_devices; + } + +-static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ++static int intel_vsec_pci_init(struct pci_dev *pdev, ++ struct intel_vsec_platform_info *info) + { +- struct intel_vsec_platform_info *info; + bool have_devices = false; +- int ret; +- +- ret = pcim_enable_device(pdev); +- if (ret) +- return ret; +- +- pci_save_state(pdev); +- info = (struct intel_vsec_platform_info *)id->driver_data; +- if (!info) +- return -EINVAL; + + if (intel_vsec_walk_dvsec(pdev, info)) + have_devices = true; +@@ -389,6 +379,23 @@ static int 
intel_vsec_pci_probe(struct p + return 0; + } + ++static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ++{ ++ struct intel_vsec_platform_info *info; ++ int ret; ++ ++ ret = pcim_enable_device(pdev); ++ if (ret) ++ return ret; ++ ++ pci_save_state(pdev); ++ info = (struct intel_vsec_platform_info *)id->driver_data; ++ if (!info) ++ return -EINVAL; ++ ++ return intel_vsec_pci_init(pdev, info); ++} ++ + /* DG1 info */ + static struct intel_vsec_header dg1_header = { + .length = 0x10, +@@ -492,10 +499,9 @@ static pci_ers_result_t intel_vsec_pci_s + devm_release_action(&pdev->dev, intel_vsec_remove_aux, + &intel_vsec_dev->auxdev); + } +- pci_disable_device(pdev); + pci_restore_state(pdev); + pci_dev_id = pci_match_id(intel_vsec_pci_ids, pdev); +- intel_vsec_pci_probe(pdev, pci_dev_id); ++ intel_vsec_pci_init(pdev, (struct intel_vsec_platform_info *)pci_dev_id->driver_data); + + out: + return status; diff --git a/queue-6.6/rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch b/queue-6.6/rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch new file mode 100644 index 0000000000..06e7c1101f --- /dev/null +++ b/queue-6.6/rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch @@ -0,0 +1,443 @@ +From stable+bounces-256800-greg=kroah.com@vger.kernel.org Sat May 30 04:51:03 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 19:20:55 -0400 +Subject: rxrpc: Fix DATA decrypt vs splice() by copying data to buffer in recvmsg +To: stable@vger.kernel.org +Cc: David Howells , Hyunwoo Kim , Simon Horman , Jiayuan Chen , linux-afs@lists.infradead.org, Jeffrey Altman , Marc Dionne , Jakub Kicinski , Sasha Levin +Message-ID: <20260529232056.1870836-1-sashal@kernel.org> + +From: David Howells + +[ Upstream commit d2bc90cf6c75cb96d2ce549be6c35efa3099d25b ] + +This improves the fix for CVE-2026-43500. 
+ +Fix the pagecache corruption from in-place decryption of a DATA packet +transmitted locally by splice() by getting rid of the packet sharing in the +I/O thread and unconditionally extracting the packet content into a bounce +buffer in which it is decrypted. recvmsg() (or the kernel +equivalent) then copies the data from the bounce buffer to the destination +buffer. The sk_buff then remains unmodified. + +This has an additional advantage in that the packet is then arranged in the +buffer with the correct alignment required for the crypto algorithms to +process directly. The performance of the crypto does seem to be a little +faster and, surprisingly, the unencrypted performance doesn't seem to +change much - possibly due to removing complexity from the I/O thread. + +Yet another advantage is that the I/O thread doesn't have to copy packets +which would slow down packet distribution, ACK generation, etc. + +The buffer belongs to the call and is allocated initially at 2K, +sufficiently large to hold a whole jumbo subpacket, but the buffer will be +increased in size if needed. However, to make this work, MSG_PEEK may +cause a later packet to be decrypted into the buffer, in which case the +earlier one will need re-decrypting for a subsequent recvmsg(). + +Note that rx_pkt_offset may legitimately see 0 as a valid offset now, so +switch to using USHRT_MAX to indicate an invalid offset. + +Note also that I would generally prefer to replace the buffers of the +current sk_buff with a new kmalloc'd buffer of the right size, ditching the +old data and frags as this makes the handling of MSG_PEEK easier and +removes the re-decryption issue, but this looks like quite a complicated +thing to achieve. skb_morph() looks half way to what I want, but I don't +want to have to allocate a new sk_buff. 
+ +Fixes: d0d5c0cd1e71 ("rxrpc: Use skb_unshare() rather than skb_cow_data()") +Reported-by: Hyunwoo Kim +Closes: https://lore.kernel.org/r/afKV2zGR6rrelPC7@v4bel/ +Signed-off-by: David Howells +cc: Simon Horman +cc: Jiayuan Chen +cc: linux-afs@lists.infradead.org +Reviewed-by: Jeffrey Altman +Tested-by: Marc Dionne +Link: https://patch.msgid.link/20260515230516.2718212-3-dhowells@redhat.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8bfab4b6ffc2 ("rxrpc: Fix RESPONSE packet verification to extract skb to a linear buffer") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/ar-internal.h | 7 ++- + net/rxrpc/call_event.c | 27 +-------------- + net/rxrpc/call_object.c | 2 + + net/rxrpc/insecure.c | 3 - + net/rxrpc/recvmsg.c | 68 ++++++++++++++++++++++++++++++------- + net/rxrpc/rxkad.c | 86 +++++++++++++++++------------------------------- + 6 files changed, 96 insertions(+), 97 deletions(-) + +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -202,8 +202,6 @@ struct rxrpc_skb_priv { + struct { + u16 offset; /* Offset of data */ + u16 len; /* Length of data */ +- u8 flags; +-#define RXRPC_RX_VERIFIED 0x01 + }; + struct { + rxrpc_seq_t first_ack; /* First packet in acks table */ +@@ -677,6 +675,11 @@ struct rxrpc_call { + /* Received data tracking */ + struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */ + struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */ ++ void *rx_dec_buffer; /* Decryption buffer */ ++ unsigned short rx_dec_bsize; /* rx_dec_buffer size */ ++ unsigned short rx_dec_offset; /* Decrypted packet data offset */ ++ unsigned short rx_dec_len; /* Decrypted packet data len */ ++ rxrpc_seq_t rx_dec_seq; /* Packet in decryption buffer */ + + rxrpc_seq_t rx_highest_seq; /* Higest sequence number received */ + rxrpc_seq_t rx_consumed; /* Highest packet consumed */ +--- a/net/rxrpc/call_event.c ++++ b/net/rxrpc/call_event.c +@@ -456,31 +456,8 @@ bool 
rxrpc_input_call_event(struct rxrpc + resend = true; + } + +- if (skb) { +- struct rxrpc_skb_priv *sp = rxrpc_skb(skb); +- +- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && +- sp->hdr.securityIndex != 0 && +- (skb_cloned(skb) || +- skb_has_frag_list(skb) || +- skb_has_shared_frag(skb))) { +- /* Unshare the packet so that it can be modified by +- * in-place decryption. +- */ +- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); +- +- if (nskb) { +- rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); +- rxrpc_input_call_packet(call, nskb); +- rxrpc_free_skb(nskb, rxrpc_skb_put_input); +- } else { +- /* OOM - Drop the packet. */ +- rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); +- } +- } else { +- rxrpc_input_call_packet(call, skb); +- } +- } ++ if (skb) ++ rxrpc_input_call_packet(call, skb); + + rxrpc_transmit_some_data(call); + +--- a/net/rxrpc/call_object.c ++++ b/net/rxrpc/call_object.c +@@ -163,6 +163,7 @@ struct rxrpc_call *rxrpc_alloc_call(stru + spin_lock_init(&call->tx_lock); + refcount_set(&call->ref, 1); + call->debug_id = debug_id; ++ call->rx_pkt_offset = USHRT_MAX; + call->tx_total_len = -1; + call->next_rx_timo = 20 * HZ; + call->next_req_timo = 1 * HZ; +@@ -540,6 +541,7 @@ static void rxrpc_cleanup_ring(struct rx + { + rxrpc_purge_queue(&call->recvmsg_queue); + rxrpc_purge_queue(&call->rx_oos_queue); ++ kfree(call->rx_dec_buffer); + } + + /* +--- a/net/rxrpc/insecure.c ++++ b/net/rxrpc/insecure.c +@@ -32,9 +32,6 @@ static int none_secure_packet(struct rxr + + static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) + { +- struct rxrpc_skb_priv *sp = rxrpc_skb(skb); +- +- sp->flags |= RXRPC_RX_VERIFIED; + return 0; + } + +--- a/net/rxrpc/recvmsg.c ++++ b/net/rxrpc/recvmsg.c +@@ -143,15 +143,52 @@ static void rxrpc_rotate_rx_window(struc + } + + /* +- * Decrypt and verify a DATA packet. ++ * Decrypt and verify a DATA packet. The content of the packet is pulled out ++ * into a flat buffer rather than decrypting in place in the skbuff. 
This also ++ * has the advantage of aligning the buffer correctly for the crypto routines. ++ * ++ * We keep track of the sequence number of the packet currently decrypted into ++ * the buffer in ->rx_dec_seq. If MSG_PEEK is used and steps onto a new ++ * packet, subsequent recvmsg() calls will have to go back and re-decrypt the ++ * current packet. + */ + static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) + { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); ++ int ret; + +- if (sp->flags & RXRPC_RX_VERIFIED) +- return 0; +- return call->security->verify_packet(call, skb); ++ if (sp->len > call->rx_dec_bsize) { ++ /* Make sure we can hold a 1412-byte jumbo subpacket and make ++ * sure that the buffer size is aligned to a crypto blocksize. ++ */ ++ size_t size = clamp(round_up(sp->len, 32), 2048, 65535); ++ void *buffer = krealloc(call->rx_dec_buffer, size, GFP_NOFS); ++ ++ if (!buffer) ++ return -ENOMEM; ++ call->rx_dec_buffer = buffer; ++ call->rx_dec_bsize = size; ++ } ++ ++ ret = -EFAULT; ++ if (skb_copy_bits(skb, sp->offset, call->rx_dec_buffer, sp->len) < 0) ++ goto err; ++ ++ call->rx_dec_offset = 0; ++ call->rx_dec_len = sp->len; ++ call->rx_dec_seq = sp->hdr.seq; ++ ret = call->security->verify_packet(call, skb); ++ if (ret < 0) ++ goto err; ++ return 0; ++ ++err: ++ kfree(call->rx_dec_buffer); ++ call->rx_dec_buffer = NULL; ++ call->rx_dec_bsize = 0; ++ call->rx_dec_offset = 0; ++ call->rx_dec_len = 0; ++ return ret; + } + + /* +@@ -202,17 +239,22 @@ static int rxrpc_recvmsg_data(struct soc + if (msg) + sock_recv_timestamp(msg, sock->sk, skb); + +- if (rx_pkt_offset == 0) { ++ if (call->rx_dec_seq != sp->hdr.seq || ++ !call->rx_dec_buffer) { + ret2 = rxrpc_verify_data(call, skb); + trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq, +- sp->offset, sp->len, ret2); ++ call->rx_dec_offset, ++ call->rx_dec_len, ret2); + if (ret2 < 0) { + kdebug("verify = %d", ret2); + ret = ret2; + goto out; + } +- rx_pkt_offset = sp->offset; +- rx_pkt_len 
= sp->len; ++ } ++ ++ if (rx_pkt_offset == USHRT_MAX) { ++ rx_pkt_offset = call->rx_dec_offset; ++ rx_pkt_len = call->rx_dec_len; + } else { + trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq, + rx_pkt_offset, rx_pkt_len, 0); +@@ -224,10 +266,10 @@ static int rxrpc_recvmsg_data(struct soc + if (copy > remain) + copy = remain; + if (copy > 0) { +- ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, +- copy); +- if (ret2 < 0) { +- ret = ret2; ++ ret2 = copy_to_iter(call->rx_dec_buffer + rx_pkt_offset, ++ copy, iter); ++ if (ret2 != copy) { ++ ret = -EFAULT; + goto out; + } + +@@ -248,7 +290,7 @@ static int rxrpc_recvmsg_data(struct soc + /* The whole packet has been transferred. */ + if (sp->hdr.flags & RXRPC_LAST_PACKET) + ret = 1; +- rx_pkt_offset = 0; ++ rx_pkt_offset = USHRT_MAX; + rx_pkt_len = 0; + + skb = skb_peek_next(skb, &call->recvmsg_queue); +--- a/net/rxrpc/rxkad.c ++++ b/net/rxrpc/rxkad.c +@@ -412,27 +412,25 @@ static int rxkad_verify_packet_1(struct + rxrpc_seq_t seq, + struct skcipher_request *req) + { +- struct rxkad_level1_hdr sechdr; ++ struct rxkad_level1_hdr *sechdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_crypt iv; +- struct scatterlist sg[16]; +- u32 data_size, buf; ++ struct scatterlist sg[1]; ++ void *data = call->rx_dec_buffer; ++ u32 len = sp->len, data_size, buf; + u16 check; + int ret; + + _enter(""); + +- if (sp->len < 8) ++ if (len < 8) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_1_short_header); + + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. 
+ */ +- sg_init_table(sg, ARRAY_SIZE(sg)); +- ret = skb_to_sgvec(skb, sg, sp->offset, 8); +- if (unlikely(ret < 0)) +- return ret; ++ sg_init_one(sg, data, len); + + /* start the decryption afresh */ + memset(&iv, 0, sizeof(iv)); +@@ -446,13 +444,11 @@ static int rxkad_verify_packet_1(struct + return ret; + + /* Extract the decrypted packet length */ +- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) +- return rxrpc_abort_eproto(call, skb, RXKADDATALEN, +- rxkad_abort_1_short_encdata); +- sp->offset += sizeof(sechdr); +- sp->len -= sizeof(sechdr); ++ sechdr = data; ++ call->rx_dec_offset = sizeof(*sechdr); ++ len -= sizeof(*sechdr); + +- buf = ntohl(sechdr.data_size); ++ buf = ntohl(sechdr->data_size); + data_size = buf & 0xffff; + + check = buf >> 16; +@@ -461,10 +457,10 @@ static int rxkad_verify_packet_1(struct + if (check != 0) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_1_short_check); +- if (data_size > sp->len) ++ if (data_size > len) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_1_short_data); +- sp->len = data_size; ++ call->rx_dec_len = data_size; + + _leave(" = 0 [dlen=%x]", data_size); + return 0; +@@ -478,43 +474,28 @@ static int rxkad_verify_packet_2(struct + struct skcipher_request *req) + { + const struct rxrpc_key_token *token; +- struct rxkad_level2_hdr sechdr; ++ struct rxkad_level2_hdr *sechdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_crypt iv; +- struct scatterlist _sg[4], *sg; +- u32 data_size, buf; ++ struct scatterlist sg[1]; ++ void *data = call->rx_dec_buffer; ++ u32 len = sp->len, data_size, buf; + u16 check; +- int nsg, ret; ++ int ret; + +- _enter(",{%d}", sp->len); ++ _enter(",{%d}", len); + +- if (sp->len < 8) ++ if (len < 8) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_short_header); + + /* Don't let the crypto algo see a misaligned length. 
*/ +- sp->len = round_down(sp->len, 8); ++ len = round_down(len, 8); + +- /* Decrypt the skbuff in-place. TODO: We really want to decrypt +- * directly into the target buffer. ++ /* Decrypt in place in the call's decryption buffer. TODO: We really ++ * want to decrypt directly into the target buffer. + */ +- sg = _sg; +- nsg = skb_shinfo(skb)->nr_frags + 1; +- if (nsg <= 4) { +- nsg = 4; +- } else { +- sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO); +- if (!sg) +- return -ENOMEM; +- } +- +- sg_init_table(sg, nsg); +- ret = skb_to_sgvec(skb, sg, sp->offset, sp->len); +- if (unlikely(ret < 0)) { +- if (sg != _sg) +- kfree(sg); +- return ret; +- } ++ sg_init_one(sg, data, len); + + /* decrypt from the session key */ + token = call->conn->key->payload.data[0]; +@@ -522,11 +503,9 @@ static int rxkad_verify_packet_2(struct + + skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); +- skcipher_request_set_crypt(req, sg, sg, sp->len, iv.x); ++ skcipher_request_set_crypt(req, sg, sg, len, iv.x); + ret = crypto_skcipher_decrypt(req); + skcipher_request_zero(req); +- if (sg != _sg) +- kfree(sg); + if (ret < 0) { + if (ret == -ENOMEM) + return ret; +@@ -535,13 +514,11 @@ static int rxkad_verify_packet_2(struct + } + + /* Extract the decrypted packet length */ +- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) +- return rxrpc_abort_eproto(call, skb, RXKADDATALEN, +- rxkad_abort_2_short_len); +- sp->offset += sizeof(sechdr); +- sp->len -= sizeof(sechdr); ++ sechdr = data; ++ call->rx_dec_offset = sizeof(*sechdr); ++ len -= sizeof(*sechdr); + +- buf = ntohl(sechdr.data_size); ++ buf = ntohl(sechdr->data_size); + data_size = buf & 0xffff; + + check = buf >> 16; +@@ -551,17 +528,18 @@ static int rxkad_verify_packet_2(struct + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_short_check); + +- if (data_size > sp->len) ++ if (data_size > len) + return rxrpc_abort_eproto(call, skb, 
RXKADDATALEN, + rxkad_abort_2_short_data); + +- sp->len = data_size; ++ call->rx_dec_len = data_size; + _leave(" = 0 [dlen=%x]", data_size); + return 0; + } + + /* +- * Verify the security on a received packet and the subpackets therein. ++ * Verify the security on a received (sub)packet. If the packet needs ++ * modifying (e.g. decrypting), it must be copied. + */ + static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) + { diff --git a/queue-6.6/rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch b/queue-6.6/rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch new file mode 100644 index 0000000000..84224e1aa5 --- /dev/null +++ b/queue-6.6/rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch @@ -0,0 +1,192 @@ +From stable+bounces-256801-greg=kroah.com@vger.kernel.org Sat May 30 04:51:30 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 19:20:56 -0400 +Subject: rxrpc: Fix RESPONSE packet verification to extract skb to a linear buffer +To: stable@vger.kernel.org +Cc: David Howells , Hyunwoo Kim , Simon Horman , Jiayuan Chen , linux-afs@lists.infradead.org, stable@kernel.org, Jeffrey Altman , Marc Dionne , Jakub Kicinski , Sasha Levin +Message-ID: <20260529232056.1870836-2-sashal@kernel.org> + +From: David Howells + +[ Upstream commit 8bfab4b6ffc2fe92da86300728fc8c3c7ebffb56 ] + +This improves the fix for CVE-2026-43500. + +Fix the verification of RESPONSE packets to avoid the problem of +overwriting a RESPONSE packet sent via splice to a local address by +extracting the contents of the UDP packet into a kmalloc'd linear buffer +rather than decrypting the data in place in the sk_buff (which may corrupt +the original buffer). 
+ +Fixes: 24481a7f5733 ("rxrpc: Fix conn-level packet handling to unshare RESPONSE packets") +Reported-by: Hyunwoo Kim +Closes: https://lore.kernel.org/r/afKV2zGR6rrelPC7@v4bel/ +Signed-off-by: David Howells +cc: Simon Horman +cc: Jiayuan Chen +cc: linux-afs@lists.infradead.org +cc: stable@kernel.org +Reviewed-by: Jeffrey Altman +Tested-by: Marc Dionne +Link: https://patch.msgid.link/20260515230516.2718212-4-dhowells@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/ar-internal.h | 5 +++-- + net/rxrpc/conn_event.c | 30 ++++++++++++------------------ + net/rxrpc/insecure.c | 5 +++-- + net/rxrpc/rxkad.c | 29 ++++++++++------------------- + 4 files changed, 28 insertions(+), 41 deletions(-) + +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -267,8 +267,9 @@ struct rxrpc_security { + struct sk_buff *); + + /* verify a response */ +- int (*verify_response)(struct rxrpc_connection *, +- struct sk_buff *); ++ int (*verify_response)(struct rxrpc_connection *conn, ++ struct sk_buff *response_skb, ++ void *response, unsigned int len); + + /* clear connection security */ + void (*clear)(struct rxrpc_connection *); +--- a/net/rxrpc/conn_event.c ++++ b/net/rxrpc/conn_event.c +@@ -229,28 +229,22 @@ static void rxrpc_call_is_secure(struct + static int rxrpc_verify_response(struct rxrpc_connection *conn, + struct sk_buff *skb) + { ++ unsigned int len = skb->len - sizeof(struct rxrpc_wire_header); ++ void *buffer; + int ret; + +- if (skb_cloned(skb) || skb_has_frag_list(skb) || +- skb_has_shared_frag(skb)) { +- /* Copy the packet if shared so that we can do in-place +- * decryption. 
+- */ +- struct sk_buff *nskb = skb_copy(skb, GFP_NOFS); ++ buffer = kmalloc(len, GFP_NOFS); ++ if (!buffer) ++ return -ENOMEM; + +- if (nskb) { +- rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); +- ret = conn->security->verify_response(conn, nskb); +- rxrpc_free_skb(nskb, rxrpc_skb_put_response_copy); +- } else { +- /* OOM - Drop the packet. */ +- rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); +- ret = -ENOMEM; +- } +- } else { +- ret = conn->security->verify_response(conn, skb); +- } ++ ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), buffer, len); ++ if (ret < 0) ++ goto out; + ++ ret = conn->security->verify_response(conn, skb, buffer, len); ++ ++out: ++ kfree(buffer); + return ret; + } + +--- a/net/rxrpc/insecure.c ++++ b/net/rxrpc/insecure.c +@@ -47,9 +47,10 @@ static int none_respond_to_challenge(str + } + + static int none_verify_response(struct rxrpc_connection *conn, +- struct sk_buff *skb) ++ struct sk_buff *response_skb, ++ void *response, unsigned int len) + { +- return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, ++ return rxrpc_abort_conn(conn, response_skb, RX_PROTOCOL_ERROR, -EPROTO, + rxrpc_eproto_rxnull_response); + } + +--- a/net/rxrpc/rxkad.c ++++ b/net/rxrpc/rxkad.c +@@ -874,7 +874,6 @@ static int rxkad_decrypt_ticket(struct r + *_expiry = 0; + + ASSERT(server_key->payload.data[0] != NULL); +- ASSERTCMP((unsigned long) ticket & 7UL, ==, 0); + + memcpy(&iv, &server_key->payload.data[2], sizeof(iv)); + +@@ -1023,14 +1022,15 @@ unlock: + * verify a response + */ + static int rxkad_verify_response(struct rxrpc_connection *conn, +- struct sk_buff *skb) ++ struct sk_buff *skb, ++ void *buffer, unsigned int len) + { + struct rxkad_response *response; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_crypt session_key; + struct key *server_key; + time64_t expiry; +- void *ticket = NULL; ++ void *ticket; + u32 version, kvno, ticket_len, level; + __be32 csum; + int ret, i; +@@ -1053,13 +1053,8 @@ static int 
rxkad_verify_response(struct + } + } + +- ret = -ENOMEM; +- response = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); +- if (!response) +- goto error; +- +- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), +- response, sizeof(*response)) < 0) { ++ response = buffer; ++ if (len < sizeof(*response)) { + ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short); + goto error; +@@ -1071,6 +1066,9 @@ static int rxkad_verify_response(struct + + trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); + ++ buffer += sizeof(*response); ++ len -= sizeof(*response); ++ + if (version != RXKAD_VERSION) { + ret = rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_resp_version); +@@ -1090,13 +1088,8 @@ static int rxkad_verify_response(struct + } + + /* extract the kerberos ticket and decrypt and decode it */ +- ret = -ENOMEM; +- ticket = kmalloc(ticket_len, GFP_NOFS); +- if (!ticket) +- goto error; +- +- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), +- ticket, ticket_len) < 0) { ++ ticket = buffer; ++ if (ticket_len > len) { + ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short_tkt); + goto error; +@@ -1176,8 +1169,6 @@ static int rxkad_verify_response(struct + ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); + + error: +- kfree(ticket); +- kfree(response); + key_put(server_key); + _leave(" = %d", ret); + return ret; diff --git a/queue-6.6/selftests-mptcp-drop-nanoseconds-width-specifier.patch b/queue-6.6/selftests-mptcp-drop-nanoseconds-width-specifier.patch new file mode 100644 index 0000000000..3c1e487665 --- /dev/null +++ b/queue-6.6/selftests-mptcp-drop-nanoseconds-width-specifier.patch @@ -0,0 +1,69 @@ +From stable+bounces-256886-greg=kroah.com@vger.kernel.org Sat May 30 17:20:16 2026 +From: Sasha Levin +Date: Sat, 30 May 2026 07:49:22 -0400 +Subject: selftests: mptcp: drop nanoseconds width specifier +To: 
stable@vger.kernel.org +Cc: "Matthieu Baerts (NGI0)" , Paolo Abeni , Sasha Levin +Message-ID: <20260530114922.1970609-1-sashal@kernel.org> + +From: "Matthieu Baerts (NGI0)" + +[ Upstream commit 01ff78e4b3d98689184c52d97f9575dfbdc3b10f ] + +Using the format specifier +%s%3N with GNU date is honoured, and only +prints 3 digits of the nanoseconds portion of the seconds since epoch, +which corresponds to the milliseconds. + +The uutils implementation of date currently does not honour this, and +always prints all 9 digits. This is a known issue [1], but can be worked +around by adapting this test to use nanoseconds instead of milliseconds, +and then dividing it by 1e6. + +This fix is similar to what has been done on systemd side [2], and it is +needed to run the selftests on Ubuntu 26.04, containing uutils 0.8.0. + +Note that the Fixes tag is there even if this patch doesn't fix an issue +in the kernel selftests, but it is useful for those using uutils 0.8.0. + +Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") +Cc: stable@vger.kernel.org +Link: https://github.com/uutils/coreutils/issues/11658 [1] +Link: https://github.com/systemd/systemd/pull/41627 [2] +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-6-701e96419f2f@kernel.org +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/mptcp_connect.sh | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh +@@ -434,7 +434,7 @@ do_transfer() + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" + + local start +- start=$(date +%s%3N) ++ start=$(date +%s%N) + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ +@@ -447,7 +447,7 @@ do_transfer() + local rets=$? 
+ + local stop +- stop=$(date +%s%3N) ++ stop=$(date +%s%N) + + if $capture; then + sleep 1 +@@ -463,7 +463,7 @@ do_transfer() + fi + + local duration +- duration=$((stop-start)) ++ duration=$(((stop-start) / 1000000)) + result_msg+=" # time=${duration}ms" + printf "(duration %05sms) " "${duration}" + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then diff --git a/queue-6.6/serdev-make-serdev_bus_type-const.patch b/queue-6.6/serdev-make-serdev_bus_type-const.patch new file mode 100644 index 0000000000..3dad028e88 --- /dev/null +++ b/queue-6.6/serdev-make-serdev_bus_type-const.patch @@ -0,0 +1,38 @@ +From stable+bounces-256743-greg=kroah.com@vger.kernel.org Sat May 30 01:03:13 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 15:33:00 -0400 +Subject: serdev: make serdev_bus_type const +To: stable@vger.kernel.org +Cc: "Ricardo B. Marliere" , Greg Kroah-Hartman , Sasha Levin +Message-ID: <20260529193303.1704693-1-sashal@kernel.org> + +From: "Ricardo B. Marliere" + +[ Upstream commit 88cddfb7bf23b06876da6c3e9f296e666d0f6332 ] + +Now that the driver core can properly handle constant struct bus_type, +move the serdev_bus_type variable to be a constant structure as well, +placing it into read-only memory which can not be modified at runtime. + +Suggested-by: Greg Kroah-Hartman +Signed-off-by: "Ricardo B. 
Marliere" +Link: https://lore.kernel.org/r/20240203-bus_cleanup-tty-v1-1-86b698c82efe@marliere.net +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 375ba7484132 ("Bluetooth: hci_qca: Convert timeout from jiffies to ms") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serdev/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/tty/serdev/core.c ++++ b/drivers/tty/serdev/core.c +@@ -441,7 +441,7 @@ static void serdev_drv_remove(struct dev + dev_pm_domain_detach(dev, true); + } + +-static struct bus_type serdev_bus_type = { ++static const struct bus_type serdev_bus_type = { + .name = "serial", + .match = serdev_device_match, + .probe = serdev_drv_probe, diff --git a/queue-6.6/serdev-provide-a-bustype-shutdown-function.patch b/queue-6.6/serdev-provide-a-bustype-shutdown-function.patch new file mode 100644 index 0000000000..8301d578d0 --- /dev/null +++ b/queue-6.6/serdev-provide-a-bustype-shutdown-function.patch @@ -0,0 +1,88 @@ +From stable+bounces-256744-greg=kroah.com@vger.kernel.org Sat May 30 01:04:47 2026 +From: Sasha Levin +Date: Fri, 29 May 2026 15:33:01 -0400 +Subject: serdev: Provide a bustype shutdown function +To: stable@vger.kernel.org +Cc: "Uwe Kleine-König" , "Greg Kroah-Hartman" , "Sasha Levin" +Message-ID: <20260529193303.1704693-2-sashal@kernel.org> + +From: Uwe Kleine-König + +[ Upstream commit 6d71c62b13c33ea858ab298fe20beaec5736edc7 ] + +To prepare serdev driver to migrate away from struct device_driver::shutdown +(and then eventually remove that callback) create a serdev driver shutdown +callback and migration code to keep the existing behaviour. Note this +introduces a warning for each driver at register time that isn't converted +yet to that callback. 
+ +Signed-off-by: Uwe Kleine-König +Link: https://patch.msgid.link/ab518883e3ed0976a19cb5b5b5faf42bd3a655b7.1765526117.git.u.kleine-koenig@baylibre.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 375ba7484132 ("Bluetooth: hci_qca: Convert timeout from jiffies to ms") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serdev/core.c | 21 +++++++++++++++++++++ + include/linux/serdev.h | 1 + + 2 files changed, 22 insertions(+) + +--- a/drivers/tty/serdev/core.c ++++ b/drivers/tty/serdev/core.c +@@ -441,11 +441,21 @@ static void serdev_drv_remove(struct dev + dev_pm_domain_detach(dev, true); + } + ++static void serdev_drv_shutdown(struct device *dev) ++{ ++ const struct serdev_device_driver *sdrv = ++ to_serdev_device_driver(dev->driver); ++ ++ if (dev->driver && sdrv->shutdown) ++ sdrv->shutdown(to_serdev_device(dev)); ++} ++ + static const struct bus_type serdev_bus_type = { + .name = "serial", + .match = serdev_device_match, + .probe = serdev_drv_probe, + .remove = serdev_drv_remove, ++ .shutdown = serdev_drv_shutdown, + }; + + /** +@@ -839,6 +849,14 @@ void serdev_controller_remove(struct ser + } + EXPORT_SYMBOL_GPL(serdev_controller_remove); + ++static void serdev_legacy_shutdown(struct serdev_device *serdev) ++{ ++ struct device *dev = &serdev->dev; ++ struct device_driver *driver = dev->driver; ++ ++ driver->shutdown(dev); ++} ++ + /** + * __serdev_device_driver_register() - Register client driver with serdev core + * @sdrv: client driver to be associated with client-device. 
+@@ -855,6 +873,9 @@ int __serdev_device_driver_register(stru + /* force drivers to async probe so I/O is possible in probe */ + sdrv->driver.probe_type = PROBE_PREFER_ASYNCHRONOUS; + ++ if (!sdrv->shutdown && sdrv->driver.shutdown) ++ sdrv->shutdown = serdev_legacy_shutdown; ++ + return driver_register(&sdrv->driver); + } + EXPORT_SYMBOL_GPL(__serdev_device_driver_register); +--- a/include/linux/serdev.h ++++ b/include/linux/serdev.h +@@ -65,6 +65,7 @@ struct serdev_device_driver { + struct device_driver driver; + int (*probe)(struct serdev_device *); + void (*remove)(struct serdev_device *); ++ void (*shutdown)(struct serdev_device *); + }; + + static inline struct serdev_device_driver *to_serdev_device_driver(struct device_driver *d) diff --git a/queue-6.6/series b/queue-6.6/series index 25537c35c1..df6775514d 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -377,3 +377,32 @@ ksmbd-fix-oob-write-in-query_info-for-compound-requests.patch cgroup-cpuset-reset-dl-migration-state-on-can_attach-failure.patch genetlink-use-internal-flags-for-multicast-groups.patch smb-client-require-net-admin-for-cifs-swn-netlink.patch +platform-x86-intel-vsec-fix-enable_cnt-imbalance-on-pcie-error-recovery.patch +octeontx2-af-replace-deprecated-strncpy-with-strscpy.patch +octeontx2-af-cgx-add-bounds-check-to-cgx_speed_mbps-index.patch +mptcp-use-plain-bool-instead-of-custom-binary-enum.patch +mptcp-cleanup-fallback-dummy-mapping-generation.patch +mptcp-reset-rcv-wnd-on-disconnect.patch +selftests-mptcp-drop-nanoseconds-width-specifier.patch +ice-fix-vf-queue-configuration-with-low-mtu-values.patch +mptcp-pm-fix-add_addr-timer-infinite-retry-on-option-space-insufficient.patch +ipv6-addrconf-annotate-data-races-around-devconf-fields-ii.patch +ipv6-ioam-add-null-check-for-idev-in-ipv6_hop_ioam.patch +net-hsr-defer-node-table-free-until-after-rcu-readers.patch +rxrpc-fix-data-decrypt-vs-splice-by-copying-data-to-buffer-in-recvmsg.patch 
+rxrpc-fix-response-packet-verification-to-extract-skb-to-a-linear-buffer.patch +af_unix-cache-state-msg-in-unix_stream_read_generic.patch +af_unix-fix-uaf-read-of-tail-len-in-unix_stream_data_wait.patch +octeontx2-pf-avoid-double-free-of-pool-stack-on-aq-init-failure.patch +arm64-tlb-flush-walk-cache-when-unsharing-pmd-tables.patch +iio-adc-npcm-convert-to-platform-remove-callback-returning-void.patch +iio-adc-npcm-fix-unbalanced-clk_disable_unprepare.patch +mptcp-introduce-the-mptcp_init_skb-helper.patch +mptcp-do-not-drop-partial-packets.patch +mm-memory-fix-spurious-warning-when-unmapping-device-private-exclusive-pages.patch +mm-damon-sysfs-schemes-delete-tried-region-in-regions_rmdirs.patch +serdev-make-serdev_bus_type-const.patch +serdev-provide-a-bustype-shutdown-function.patch +bluetooth-hci_qca-migrate-to-serdev-specific-shutdown-function.patch +bluetooth-hci_qca-convert-timeout-from-jiffies-to-ms.patch +mptcp-handle-first-subflow-closing-consistently.patch