From: Greg Kroah-Hartman Date: Fri, 22 Aug 2025 06:09:01 +0000 (+0200) Subject: 6.16-stable patches X-Git-Tag: v6.16.3~71 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3e71da6f5f8b318874f57d5b611dc5140d05597d;p=thirdparty%2Fkernel%2Fstable-queue.git 6.16-stable patches added patches: acpi-apei-einj-fix-resource-leak-by-remove-callback-in-.exit.text.patch acpi-pfr_update-fix-the-driver-update-version-check.patch cgroup-avoid-null-de-ref-in-css_rstat_exit.patch cpuidle-governors-menu-avoid-selecting-states-with-too-much-latency.patch ipv6-sr-fix-mac-comparison-to-be-constant-time.patch loongarch-kvm-add-address-alignment-check-in-pch_pic-register-access.patch loongarch-kvm-fix-stack-protector-issue-in-send_ipi_data.patch loongarch-kvm-make-function-kvm_own_lbt-robust.patch mptcp-disable-add_addr-retransmission-when-timeout-is-0.patch mptcp-drop-skb-if-mptcp-skb-extension-allocation-fails.patch mptcp-pm-kernel-flush-do-not-reset-add_addr-limit.patch mptcp-remove-duplicate-sk_reset_timer-call.patch net-hsr-reject-hsr-frame-if-skb-can-t-hold-tag.patch sched-ext-fix-invalid-task-state-transitions-on-class-switch.patch selftests-mptcp-connect-fix-c23-extension-warning.patch selftests-mptcp-pm-check-flush-doesn-t-reset-limits.patch selftests-mptcp-sockopt-fix-c23-extension-warning.patch --- diff --git a/queue-6.16/acpi-apei-einj-fix-resource-leak-by-remove-callback-in-.exit.text.patch b/queue-6.16/acpi-apei-einj-fix-resource-leak-by-remove-callback-in-.exit.text.patch new file mode 100644 index 0000000000..0b054d551f --- /dev/null +++ b/queue-6.16/acpi-apei-einj-fix-resource-leak-by-remove-callback-in-.exit.text.patch @@ -0,0 +1,60 @@ +From b21d1fbb97c814c76ffa392cd603f8cd3ecc0355 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= +Date: Thu, 14 Aug 2025 07:11:57 +0200 +Subject: ACPI: APEI: EINJ: Fix resource leak by remove callback in .exit.text +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: 
Uwe Kleine-König + +commit b21d1fbb97c814c76ffa392cd603f8cd3ecc0355 upstream. + +The .remove() callback is also used during error handling in +faux_probe(). As einj_remove() was marked with __exit it's not linked +into the kernel if the driver is built-in, potentially resulting in +resource leaks. + +Also remove the comment justifying the __exit annotation which doesn't +apply any more since the driver was converted to the faux device +interface. + +Fixes: 6cb9441bfe8d ("ACPI: APEI: EINJ: Transition to the faux device interface") +Signed-off-by: Uwe Kleine-König +Cc: 6.16+ # 6.16+ +Link: https://patch.msgid.link/20250814051157.35867-2-u.kleine-koenig@baylibre.com +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/apei/einj-core.c | 12 +++--------- + 1 file changed, 3 insertions(+), 9 deletions(-) + +--- a/drivers/acpi/apei/einj-core.c ++++ b/drivers/acpi/apei/einj-core.c +@@ -842,7 +842,7 @@ err_put_table: + return rc; + } + +-static void __exit einj_remove(struct faux_device *fdev) ++static void einj_remove(struct faux_device *fdev) + { + struct apei_exec_context ctx; + +@@ -864,15 +864,9 @@ static void __exit einj_remove(struct fa + } + + static struct faux_device *einj_dev; +-/* +- * einj_remove() lives in .exit.text. For drivers registered via +- * platform_driver_probe() this is ok because they cannot get unbound at +- * runtime. So mark the driver struct with __refdata to prevent modpost +- * triggering a section mismatch warning. 
+- */ +-static struct faux_device_ops einj_device_ops __refdata = { ++static struct faux_device_ops einj_device_ops = { + .probe = einj_probe, +- .remove = __exit_p(einj_remove), ++ .remove = einj_remove, + }; + + static int __init einj_init(void) diff --git a/queue-6.16/acpi-pfr_update-fix-the-driver-update-version-check.patch b/queue-6.16/acpi-pfr_update-fix-the-driver-update-version-check.patch new file mode 100644 index 0000000000..9e69eb0518 --- /dev/null +++ b/queue-6.16/acpi-pfr_update-fix-the-driver-update-version-check.patch @@ -0,0 +1,49 @@ +From 8151320c747efb22d30b035af989fed0d502176e Mon Sep 17 00:00:00 2001 +From: Chen Yu +Date: Tue, 22 Jul 2025 22:32:33 +0800 +Subject: ACPI: pfr_update: Fix the driver update version check + +From: Chen Yu + +commit 8151320c747efb22d30b035af989fed0d502176e upstream. + +The security-version-number check should be used rather +than the runtime version check for driver updates. + +Otherwise, the firmware update would fail when the update binary had +a lower runtime version number than the current one. + +Fixes: 0db89fa243e5 ("ACPI: Introduce Platform Firmware Runtime Update device driver") +Cc: 5.17+ # 5.17+ +Reported-by: "Govindarajulu, Hariganesh" +Signed-off-by: Chen Yu +Link: https://patch.msgid.link/20250722143233.3970607-1-yu.c.chen@intel.com +[ rjw: Changelog edits ] +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/pfr_update.c | 2 +- + include/uapi/linux/pfrut.h | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/acpi/pfr_update.c ++++ b/drivers/acpi/pfr_update.c +@@ -310,7 +310,7 @@ static bool applicable_image(const void + if (type == PFRU_CODE_INJECT_TYPE) + return payload_hdr->rt_ver >= cap->code_rt_version; + +- return payload_hdr->rt_ver >= cap->drv_rt_version; ++ return payload_hdr->svn_ver >= cap->drv_svn; + } + + static void print_update_debug_info(struct pfru_updated_result *result, +--- a/include/uapi/linux/pfrut.h ++++ b/include/uapi/linux/pfrut.h +@@ -89,6 +89,7 @@ struct pfru_payload_hdr { + __u32 hw_ver; + __u32 rt_ver; + __u8 platform_id[16]; ++ __u32 svn_ver; + }; + + enum pfru_dsm_status { diff --git a/queue-6.16/cgroup-avoid-null-de-ref-in-css_rstat_exit.patch b/queue-6.16/cgroup-avoid-null-de-ref-in-css_rstat_exit.patch new file mode 100644 index 0000000000..b860937569 --- /dev/null +++ b/queue-6.16/cgroup-avoid-null-de-ref-in-css_rstat_exit.patch @@ -0,0 +1,67 @@ +From eea51c6e3f6675b795f6439eaa960eb2948d6905 Mon Sep 17 00:00:00 2001 +From: JP Kobryn +Date: Wed, 6 Aug 2025 17:33:50 -0700 +Subject: cgroup: avoid null de-ref in css_rstat_exit() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: JP Kobryn + +commit eea51c6e3f6675b795f6439eaa960eb2948d6905 upstream. + +css_rstat_exit() may be called asynchronously in scenarios where preceding +calls to css_rstat_init() have not completed. One such example is this +sequence below: + +css_create(...) +{ + ... + init_and_link_css(css, ...); + + err = percpu_ref_init(...); + if (err) + goto err_free_css; + err = cgroup_idr_alloc(...); + if (err) + goto err_free_css; + err = css_rstat_init(css, ...); + if (err) + goto err_free_css; + ... 
+err_free_css: + INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); + queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); + return ERR_PTR(err); +} + +If any of the three goto jumps are taken, async cleanup will begin and +css_rstat_exit() will be invoked on an uninitialized css->rstat_cpu. + +Avoid accessing the uninitialized field by returning early in +css_rstat_exit() if this is the case. + +Signed-off-by: JP Kobryn +Suggested-by: Michal Koutný +Fixes: 5da3bfa029d68 ("cgroup: use separate rstat trees for each subsystem") +Cc: stable@vger.kernel.org # v6.16 +Reported-by: syzbot+8d052e8b99e40bc625ed@syzkaller.appspotmail.com +Acked-by: Shakeel Butt +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/rstat.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/kernel/cgroup/rstat.c ++++ b/kernel/cgroup/rstat.c +@@ -488,6 +488,9 @@ void css_rstat_exit(struct cgroup_subsys + if (!css_uses_rstat(css)) + return; + ++ if (!css->rstat_cpu) ++ return; ++ + css_rstat_flush(css); + + /* sanity check */ diff --git a/queue-6.16/cpuidle-governors-menu-avoid-selecting-states-with-too-much-latency.patch b/queue-6.16/cpuidle-governors-menu-avoid-selecting-states-with-too-much-latency.patch new file mode 100644 index 0000000000..9c5a869dd8 --- /dev/null +++ b/queue-6.16/cpuidle-governors-menu-avoid-selecting-states-with-too-much-latency.patch @@ -0,0 +1,122 @@ +From 779b1a1cb13ae17028aeddb2fbbdba97357a1e15 Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Wed, 13 Aug 2025 12:25:58 +0200 +Subject: cpuidle: governors: menu: Avoid selecting states with too much latency + +From: Rafael J. Wysocki + +commit 779b1a1cb13ae17028aeddb2fbbdba97357a1e15 upstream. + +Occasionally, the exit latency of the idle state selected by the menu +governor may exceed the PM QoS CPU wakeup latency limit. 
Namely, if the +scheduler tick has been stopped already and predicted_ns is greater than +the tick period length, the governor may return an idle state whose exit +latency exceeds latency_req because that decision is made before +checking the current idle state's exit latency. + +For instance, say that there are 3 idle states, 0, 1, and 2. For idle +states 0 and 1, the exit latency is equal to the target residency and +the values are 0 and 5 us, respectively. State 2 is deeper and has the +exit latency and target residency of 200 us and 2 ms (which is greater +than the tick period length), respectively. + +Say that predicted_ns is equal to TICK_NSEC and the PM QoS latency +limit is 20 us. After the first two iterations of the main loop in +menu_select(), idx becomes 1 and in the third iteration of it the target +residency of the current state (state 2) is greater than predicted_ns. +State 2 is not a polling one and predicted_ns is not less than TICK_NSEC, +so the check on whether or not the tick has been stopped is done. Say +that the tick has been stopped already and there are no imminent timers +(that is, delta_tick is greater than the target residency of state 2). +In that case, idx becomes 2 and it is returned immediately, but the exit +latency of state 2 exceeds the latency limit. + +Address this issue by modifying the code to compare the exit latency of +the current idle state (idle state i) with the latency limit before +comparing its target residency with predicted_ns, which allows one +more exit_latency_ns check that becomes redundant to be dropped. + +However, after the above change, latency_req cannot take the predicted_ns +value any more, which takes place after commit 38f83090f515 ("cpuidle: +menu: Remove iowait influence"), because it may cause a polling state +to be returned prematurely. + +In the context of the previous example say that predicted_ns is 3000 and +the PM QoS latency limit is still 20 us. 
Additionally, say that idle +state 0 is a polling one. Moving the exit_latency_ns check before the +target_residency_ns one causes the loop to terminate in the second +iteration, before the target_residency_ns check, so idle state 0 will be +returned even though previously state 1 would be returned if there were +no imminent timers. + +For this reason, remove the assignment of the predicted_ns value to +latency_req from the code. + +Fixes: 5ef499cd571c ("cpuidle: menu: Handle stopped tick more aggressively") +Cc: 4.17+ # 4.17+ +Signed-off-by: Rafael J. Wysocki +Reviewed-by: Christian Loehle +Link: https://patch.msgid.link/5043159.31r3eYUQgx@rafael.j.wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpuidle/governors/menu.c | 29 ++++++++++++----------------- + 1 file changed, 12 insertions(+), 17 deletions(-) + +--- a/drivers/cpuidle/governors/menu.c ++++ b/drivers/cpuidle/governors/menu.c +@@ -287,20 +287,15 @@ static int menu_select(struct cpuidle_dr + return 0; + } + +- if (tick_nohz_tick_stopped()) { +- /* +- * If the tick is already stopped, the cost of possible short +- * idle duration misprediction is much higher, because the CPU +- * may be stuck in a shallow idle state for a long time as a +- * result of it. In that case say we might mispredict and use +- * the known time till the closest timer event for the idle +- * state selection. +- */ +- if (predicted_ns < TICK_NSEC) +- predicted_ns = data->next_timer_ns; +- } else if (latency_req > predicted_ns) { +- latency_req = predicted_ns; +- } ++ /* ++ * If the tick is already stopped, the cost of possible short idle ++ * duration misprediction is much higher, because the CPU may be stuck ++ * in a shallow idle state for a long time as a result of it. In that ++ * case, say we might mispredict and use the known time till the closest ++ * timer event for the idle state selection. 
++ */ ++ if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC) ++ predicted_ns = data->next_timer_ns; + + /* + * Find the idle state with the lowest power while satisfying +@@ -316,13 +311,15 @@ static int menu_select(struct cpuidle_dr + if (idx == -1) + idx = i; /* first enabled state */ + ++ if (s->exit_latency_ns > latency_req) ++ break; ++ + if (s->target_residency_ns > predicted_ns) { + /* + * Use a physical idle state, not busy polling, unless + * a timer is going to trigger soon enough. + */ + if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && +- s->exit_latency_ns <= latency_req && + s->target_residency_ns <= data->next_timer_ns) { + predicted_ns = s->target_residency_ns; + idx = i; +@@ -354,8 +351,6 @@ static int menu_select(struct cpuidle_dr + + return idx; + } +- if (s->exit_latency_ns > latency_req) +- break; + + idx = i; + } diff --git a/queue-6.16/ipv6-sr-fix-mac-comparison-to-be-constant-time.patch b/queue-6.16/ipv6-sr-fix-mac-comparison-to-be-constant-time.patch new file mode 100644 index 0000000000..7aa475e6e0 --- /dev/null +++ b/queue-6.16/ipv6-sr-fix-mac-comparison-to-be-constant-time.patch @@ -0,0 +1,42 @@ +From a458b2902115b26a25d67393b12ddd57d1216aaa Mon Sep 17 00:00:00 2001 +From: Eric Biggers +Date: Mon, 18 Aug 2025 13:27:24 -0700 +Subject: ipv6: sr: Fix MAC comparison to be constant-time + +From: Eric Biggers + +commit a458b2902115b26a25d67393b12ddd57d1216aaa upstream. + +To prevent timing attacks, MACs need to be compared in constant time. +Use the appropriate helper function for this. 
+ +Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support") +Cc: stable@vger.kernel.org +Signed-off-by: Eric Biggers +Reviewed-by: Andrea Mayer +Link: https://patch.msgid.link/20250818202724.15713-1-ebiggers@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/seg6_hmac.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv6/seg6_hmac.c ++++ b/net/ipv6/seg6_hmac.c +@@ -35,6 +35,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -280,7 +281,7 @@ bool seg6_hmac_validate_skb(struct sk_bu + if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) + return false; + +- if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) ++ if (crypto_memneq(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN)) + return false; + + return true; diff --git a/queue-6.16/loongarch-kvm-add-address-alignment-check-in-pch_pic-register-access.patch b/queue-6.16/loongarch-kvm-add-address-alignment-check-in-pch_pic-register-access.patch new file mode 100644 index 0000000000..e0fbb3dc55 --- /dev/null +++ b/queue-6.16/loongarch-kvm-add-address-alignment-check-in-pch_pic-register-access.patch @@ -0,0 +1,48 @@ +From 538c06e3964a8e94b645686cc58ccc4a06fa6330 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 20 Aug 2025 22:51:15 +0800 +Subject: LoongArch: KVM: Add address alignment check in pch_pic register access + +From: Bibo Mao + +commit 538c06e3964a8e94b645686cc58ccc4a06fa6330 upstream. + +With pch_pic device, its register is based on MMIO address space, +different access size 1/2/4/8 is supported. And base address should +be naturally aligned with its access size, here add alignment check +in its register access emulation function. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Bibo Mao +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kvm/intc/pch_pic.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/arch/loongarch/kvm/intc/pch_pic.c ++++ b/arch/loongarch/kvm/intc/pch_pic.c +@@ -195,6 +195,11 @@ static int kvm_pch_pic_read(struct kvm_v + return -EINVAL; + } + ++ if (addr & (len - 1)) { ++ kvm_err("%s: pch pic not aligned addr %llx len %d\n", __func__, addr, len); ++ return -EINVAL; ++ } ++ + /* statistics of pch pic reading */ + vcpu->kvm->stat.pch_pic_read_exits++; + ret = loongarch_pch_pic_read(s, addr, len, val); +@@ -302,6 +307,11 @@ static int kvm_pch_pic_write(struct kvm_ + return -EINVAL; + } + ++ if (addr & (len - 1)) { ++ kvm_err("%s: pch pic not aligned addr %llx len %d\n", __func__, addr, len); ++ return -EINVAL; ++ } ++ + /* statistics of pch pic writing */ + vcpu->kvm->stat.pch_pic_write_exits++; + ret = loongarch_pch_pic_write(s, addr, len, val); diff --git a/queue-6.16/loongarch-kvm-fix-stack-protector-issue-in-send_ipi_data.patch b/queue-6.16/loongarch-kvm-fix-stack-protector-issue-in-send_ipi_data.patch new file mode 100644 index 0000000000..b8d964415d --- /dev/null +++ b/queue-6.16/loongarch-kvm-fix-stack-protector-issue-in-send_ipi_data.patch @@ -0,0 +1,81 @@ +From 5c68549c81bcca70fc464e305ffeefd9af968287 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 20 Aug 2025 22:51:15 +0800 +Subject: LoongArch: KVM: Fix stack protector issue in send_ipi_data() + +From: Bibo Mao + +commit 5c68549c81bcca70fc464e305ffeefd9af968287 upstream. + +Function kvm_io_bus_read() is called in function send_ipi_data(), buffer +size of parameter *val should be at least 8 bytes. Since some emulation +functions like loongarch_ipi_readl() and kvm_eiointc_read() will write +the buffer *val with 8 bytes signed extension regardless parameter len. + +Otherwise there will be buffer overflow issue when CONFIG_STACKPROTECTOR +is enabled. 
The bug report is shown as follows: + +Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: send_ipi_data+0x194/0x1a0 [kvm] +CPU: 11 UID: 107 PID: 2692 Comm: CPU 0/KVM Not tainted 6.17.0-rc1+ #102 PREEMPT(full) +Stack : 9000000005901568 0000000000000000 9000000003af371c 900000013c68c000 + 900000013c68f850 900000013c68f858 0000000000000000 900000013c68f998 + 900000013c68f990 900000013c68f990 900000013c68f6c0 fffffffffffdb058 + fffffffffffdb0e0 900000013c68f858 911e1d4d39cf0ec2 9000000105657a00 + 0000000000000001 fffffffffffffffe 0000000000000578 282049464555206e + 6f73676e6f6f4c20 0000000000000001 00000000086b4000 0000000000000000 + 0000000000000000 0000000000000000 9000000005709968 90000000058f9000 + 900000013c68fa68 900000013c68fab4 90000000029279f0 900000010153f940 + 900000010001f360 0000000000000000 9000000003af3734 000000004390000c + 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d + ... +Call Trace: +[<9000000003af3734>] show_stack+0x5c/0x180 +[<9000000003aed168>] dump_stack_lvl+0x6c/0x9c +[<9000000003ad0ab0>] vpanic+0x108/0x2c4 +[<9000000003ad0ca8>] panic+0x3c/0x40 +[<9000000004eb0a1c>] __stack_chk_fail+0x14/0x18 +[] send_ipi_data+0x190/0x1a0 [kvm] +[] __kvm_io_bus_write+0xa4/0xe8 [kvm] +[] kvm_io_bus_write+0x54/0x90 [kvm] +[] kvm_emu_iocsr+0x180/0x310 [kvm] +[] kvm_handle_gspr+0x280/0x478 [kvm] +[] kvm_handle_exit+0xc0/0x130 [kvm] + +Cc: stable@vger.kernel.org +Fixes: daee2f9cae551 ("LoongArch: KVM: Add IPI read and write function") +Signed-off-by: Bibo Mao +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kvm/intc/ipi.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/loongarch/kvm/intc/ipi.c ++++ b/arch/loongarch/kvm/intc/ipi.c +@@ -99,7 +99,7 @@ static void write_mailbox(struct kvm_vcp + static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) + { + int i, idx, ret; +- uint32_t val = 0, mask = 0; ++ uint64_t val = 0, mask = 0; + + /* + * 
Bit 27-30 is mask for byte writing. +@@ -108,7 +108,7 @@ static int send_ipi_data(struct kvm_vcpu + if ((data >> 27) & 0xf) { + /* Read the old val */ + idx = srcu_read_lock(&vcpu->kvm->srcu); +- ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); ++ ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, 4, &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (unlikely(ret)) { + kvm_err("%s: : read data from addr %llx failed\n", __func__, addr); +@@ -124,7 +124,7 @@ static int send_ipi_data(struct kvm_vcpu + } + val |= ((uint32_t)(data >> 32) & ~mask); + idx = srcu_read_lock(&vcpu->kvm->srcu); +- ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); ++ ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, 4, &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (unlikely(ret)) + kvm_err("%s: : write data to addr %llx failed\n", __func__, addr); diff --git a/queue-6.16/loongarch-kvm-make-function-kvm_own_lbt-robust.patch b/queue-6.16/loongarch-kvm-make-function-kvm_own_lbt-robust.patch new file mode 100644 index 0000000000..dcf75f45d4 --- /dev/null +++ b/queue-6.16/loongarch-kvm-make-function-kvm_own_lbt-robust.patch @@ -0,0 +1,37 @@ +From 4be8cefc132606b4a6e851f37f8e8c40c406c910 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 20 Aug 2025 22:51:14 +0800 +Subject: LoongArch: KVM: Make function kvm_own_lbt() robust + +From: Bibo Mao + +commit 4be8cefc132606b4a6e851f37f8e8c40c406c910 upstream. + +Add the flag KVM_LARCH_LBT checking in function kvm_own_lbt(), so that +it can be called safely rather than duplicated enabling again. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Bibo Mao +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kvm/vcpu.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/arch/loongarch/kvm/vcpu.c ++++ b/arch/loongarch/kvm/vcpu.c +@@ -1277,9 +1277,11 @@ int kvm_own_lbt(struct kvm_vcpu *vcpu) + return -EINVAL; + + preempt_disable(); +- set_csr_euen(CSR_EUEN_LBTEN); +- _restore_lbt(&vcpu->arch.lbt); +- vcpu->arch.aux_inuse |= KVM_LARCH_LBT; ++ if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) { ++ set_csr_euen(CSR_EUEN_LBTEN); ++ _restore_lbt(&vcpu->arch.lbt); ++ vcpu->arch.aux_inuse |= KVM_LARCH_LBT; ++ } + preempt_enable(); + + return 0; diff --git a/queue-6.16/mptcp-disable-add_addr-retransmission-when-timeout-is-0.patch b/queue-6.16/mptcp-disable-add_addr-retransmission-when-timeout-is-0.patch new file mode 100644 index 0000000000..65261b2123 --- /dev/null +++ b/queue-6.16/mptcp-disable-add_addr-retransmission-when-timeout-is-0.patch @@ -0,0 +1,91 @@ +From f5ce0714623cffd00bf2a83e890d09c609b7f50a Mon Sep 17 00:00:00 2001 +From: Geliang Tang +Date: Fri, 15 Aug 2025 19:28:23 +0200 +Subject: mptcp: disable add_addr retransmission when timeout is 0 + +From: Geliang Tang + +commit f5ce0714623cffd00bf2a83e890d09c609b7f50a upstream. + +When add_addr_timeout was set to 0, this caused the ADD_ADDR to be +retransmitted immediately, which looks like a buggy behaviour. Instead, +interpret 0 as "no retransmissions needed". + +The documentation is updated to explicitly state that setting the timeout +to 0 disables retransmission. 
+ +Fixes: 93f323b9cccc ("mptcp: add a new sysctl add_addr_timeout") +Cc: stable@vger.kernel.org +Suggested-by: Matthieu Baerts +Signed-off-by: Geliang Tang +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-5-521fe9957892@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/networking/mptcp-sysctl.rst | 2 ++ + net/mptcp/pm.c | 13 ++++++++++--- + 2 files changed, 12 insertions(+), 3 deletions(-) + +--- a/Documentation/networking/mptcp-sysctl.rst ++++ b/Documentation/networking/mptcp-sysctl.rst +@@ -12,6 +12,8 @@ add_addr_timeout - INTEGER (seconds) + resent to an MPTCP peer that has not acknowledged a previous + ADD_ADDR message. + ++ Do not retransmit if set to 0. ++ + The default value matches TCP_RTO_MAX. This is a per-namespace + sysctl. + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -274,6 +274,7 @@ static void mptcp_pm_add_timer(struct ti + add_timer); + struct mptcp_sock *msk = entry->sock; + struct sock *sk = (struct sock *)msk; ++ unsigned int timeout; + + pr_debug("msk=%p\n", msk); + +@@ -291,6 +292,10 @@ static void mptcp_pm_add_timer(struct ti + goto out; + } + ++ timeout = mptcp_get_add_addr_timeout(sock_net(sk)); ++ if (!timeout) ++ goto out; ++ + spin_lock_bh(&msk->pm.lock); + + if (!mptcp_pm_should_add_signal_addr(msk)) { +@@ -302,7 +307,7 @@ static void mptcp_pm_add_timer(struct ti + + if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) + sk_reset_timer(sk, timer, +- jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); ++ jiffies + timeout); + + spin_unlock_bh(&msk->pm.lock); + +@@ -344,6 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mpt + struct mptcp_pm_add_entry *add_entry = NULL; + struct sock *sk = (struct sock *)msk; + struct net *net = sock_net(sk); ++ unsigned int timeout; + + lockdep_assert_held(&msk->pm.lock); + +@@ -368,8 +374,9 @@ bool mptcp_pm_alloc_anno_list(struct mpt + + 
timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); + reset_timer: +- sk_reset_timer(sk, &add_entry->add_timer, +- jiffies + mptcp_get_add_addr_timeout(net)); ++ timeout = mptcp_get_add_addr_timeout(net); ++ if (timeout) ++ sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout); + + return true; + } diff --git a/queue-6.16/mptcp-drop-skb-if-mptcp-skb-extension-allocation-fails.patch b/queue-6.16/mptcp-drop-skb-if-mptcp-skb-extension-allocation-fails.patch new file mode 100644 index 0000000000..9cc21576fd --- /dev/null +++ b/queue-6.16/mptcp-drop-skb-if-mptcp-skb-extension-allocation-fails.patch @@ -0,0 +1,62 @@ +From ccab044697980c6c01ab51f43f48f13b8a3e5c33 Mon Sep 17 00:00:00 2001 +From: Christoph Paasch +Date: Fri, 15 Aug 2025 19:28:19 +0200 +Subject: mptcp: drop skb if MPTCP skb extension allocation fails + +From: Christoph Paasch + +commit ccab044697980c6c01ab51f43f48f13b8a3e5c33 upstream. + +When skb_ext_add(skb, SKB_EXT_MPTCP) fails in mptcp_incoming_options(), +we used to return true, letting the segment proceed through the TCP +receive path without a DSS mapping. Such segments can leave inconsistent +mapping state and trigger a mid-stream fallback to TCP, which in testing +collapsed (by artificially forcing failures in skb_ext_add) throughput +to zero. + +Return false instead so the TCP input path drops the skb (see +tcp_data_queue() and step-7 processing). This is the safer choice +under memory pressure: it preserves MPTCP correctness and provides +backpressure to the sender. + +Control packets remain unaffected: ACK updates and DATA_FIN handling +happen before attempting the extension allocation, and tcp_reset() +continues to ignore the return value. + +With this change, MPTCP continues to work at high throughput if we +artificially inject failures into skb_ext_add. 
+ +Fixes: 6787b7e350d3 ("mptcp: avoid processing packet if a subflow reset") +Cc: stable@vger.kernel.org +Signed-off-by: Christoph Paasch +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-1-521fe9957892@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/options.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/mptcp/options.c ++++ b/net/mptcp/options.c +@@ -1117,7 +1117,9 @@ static bool add_addr_hmac_valid(struct m + return hmac == mp_opt->ahmac; + } + +-/* Return false if a subflow has been reset, else return true */ ++/* Return false in case of error (or subflow has been reset), ++ * else return true. ++ */ + bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) + { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); +@@ -1221,7 +1223,7 @@ bool mptcp_incoming_options(struct sock + + mpext = skb_ext_add(skb, SKB_EXT_MPTCP); + if (!mpext) +- return true; ++ return false; + + memset(mpext, 0, sizeof(*mpext)); + diff --git a/queue-6.16/mptcp-pm-kernel-flush-do-not-reset-add_addr-limit.patch b/queue-6.16/mptcp-pm-kernel-flush-do-not-reset-add_addr-limit.patch new file mode 100644 index 0000000000..1d86176e1b --- /dev/null +++ b/queue-6.16/mptcp-pm-kernel-flush-do-not-reset-add_addr-limit.patch @@ -0,0 +1,40 @@ +From 68fc0f4b0d25692940cdc85c68e366cae63e1757 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Fri, 15 Aug 2025 19:28:20 +0200 +Subject: mptcp: pm: kernel: flush: do not reset ADD_ADDR limit + +From: Matthieu Baerts (NGI0) + +commit 68fc0f4b0d25692940cdc85c68e366cae63e1757 upstream. + +A flush of the MPTCP endpoints should not affect the MPTCP limits. In +other words, 'ip mptcp endpoint flush' should not change 'ip mptcp +limits'. + +But it was the case: the MPTCP_PM_ATTR_RCV_ADD_ADDRS (add_addr_accepted) +limit was reset by accident. 
Removing the reset of this counter during a +flush fixes this issue. + +Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") +Cc: stable@vger.kernel.org +Reported-by: Thomas Dreibholz +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/579 +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-2-521fe9957892@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm_kernel.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/mptcp/pm_kernel.c ++++ b/net/mptcp/pm_kernel.c +@@ -1085,7 +1085,6 @@ static void __flush_addrs(struct list_he + static void __reset_counters(struct pm_nl_pernet *pernet) + { + WRITE_ONCE(pernet->add_addr_signal_max, 0); +- WRITE_ONCE(pernet->add_addr_accept_max, 0); + WRITE_ONCE(pernet->local_addr_max, 0); + pernet->addrs = 0; + } diff --git a/queue-6.16/mptcp-remove-duplicate-sk_reset_timer-call.patch b/queue-6.16/mptcp-remove-duplicate-sk_reset_timer-call.patch new file mode 100644 index 0000000000..e901e0432e --- /dev/null +++ b/queue-6.16/mptcp-remove-duplicate-sk_reset_timer-call.patch @@ -0,0 +1,50 @@ +From 5d13349472ac8abcbcb94407969aa0fdc2e1f1be Mon Sep 17 00:00:00 2001 +From: Geliang Tang +Date: Fri, 15 Aug 2025 19:28:22 +0200 +Subject: mptcp: remove duplicate sk_reset_timer call + +From: Geliang Tang + +commit 5d13349472ac8abcbcb94407969aa0fdc2e1f1be upstream. + +sk_reset_timer() was called twice in mptcp_pm_alloc_anno_list. + +Simplify the code by using a 'goto' statement to eliminate the +duplication. + +Note that this is not a fix, but it will help backporting the following +patch. The same "Fixes" tag has been added for this reason. 
+ +Fixes: 93f323b9cccc ("mptcp: add a new sysctl add_addr_timeout") +Cc: stable@vger.kernel.org +Signed-off-by: Geliang Tang +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-4-521fe9957892@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -353,9 +353,7 @@ bool mptcp_pm_alloc_anno_list(struct mpt + if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) + return false; + +- sk_reset_timer(sk, &add_entry->add_timer, +- jiffies + mptcp_get_add_addr_timeout(net)); +- return true; ++ goto reset_timer; + } + + add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); +@@ -369,6 +367,7 @@ bool mptcp_pm_alloc_anno_list(struct mpt + add_entry->retrans_times = 0; + + timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); ++reset_timer: + sk_reset_timer(sk, &add_entry->add_timer, + jiffies + mptcp_get_add_addr_timeout(net)); + diff --git a/queue-6.16/net-hsr-reject-hsr-frame-if-skb-can-t-hold-tag.patch b/queue-6.16/net-hsr-reject-hsr-frame-if-skb-can-t-hold-tag.patch new file mode 100644 index 0000000000..e5b82c8622 --- /dev/null +++ b/queue-6.16/net-hsr-reject-hsr-frame-if-skb-can-t-hold-tag.patch @@ -0,0 +1,189 @@ +From 7af76e9d18a9fd6f8611b3313c86c190f9b6a5a7 Mon Sep 17 00:00:00 2001 +From: Jakub Acs +Date: Tue, 19 Aug 2025 08:28:42 +0000 +Subject: net, hsr: reject HSR frame if skb can't hold tag +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jakub Acs + +commit 7af76e9d18a9fd6f8611b3313c86c190f9b6a5a7 upstream. 
+ +Receiving HSR frame with insufficient space to hold HSR tag in the skb +can result in a crash (kernel BUG): + +[ 45.390915] skbuff: skb_under_panic: text:ffffffff86f32cac len:26 put:14 head:ffff888042418000 data:ffff888042417ff4 tail:0xe end:0x180 dev:bridge_slave_1 +[ 45.392559] ------------[ cut here ]------------ +[ 45.392912] kernel BUG at net/core/skbuff.c:211! +[ 45.393276] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI +[ 45.393809] CPU: 1 UID: 0 PID: 2496 Comm: reproducer Not tainted 6.15.0 #12 PREEMPT(undef) +[ 45.394433] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 +[ 45.395273] RIP: 0010:skb_panic+0x15b/0x1d0 + + + +[ 45.402911] Call Trace: +[ 45.403105] +[ 45.404470] skb_push+0xcd/0xf0 +[ 45.404726] br_dev_queue_push_xmit+0x7c/0x6c0 +[ 45.406513] br_forward_finish+0x128/0x260 +[ 45.408483] __br_forward+0x42d/0x590 +[ 45.409464] maybe_deliver+0x2eb/0x420 +[ 45.409763] br_flood+0x174/0x4a0 +[ 45.410030] br_handle_frame_finish+0xc7c/0x1bc0 +[ 45.411618] br_handle_frame+0xac3/0x1230 +[ 45.413674] __netif_receive_skb_core.constprop.0+0x808/0x3df0 +[ 45.422966] __netif_receive_skb_one_core+0xb4/0x1f0 +[ 45.424478] __netif_receive_skb+0x22/0x170 +[ 45.424806] process_backlog+0x242/0x6d0 +[ 45.425116] __napi_poll+0xbb/0x630 +[ 45.425394] net_rx_action+0x4d1/0xcc0 +[ 45.427613] handle_softirqs+0x1a4/0x580 +[ 45.427926] do_softirq+0x74/0x90 +[ 45.428196] + +This issue was found by syzkaller. + +The panic happens in br_dev_queue_push_xmit() once it receives a +corrupted skb with ETH header already pushed in linear data. When it +attempts the skb_push() call, there's not enough headroom and +skb_push() panics. + +The corrupted skb is put on the queue by HSR layer, which makes a +sequence of unintended transformations when it receives a specific +corrupted HSR frame (with incomplete TAG). 
+ +Fix it by dropping and consuming frames that are not long enough to +contain both ethernet and hsr headers. + +An alternative fix would be to check for enough headroom before skb_push() +in br_dev_queue_push_xmit(). + +In the reproducer, this is injected via AF_PACKET, but I don't easily +see why it couldn't be sent over the wire from an adjacent network. + +Further Details: + +In the reproducer, the following network interface chain is set up: + +┌────────────────┐ ┌────────────────┐ +│ veth0_to_hsr ├───┤ hsr_slave0 ┼───┐ +└────────────────┘ └────────────────┘ │ + │ ┌──────┐ + ├─┤ hsr0 ├───┐ + │ └──────┘ │ +┌────────────────┐ ┌────────────────┐ │ │┌────────┐ +│ veth1_to_hsr ┼───┤ hsr_slave1 ├───┘ └┤ │ +└────────────────┘ └────────────────┘ ┌┼ bridge │ + ││ │ + │└────────┘ + │ + ┌───────┐ │ + │ ... ├──────┘ + └───────┘ + +To trigger the events leading up to the crash, the reproducer sends a corrupted +HSR frame with incomplete TAG, via AF_PACKET socket on 'veth0_to_hsr'. + +The first HSR-layer function to process this frame is +hsr_handle_frame(). It then checks if the +protocol is ETH_P_PRP or ETH_P_HSR. If it is, it calls +skb_set_network_header(skb, ETH_HLEN + HSR_HLEN), without checking that +the skb is long enough. For the crashing frame it is not, and hence the +skb->network_header and skb->mac_len fields are set incorrectly, +pointing after the end of the linear buffer. + +I will call this BUG#1 and it is what is addressed by this patch. In +the crashing scenario before the fix, the skb continues to go down the +hsr path as follows. + +hsr_handle_frame() then calls this sequence +hsr_forward_skb() + fill_frame_info() + hsr->proto_ops->fill_frame_info() + hsr_fill_frame_info() + +hsr_fill_frame_info() contains a check that intends to check whether the +skb actually contains the HSR header. 
But the check relies on the +skb->mac_len field which was erroneously set up due to BUG#1, so the +check passes and the execution continues back in the hsr_forward_skb(): + +hsr_forward_skb() + hsr_forward_do() + hsr->proto_ops->get_untagged_frame() + hsr_get_untagged_frame() + create_stripped_skb_hsr() + +In create_stripped_skb_hsr(), a copy of the skb is created and is +further corrupted by the operation that attempts to strip the HSR tag in a +call to __pskb_copy(). + +The skb enters create_stripped_skb_hsr() with ethernet header pushed in +linear buffer. The skb_pull(skb_in, HSR_HLEN) thus pulls 6 bytes of +ethernet header into the headroom, creating skb_in with a headroom of +size 8. The subsequent __pskb_copy() then creates an skb with headroom +of just 2 and skb->len of just 12, this is how it looks after the copy: + +(gdb) p skb->len +$10 = 12 +(gdb) p skb->data +$11 = (unsigned char *) 0xffff888041e45382 "\252\252\252\252\252!\210\373", +(gdb) p skb->head +$12 = (unsigned char *) 0xffff888041e45380 "" + +It seems create_stripped_skb_hsr() assumes that ETH header is pulled +in the headroom when it's entered, because it just pulls HSR header on +top. But that is not the case in our code-path and we end up with the +corrupted skb instead. I will call this BUG#2. + +*I got confused here because it seems that under no conditions can +create_stripped_skb_hsr() work well, the assumption it makes is not true +during the processing of hsr frames - from the skb_push() in +hsr_handle_frame() to the skb_pull() in hsr_deliver_master(). I wonder whether I +missed something here.* + +Next, the execution arrives in hsr_deliver_master(). It calls +skb_pull(ETH_HLEN), which just returns NULL - the SKB does not have +enough space for the pull (as it only has 12 bytes in total at this +point). 
+ +*The skb_pull() here further suggests that ethernet header is meant +to be pushed through the whole hsr processing and +create_stripped_skb_hsr() should pull it before doing the HSR header +pull.* + +hsr_deliver_master() then puts the corrupted skb on the queue, it is +then picked up from there by bridge frame handling layer and finally +lands in br_dev_queue_push_xmit where it panics. + +Cc: stable@kernel.org +Fixes: 48b491a5cc74 ("net: hsr: fix mac_len checks") +Reported-by: syzbot+a81f2759d022496b40ab@syzkaller.appspotmail.com +Signed-off-by: Jakub Acs +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20250819082842.94378-1-acsjakub@amazon.de +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/hsr/hsr_slave.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/net/hsr/hsr_slave.c ++++ b/net/hsr/hsr_slave.c +@@ -63,8 +63,14 @@ static rx_handler_result_t hsr_handle_fr + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + if ((!hsr->prot_version && protocol == htons(ETH_P_PRP)) || +- protocol == htons(ETH_P_HSR)) ++ protocol == htons(ETH_P_HSR)) { ++ if (!pskb_may_pull(skb, ETH_HLEN + HSR_HLEN)) { ++ kfree_skb(skb); ++ goto finish_consume; ++ } ++ + skb_set_network_header(skb, ETH_HLEN + HSR_HLEN); ++ } + skb_reset_mac_len(skb); + + /* Only the frames received over the interlink port will assign a diff --git a/queue-6.16/sched-ext-fix-invalid-task-state-transitions-on-class-switch.patch b/queue-6.16/sched-ext-fix-invalid-task-state-transitions-on-class-switch.patch new file mode 100644 index 0000000000..6088ce9590 --- /dev/null +++ b/queue-6.16/sched-ext-fix-invalid-task-state-transitions-on-class-switch.patch @@ -0,0 +1,65 @@ +From ddf7233fcab6c247379d0928d46cc316ee122229 Mon Sep 17 00:00:00 2001 +From: Andrea Righi +Date: Tue, 5 Aug 2025 10:59:11 +0200 +Subject: sched/ext: Fix invalid task state transitions on class switch + +From: Andrea Righi + +commit ddf7233fcab6c247379d0928d46cc316ee122229 upstream. 
+ +When enabling a sched_ext scheduler, we may trigger invalid task state +transitions, resulting in warnings like the following (which can be +easily reproduced by running the hotplug selftest in a loop): + + sched_ext: Invalid task state transition 0 -> 3 for fish[770] + WARNING: CPU: 18 PID: 787 at kernel/sched/ext.c:3862 scx_set_task_state+0x7c/0xc0 + ... + RIP: 0010:scx_set_task_state+0x7c/0xc0 + ... + Call Trace: + + scx_enable_task+0x11f/0x2e0 + switching_to_scx+0x24/0x110 + scx_enable.isra.0+0xd14/0x13d0 + bpf_struct_ops_link_create+0x136/0x1a0 + __sys_bpf+0x1edd/0x2c30 + __x64_sys_bpf+0x21/0x30 + do_syscall_64+0xbb/0x370 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +This happens because we skip initialization for tasks that are already +dead (with their usage counter set to zero), but we don't exclude them +during the scheduling class transition phase. + +Fix this by also skipping dead tasks during class switching, preventing +invalid task state transitions. + +Fixes: a8532fac7b5d2 ("sched_ext: TASK_DEAD tasks must be switched into SCX on ops_enable") +Cc: stable@vger.kernel.org # v6.12+ +Signed-off-by: Andrea Righi +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/ext.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/sched/ext.c ++++ b/kernel/sched/ext.c +@@ -5694,6 +5694,9 @@ static int scx_enable(struct sched_ext_o + __setscheduler_class(p->policy, p->prio); + struct sched_enq_and_set_ctx ctx; + ++ if (!tryget_task_struct(p)) ++ continue; ++ + if (old_class != new_class && p->se.sched_delayed) + dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); + +@@ -5706,6 +5709,7 @@ static int scx_enable(struct sched_ext_o + sched_enq_and_set_task(&ctx); + + check_class_changed(task_rq(p), p, old_class, p->prio); ++ put_task_struct(p); + } + scx_task_iter_stop(&sti); + percpu_up_write(&scx_fork_rwsem); diff --git a/queue-6.16/selftests-mptcp-connect-fix-c23-extension-warning.patch 
b/queue-6.16/selftests-mptcp-connect-fix-c23-extension-warning.patch new file mode 100644 index 0000000000..480d9ad8f4 --- /dev/null +++ b/queue-6.16/selftests-mptcp-connect-fix-c23-extension-warning.patch @@ -0,0 +1,48 @@ +From 2eefbed30d46d5e68593baf6b52923e00e7678af Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Fri, 15 Aug 2025 19:28:25 +0200 +Subject: selftests: mptcp: connect: fix C23 extension warning + +From: Matthieu Baerts (NGI0) + +commit 2eefbed30d46d5e68593baf6b52923e00e7678af upstream. + +GCC was complaining about the new label: + + mptcp_connect.c:187:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] + 187 | int err = getaddrinfo(node, service, hints, res); + | ^ + +Simply declare 'err' before the label to avoid this warning. + +Fixes: a862771d1aa4 ("selftests: mptcp: use IPPROTO_MPTCP for getaddrinfo") +Cc: stable@vger.kernel.org +Reviewed-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-7-521fe9957892@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/mptcp_connect.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c +index ac1349c4b9e5..4f07ac9fa207 100644 +--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c ++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c +@@ -183,9 +183,10 @@ static void xgetaddrinfo(const char *node, const char *service, + struct addrinfo *hints, + struct addrinfo **res) + { +-again: +- int err = getaddrinfo(node, service, hints, res); ++ int err; + ++again: ++ err = getaddrinfo(node, service, hints, res); + if (err) { + const char *errstr; + +-- +2.50.1 + diff --git a/queue-6.16/selftests-mptcp-pm-check-flush-doesn-t-reset-limits.patch 
b/queue-6.16/selftests-mptcp-pm-check-flush-doesn-t-reset-limits.patch new file mode 100644 index 0000000000..3e9ab705c3 --- /dev/null +++ b/queue-6.16/selftests-mptcp-pm-check-flush-doesn-t-reset-limits.patch @@ -0,0 +1,41 @@ +From 452690be7de2f91cc0de68cb9e95252875b33503 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Fri, 15 Aug 2025 19:28:21 +0200 +Subject: selftests: mptcp: pm: check flush doesn't reset limits + +From: Matthieu Baerts (NGI0) + +commit 452690be7de2f91cc0de68cb9e95252875b33503 upstream. + +This modification is linked to the parent commit where the received +ADD_ADDR limit was accidentally reset when the endpoints were flushed. + +To validate that, the test is now flushing endpoints after having set +new limits, and before checking them. + +The 'Fixes' tag here below is the same as the one from the previous +commit: this patch here is not fixing anything wrong in the selftests, +but it validates the previous fix for an issue introduced by this commit +ID. 
+ +Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-3-521fe9957892@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/pm_netlink.sh | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh ++++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh +@@ -198,6 +198,7 @@ set_limits 1 9 2>/dev/null + check "get_limits" "${default_limits}" "subflows above hard limit" + + set_limits 8 8 ++flush_endpoint ## to make sure it doesn't affect the limits + check "get_limits" "$(format_limits 8 8)" "set limits" + + flush_endpoint diff --git a/queue-6.16/selftests-mptcp-sockopt-fix-c23-extension-warning.patch b/queue-6.16/selftests-mptcp-sockopt-fix-c23-extension-warning.patch new file mode 100644 index 0000000000..aa11107dd6 --- /dev/null +++ b/queue-6.16/selftests-mptcp-sockopt-fix-c23-extension-warning.patch @@ -0,0 +1,70 @@ +From 3259889fd3c0cc165b7e9ee375c789875dd32326 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Fri, 15 Aug 2025 19:28:26 +0200 +Subject: selftests: mptcp: sockopt: fix C23 extension warning + +From: Matthieu Baerts (NGI0) + +commit 3259889fd3c0cc165b7e9ee375c789875dd32326 upstream. + +GCC was complaining about the new label: + + mptcp_inq.c:79:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] + 79 | int err = getaddrinfo(node, service, hints, res); + | ^ + + mptcp_sockopt.c:166:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] + 166 | int err = getaddrinfo(node, service, hints, res); + | ^ + +Simply declare 'err' before the label to avoid this warning. 
+ +Fixes: dd367e81b79a ("selftests: mptcp: sockopt: use IPPROTO_MPTCP for getaddrinfo") +Cc: stable@vger.kernel.org +Reviewed-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-8-521fe9957892@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/mptcp_inq.c | 5 +++-- + tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++-- + 2 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c +index 3cf1e2a612ce..f3bcaa48df8f 100644 +--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c ++++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c +@@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *node, const char *service, + struct addrinfo *hints, + struct addrinfo **res) + { +-again: +- int err = getaddrinfo(node, service, hints, res); ++ int err; + ++again: ++ err = getaddrinfo(node, service, hints, res); + if (err) { + const char *errstr; + +diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +index 9934a68df237..e934dd26a59d 100644 +--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c ++++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +@@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *node, const char *service, + struct addrinfo *hints, + struct addrinfo **res) + { +-again: +- int err = getaddrinfo(node, service, hints, res); ++ int err; + ++again: ++ err = getaddrinfo(node, service, hints, res); + if (err) { + const char *errstr; + +-- +2.50.1 + diff --git a/queue-6.16/series b/queue-6.16/series index 181f2ef40a..63bac638a8 100644 --- a/queue-6.16/series +++ b/queue-6.16/series @@ -191,3 +191,20 @@ drm-amd-display-fix-initial-backlight-brightness-calculation.patch drm-amd-display-pass-up-errors-for-reset-gpu-that-fails-to-init-hw.patch 
drm-amd-display-revert-drm-amd-display-fix-amdgpu_max_bl_level-value.patch drm-amd-display-don-t-overwrite-dce60_clk_mgr.patch +loongarch-kvm-make-function-kvm_own_lbt-robust.patch +loongarch-kvm-fix-stack-protector-issue-in-send_ipi_data.patch +loongarch-kvm-add-address-alignment-check-in-pch_pic-register-access.patch +net-hsr-reject-hsr-frame-if-skb-can-t-hold-tag.patch +sched-ext-fix-invalid-task-state-transitions-on-class-switch.patch +ipv6-sr-fix-mac-comparison-to-be-constant-time.patch +cgroup-avoid-null-de-ref-in-css_rstat_exit.patch +cpuidle-governors-menu-avoid-selecting-states-with-too-much-latency.patch +acpi-pfr_update-fix-the-driver-update-version-check.patch +acpi-apei-einj-fix-resource-leak-by-remove-callback-in-.exit.text.patch +mptcp-drop-skb-if-mptcp-skb-extension-allocation-fails.patch +mptcp-pm-kernel-flush-do-not-reset-add_addr-limit.patch +mptcp-remove-duplicate-sk_reset_timer-call.patch +mptcp-disable-add_addr-retransmission-when-timeout-is-0.patch +selftests-mptcp-pm-check-flush-doesn-t-reset-limits.patch +selftests-mptcp-connect-fix-c23-extension-warning.patch +selftests-mptcp-sockopt-fix-c23-extension-warning.patch