From 574f79fe28abca51215bbf45311bd8a9ccccc786 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 Nov 2025 10:29:50 +0900 Subject: [PATCH] 6.12-stable patches added patches: acpi-fan-add-fan-speed-reporting-for-fans-with-only-_fst.patch acpi-fan-use-platform-device-for-devres-related-actions.patch cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch mptcp-cleanup-mem-accounting.patch mptcp-fix-msg_peek-stream-corruption.patch mptcp-leverage-skb-deferral-free.patch mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch net-phy-add-phy_disable_eee.patch net-phy-dp83867-disable-eee-support-as-not-implemented.patch sched_ext-mark-scx_bpf_dsq_move_set_-with-kf_rcu.patch --- ...ed-reporting-for-fans-with-only-_fst.patch | 231 +++++++++++ ...rm-device-for-devres-related-actions.patch | 78 ++++ ...u-rearrange-main-loop-in-menu_select.patch | 111 ++++++ ...ect-polling-state-in-some-more-cases.patch | 54 +++ queue-6.12/mptcp-cleanup-mem-accounting.patch | 298 +++++++++++++++ ...mptcp-fix-msg_peek-stream-corruption.patch | 119 ++++++ .../mptcp-leverage-skb-deferral-free.patch | 50 +++ ...ath-under-msk-socket-lock-protection.patch | 359 ++++++++++++++++++ queue-6.12/net-phy-add-phy_disable_eee.patch | 62 +++ ...sable-eee-support-as-not-implemented.patch | 54 +++ ...rk-scx_bpf_dsq_move_set_-with-kf_rcu.patch | 50 +++ queue-6.12/series | 11 + 12 files changed, 1477 insertions(+) create mode 100644 queue-6.12/acpi-fan-add-fan-speed-reporting-for-fans-with-only-_fst.patch create mode 100644 queue-6.12/acpi-fan-use-platform-device-for-devres-related-actions.patch create mode 100644 queue-6.12/cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch create mode 100644 queue-6.12/cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch create mode 100644 queue-6.12/mptcp-cleanup-mem-accounting.patch create mode 100644 
queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch create mode 100644 queue-6.12/mptcp-leverage-skb-deferral-free.patch create mode 100644 queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch create mode 100644 queue-6.12/net-phy-add-phy_disable_eee.patch create mode 100644 queue-6.12/net-phy-dp83867-disable-eee-support-as-not-implemented.patch create mode 100644 queue-6.12/sched_ext-mark-scx_bpf_dsq_move_set_-with-kf_rcu.patch diff --git a/queue-6.12/acpi-fan-add-fan-speed-reporting-for-fans-with-only-_fst.patch b/queue-6.12/acpi-fan-add-fan-speed-reporting-for-fans-with-only-_fst.patch new file mode 100644 index 0000000000..c01fb5857c --- /dev/null +++ b/queue-6.12/acpi-fan-add-fan-speed-reporting-for-fans-with-only-_fst.patch @@ -0,0 +1,231 @@ +From stable+bounces-192060-greg=kroah.com@vger.kernel.org Sun Nov 2 23:35:24 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 09:35:13 -0500 +Subject: ACPI: fan: Add fan speed reporting for fans with only _FST +To: stable@vger.kernel.org +Cc: Joshua Grisham , Armin Wolf , "Rafael J. Wysocki" , Sasha Levin +Message-ID: <20251102143514.3449278-1-sashal@kernel.org> + +From: Joshua Grisham + +[ Upstream commit 6c00f29f74cb2c063b6f31a0b6d73f9db132b9ac ] + +Add support for ACPI fans with _FST to report their speed even if they do +not support fan control. + +As suggested by Armin Wolf [1] and per the Windows Thermal Management +Design Guide [2], Samsung Galaxy Book series devices (and possibly many +more devices where the Windows guide was strictly followed) only implement +the _FST method and do not support ACPI-based fan control. + +Currently, these fans are not supported by the kernel driver but this patch +will make some very small adjustments to allow them to be supported. + +This patch is tested and working for me on a Samsung Galaxy Book2 Pro whose +DSDT (and several other Samsung Galaxy Book series notebooks which +currently have the same issue) can be found at [3]. 
+ +Link: https://lore.kernel.org/platform-driver-x86/53c5075b-1967-45d0-937f-463912dd966d@gmx.de [1] +Link: https://learn.microsoft.com/en-us/windows-hardware/design/device-experiences/design-guide [2] +Link: https://github.com/joshuagrisham/samsung-galaxybook-extras/tree/8e3087a06b8bdcdfdd081367af4b744a56cc4ee9/dsdt [3] + +Signed-off-by: Joshua Grisham +Reviewed-by: Armin Wolf +Link: https://patch.msgid.link/20250222094407.9753-1-josh@joshuagrisham.com +Signed-off-by: Rafael J. Wysocki +Stable-dep-of: d91a1d129b63 ("ACPI: fan: Use platform device for devres-related actions") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/fan.h | 1 + + drivers/acpi/fan_attr.c | 37 ++++++++++++++++++++++--------------- + drivers/acpi/fan_core.c | 25 ++++++++++++++++++------- + drivers/acpi/fan_hwmon.c | 8 ++++++++ + 4 files changed, 49 insertions(+), 22 deletions(-) + +--- a/drivers/acpi/fan.h ++++ b/drivers/acpi/fan.h +@@ -49,6 +49,7 @@ struct acpi_fan_fst { + struct acpi_fan { + acpi_handle handle; + bool acpi4; ++ bool has_fst; + struct acpi_fan_fif fif; + struct acpi_fan_fps *fps; + int fps_count; +--- a/drivers/acpi/fan_attr.c ++++ b/drivers/acpi/fan_attr.c +@@ -75,15 +75,6 @@ int acpi_fan_create_attributes(struct ac + struct acpi_fan *fan = acpi_driver_data(device); + int i, status; + +- sysfs_attr_init(&fan->fine_grain_control.attr); +- fan->fine_grain_control.show = show_fine_grain_control; +- fan->fine_grain_control.store = NULL; +- fan->fine_grain_control.attr.name = "fine_grain_control"; +- fan->fine_grain_control.attr.mode = 0444; +- status = sysfs_create_file(&device->dev.kobj, &fan->fine_grain_control.attr); +- if (status) +- return status; +- + /* _FST is present if we are here */ + sysfs_attr_init(&fan->fst_speed.attr); + fan->fst_speed.show = show_fan_speed; +@@ -92,7 +83,19 @@ int acpi_fan_create_attributes(struct ac + fan->fst_speed.attr.mode = 0444; + status = sysfs_create_file(&device->dev.kobj, &fan->fst_speed.attr); + if 
(status) +- goto rem_fine_grain_attr; ++ return status; ++ ++ if (!fan->acpi4) ++ return 0; ++ ++ sysfs_attr_init(&fan->fine_grain_control.attr); ++ fan->fine_grain_control.show = show_fine_grain_control; ++ fan->fine_grain_control.store = NULL; ++ fan->fine_grain_control.attr.name = "fine_grain_control"; ++ fan->fine_grain_control.attr.mode = 0444; ++ status = sysfs_create_file(&device->dev.kobj, &fan->fine_grain_control.attr); ++ if (status) ++ goto rem_fst_attr; + + for (i = 0; i < fan->fps_count; ++i) { + struct acpi_fan_fps *fps = &fan->fps[i]; +@@ -109,18 +112,18 @@ int acpi_fan_create_attributes(struct ac + + for (j = 0; j < i; ++j) + sysfs_remove_file(&device->dev.kobj, &fan->fps[j].dev_attr.attr); +- goto rem_fst_attr; ++ goto rem_fine_grain_attr; + } + } + + return 0; + +-rem_fst_attr: +- sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr); +- + rem_fine_grain_attr: + sysfs_remove_file(&device->dev.kobj, &fan->fine_grain_control.attr); + ++rem_fst_attr: ++ sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr); ++ + return status; + } + +@@ -129,9 +132,13 @@ void acpi_fan_delete_attributes(struct a + struct acpi_fan *fan = acpi_driver_data(device); + int i; + ++ sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr); ++ ++ if (!fan->acpi4) ++ return; ++ + for (i = 0; i < fan->fps_count; ++i) + sysfs_remove_file(&device->dev.kobj, &fan->fps[i].dev_attr.attr); + +- sysfs_remove_file(&device->dev.kobj, &fan->fst_speed.attr); + sysfs_remove_file(&device->dev.kobj, &fan->fine_grain_control.attr); + } +--- a/drivers/acpi/fan_core.c ++++ b/drivers/acpi/fan_core.c +@@ -208,12 +208,16 @@ static const struct thermal_cooling_devi + * -------------------------------------------------------------------------- + */ + ++static bool acpi_fan_has_fst(struct acpi_device *device) ++{ ++ return acpi_has_method(device->handle, "_FST"); ++} ++ + static bool acpi_fan_is_acpi4(struct acpi_device *device) + { + return acpi_has_method(device->handle, "_FIF") && + 
acpi_has_method(device->handle, "_FPS") && +- acpi_has_method(device->handle, "_FSL") && +- acpi_has_method(device->handle, "_FST"); ++ acpi_has_method(device->handle, "_FSL"); + } + + static int acpi_fan_get_fif(struct acpi_device *device) +@@ -337,7 +341,12 @@ static int acpi_fan_probe(struct platfor + device->driver_data = fan; + platform_set_drvdata(pdev, fan); + +- if (acpi_fan_is_acpi4(device)) { ++ if (acpi_fan_has_fst(device)) { ++ fan->has_fst = true; ++ fan->acpi4 = acpi_fan_is_acpi4(device); ++ } ++ ++ if (fan->acpi4) { + result = acpi_fan_get_fif(device); + if (result) + return result; +@@ -345,7 +354,9 @@ static int acpi_fan_probe(struct platfor + result = acpi_fan_get_fps(device); + if (result) + return result; ++ } + ++ if (fan->has_fst) { + result = devm_acpi_fan_create_hwmon(device); + if (result) + return result; +@@ -353,9 +364,9 @@ static int acpi_fan_probe(struct platfor + result = acpi_fan_create_attributes(device); + if (result) + return result; ++ } + +- fan->acpi4 = true; +- } else { ++ if (!fan->acpi4) { + result = acpi_device_update_power(device, NULL); + if (result) { + dev_err(&device->dev, "Failed to set initial power state\n"); +@@ -401,7 +412,7 @@ err_remove_link: + err_unregister: + thermal_cooling_device_unregister(cdev); + err_end: +- if (fan->acpi4) ++ if (fan->has_fst) + acpi_fan_delete_attributes(device); + + return result; +@@ -411,7 +422,7 @@ static void acpi_fan_remove(struct platf + { + struct acpi_fan *fan = platform_get_drvdata(pdev); + +- if (fan->acpi4) { ++ if (fan->has_fst) { + struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + + acpi_fan_delete_attributes(device); +--- a/drivers/acpi/fan_hwmon.c ++++ b/drivers/acpi/fan_hwmon.c +@@ -43,6 +43,10 @@ static umode_t acpi_fan_hwmon_is_visible + case hwmon_fan_input: + return 0444; + case hwmon_fan_target: ++ /* Only acpi4 fans support fan control. 
*/ ++ if (!fan->acpi4) ++ return 0; ++ + /* + * When in fine grain control mode, not every fan control value + * has an associated fan performance state. +@@ -57,6 +61,10 @@ static umode_t acpi_fan_hwmon_is_visible + case hwmon_power: + switch (attr) { + case hwmon_power_input: ++ /* Only acpi4 fans support fan control. */ ++ if (!fan->acpi4) ++ return 0; ++ + /* + * When in fine grain control mode, not every fan control value + * has an associated fan performance state. diff --git a/queue-6.12/acpi-fan-use-platform-device-for-devres-related-actions.patch b/queue-6.12/acpi-fan-use-platform-device-for-devres-related-actions.patch new file mode 100644 index 0000000000..3b2d2eb896 --- /dev/null +++ b/queue-6.12/acpi-fan-use-platform-device-for-devres-related-actions.patch @@ -0,0 +1,78 @@ +From stable+bounces-192061-greg=kroah.com@vger.kernel.org Sun Nov 2 23:35:23 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 09:35:14 -0500 +Subject: ACPI: fan: Use platform device for devres-related actions +To: stable@vger.kernel.org +Cc: Armin Wolf , "Rafael J. Wysocki" , Sasha Levin +Message-ID: <20251102143514.3449278-2-sashal@kernel.org> + +From: Armin Wolf + +[ Upstream commit d91a1d129b63614fa4c2e45e60918409ce36db7e ] + +Device-managed resources are cleaned up when the driver unbinds from +the underlying device. In our case this is the platform device as this +driver is a platform driver. Registering device-managed resources on +the associated ACPI device will thus result in a resource leak when +this driver unbinds. + +Ensure that any device-managed resources are only registered on the +platform device to ensure that they are cleaned up during removal. + +Fixes: 35c50d853adc ("ACPI: fan: Add hwmon support") +Signed-off-by: Armin Wolf +Cc: 6.11+ # 6.11+ +Link: https://patch.msgid.link/20251007234149.2769-4-W_Armin@gmx.de +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/fan.h | 4 ++-- + drivers/acpi/fan_core.c | 2 +- + drivers/acpi/fan_hwmon.c | 8 ++++---- + 3 files changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/acpi/fan.h ++++ b/drivers/acpi/fan.h +@@ -63,9 +63,9 @@ int acpi_fan_create_attributes(struct ac + void acpi_fan_delete_attributes(struct acpi_device *device); + + #if IS_REACHABLE(CONFIG_HWMON) +-int devm_acpi_fan_create_hwmon(struct acpi_device *device); ++int devm_acpi_fan_create_hwmon(struct device *dev); + #else +-static inline int devm_acpi_fan_create_hwmon(struct acpi_device *device) { return 0; }; ++static inline int devm_acpi_fan_create_hwmon(struct device *dev) { return 0; }; + #endif + + #endif +--- a/drivers/acpi/fan_core.c ++++ b/drivers/acpi/fan_core.c +@@ -357,7 +357,7 @@ static int acpi_fan_probe(struct platfor + } + + if (fan->has_fst) { +- result = devm_acpi_fan_create_hwmon(device); ++ result = devm_acpi_fan_create_hwmon(&pdev->dev); + if (result) + return result; + +--- a/drivers/acpi/fan_hwmon.c ++++ b/drivers/acpi/fan_hwmon.c +@@ -166,12 +166,12 @@ static const struct hwmon_chip_info acpi + .info = acpi_fan_hwmon_info, + }; + +-int devm_acpi_fan_create_hwmon(struct acpi_device *device) ++int devm_acpi_fan_create_hwmon(struct device *dev) + { +- struct acpi_fan *fan = acpi_driver_data(device); ++ struct acpi_fan *fan = dev_get_drvdata(dev); + struct device *hdev; + +- hdev = devm_hwmon_device_register_with_info(&device->dev, "acpi_fan", fan, +- &acpi_fan_hwmon_chip_info, NULL); ++ hdev = devm_hwmon_device_register_with_info(dev, "acpi_fan", fan, &acpi_fan_hwmon_chip_info, ++ NULL); + return PTR_ERR_OR_ZERO(hdev); + } diff --git a/queue-6.12/cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch b/queue-6.12/cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch new file mode 100644 index 0000000000..73879a46d5 --- /dev/null +++ 
b/queue-6.12/cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch @@ -0,0 +1,111 @@ +From stable+bounces-192075-greg=kroah.com@vger.kernel.org Mon Nov 3 03:57:39 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 13:57:29 -0500 +Subject: cpuidle: governors: menu: Rearrange main loop in menu_select() +To: stable@vger.kernel.org +Cc: "Rafael J. Wysocki" , Christian Loehle , Sasha Levin +Message-ID: <20251102185730.3551603-1-sashal@kernel.org> + +From: "Rafael J. Wysocki" + +[ Upstream commit 17224c1d2574d29668c4879e1fbf36d6f68cd22b ] + +Reduce the indentation level in the main loop of menu_select() by +rearranging some checks and assignments in it. + +No intentional functional impact. + +Signed-off-by: Rafael J. Wysocki +Reviewed-by: Christian Loehle +Link: https://patch.msgid.link/2389215.ElGaqSPkdT@rafael.j.wysocki +Stable-dep-of: db86f55bf81a ("cpuidle: governors: menu: Select polling state in some more cases") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpuidle/governors/menu.c | 70 ++++++++++++++++++++------------------- + 1 file changed, 36 insertions(+), 34 deletions(-) + +--- a/drivers/cpuidle/governors/menu.c ++++ b/drivers/cpuidle/governors/menu.c +@@ -317,45 +317,47 @@ static int menu_select(struct cpuidle_dr + if (s->exit_latency_ns > latency_req) + break; + +- if (s->target_residency_ns > predicted_ns) { +- /* +- * Use a physical idle state, not busy polling, unless +- * a timer is going to trigger soon enough. +- */ +- if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && +- s->target_residency_ns <= data->next_timer_ns) { +- predicted_ns = s->target_residency_ns; +- idx = i; +- break; +- } +- if (predicted_ns < TICK_NSEC) +- break; +- +- if (!tick_nohz_tick_stopped()) { +- /* +- * If the state selected so far is shallow, +- * waking up early won't hurt, so retain the +- * tick in that case and let the governor run +- * again in the next iteration of the loop. 
+- */ +- predicted_ns = drv->states[idx].target_residency_ns; +- break; +- } ++ if (s->target_residency_ns <= predicted_ns) { ++ idx = i; ++ continue; ++ } ++ ++ /* ++ * Use a physical idle state, not busy polling, unless a timer ++ * is going to trigger soon enough. ++ */ ++ if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && ++ s->target_residency_ns <= data->next_timer_ns) { ++ predicted_ns = s->target_residency_ns; ++ idx = i; ++ break; ++ } + ++ if (predicted_ns < TICK_NSEC) ++ break; ++ ++ if (!tick_nohz_tick_stopped()) { + /* +- * If the state selected so far is shallow and this +- * state's target residency matches the time till the +- * closest timer event, select this one to avoid getting +- * stuck in the shallow one for too long. ++ * If the state selected so far is shallow, waking up ++ * early won't hurt, so retain the tick in that case and ++ * let the governor run again in the next iteration of ++ * the idle loop. + */ +- if (drv->states[idx].target_residency_ns < TICK_NSEC && +- s->target_residency_ns <= delta_tick) +- idx = i; +- +- return idx; ++ predicted_ns = drv->states[idx].target_residency_ns; ++ break; + } + +- idx = i; ++ /* ++ * If the state selected so far is shallow and this state's ++ * target residency matches the time till the closest timer ++ * event, select this one to avoid getting stuck in the shallow ++ * one for too long. 
++ */ ++ if (drv->states[idx].target_residency_ns < TICK_NSEC && ++ s->target_residency_ns <= delta_tick) ++ idx = i; ++ ++ return idx; + } + + if (idx == -1) diff --git a/queue-6.12/cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch b/queue-6.12/cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch new file mode 100644 index 0000000000..4e43593f3a --- /dev/null +++ b/queue-6.12/cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch @@ -0,0 +1,54 @@ +From stable+bounces-192076-greg=kroah.com@vger.kernel.org Mon Nov 3 03:57:39 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 13:57:30 -0500 +Subject: cpuidle: governors: menu: Select polling state in some more cases +To: stable@vger.kernel.org +Cc: "Rafael J. Wysocki" , Doug Smythies , Christian Loehle , Sasha Levin +Message-ID: <20251102185730.3551603-2-sashal@kernel.org> + +From: "Rafael J. Wysocki" + +[ Upstream commit db86f55bf81a3a297be05ee8775ae9a8c6e3a599 ] + +A throughput regression of 11% introduced by commit 779b1a1cb13a ("cpuidle: +governors: menu: Avoid selecting states with too much latency") has been +reported and it is related to the case when the menu governor checks if +selecting a proper idle state instead of a polling one makes sense. + +In particular, it is questionable to do so if the exit latency of the +idle state in question exceeds the predicted idle duration, so add a +check for that, which is sufficient to make the reported regression go +away, and update the related code comment accordingly. + +Fixes: 779b1a1cb13a ("cpuidle: governors: menu: Avoid selecting states with too much latency") +Closes: https://lore.kernel.org/linux-pm/004501dc43c9$ec8aa930$c59ffb90$@telus.net/ +Reported-by: Doug Smythies +Tested-by: Doug Smythies +Cc: All applicable +Signed-off-by: Rafael J. 
Wysocki +Reviewed-by: Christian Loehle +Link: https://patch.msgid.link/12786727.O9o76ZdvQC@rafael.j.wysocki +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpuidle/governors/menu.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/cpuidle/governors/menu.c ++++ b/drivers/cpuidle/governors/menu.c +@@ -324,10 +324,13 @@ static int menu_select(struct cpuidle_dr + + /* + * Use a physical idle state, not busy polling, unless a timer +- * is going to trigger soon enough. ++ * is going to trigger soon enough or the exit latency of the ++ * idle state in question is greater than the predicted idle ++ * duration. + */ + if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && +- s->target_residency_ns <= data->next_timer_ns) { ++ s->target_residency_ns <= data->next_timer_ns && ++ s->exit_latency_ns <= predicted_ns) { + predicted_ns = s->target_residency_ns; + idx = i; + break; diff --git a/queue-6.12/mptcp-cleanup-mem-accounting.patch b/queue-6.12/mptcp-cleanup-mem-accounting.patch new file mode 100644 index 0000000000..6e1a8b47e6 --- /dev/null +++ b/queue-6.12/mptcp-cleanup-mem-accounting.patch @@ -0,0 +1,298 @@ +From stable+bounces-192096-greg=kroah.com@vger.kernel.org Mon Nov 3 08:27:44 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 18:27:33 -0500 +Subject: mptcp: cleanup mem accounting +To: stable@vger.kernel.org +Cc: Paolo Abeni , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin +Message-ID: <20251102232735.3652847-2-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 6639498ed85fdb135dfb0dfbcc0f540b2d4ad6a6 ] + +After the previous patch, updating sk_forward_memory is cheap and +we can drop a lot of complexity from the MPTCP memory accounting, +removing the custom fwd mem allocations for rmem. 
+ +Signed-off-by: Paolo Abeni +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250218-net-next-mptcp-rx-path-refactor-v1-4-4a47d90d7998@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/fastopen.c | 2 + net/mptcp/protocol.c | 115 +++------------------------------------------------ + net/mptcp/protocol.h | 4 - + 3 files changed, 10 insertions(+), 111 deletions(-) + +--- a/net/mptcp/fastopen.c ++++ b/net/mptcp/fastopen.c +@@ -51,7 +51,7 @@ void mptcp_fastopen_subflow_synack_set_p + mptcp_data_lock(sk); + DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk)); + +- mptcp_set_owner_r(skb, sk); ++ skb_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); + mptcp_sk(sk)->bytes_received += skb->len; + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -118,17 +118,6 @@ static void mptcp_drop(struct sock *sk, + __kfree_skb(skb); + } + +-static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size) +-{ +- WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc, +- mptcp_sk(sk)->rmem_fwd_alloc + size); +-} +- +-static void mptcp_rmem_charge(struct sock *sk, int size) +-{ +- mptcp_rmem_fwd_alloc_add(sk, -size); +-} +- + static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, + struct sk_buff *from) + { +@@ -150,7 +139,7 @@ static bool mptcp_try_coalesce(struct so + * negative one + */ + atomic_add(delta, &sk->sk_rmem_alloc); +- mptcp_rmem_charge(sk, delta); ++ sk_mem_charge(sk, delta); + kfree_skb_partial(from, fragstolen); + + return true; +@@ -165,44 +154,6 @@ static bool mptcp_ooo_try_coalesce(struc + return mptcp_try_coalesce((struct sock *)msk, to, from); + } + +-static void __mptcp_rmem_reclaim(struct sock *sk, int amount) +-{ +- amount >>= PAGE_SHIFT; +- mptcp_rmem_charge(sk, amount << PAGE_SHIFT); +- __sk_mem_reduce_allocated(sk, amount); +-} +- +-static 
void mptcp_rmem_uncharge(struct sock *sk, int size) +-{ +- struct mptcp_sock *msk = mptcp_sk(sk); +- int reclaimable; +- +- mptcp_rmem_fwd_alloc_add(sk, size); +- reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk); +- +- /* see sk_mem_uncharge() for the rationale behind the following schema */ +- if (unlikely(reclaimable >= PAGE_SIZE)) +- __mptcp_rmem_reclaim(sk, reclaimable); +-} +- +-static void mptcp_rfree(struct sk_buff *skb) +-{ +- unsigned int len = skb->truesize; +- struct sock *sk = skb->sk; +- +- atomic_sub(len, &sk->sk_rmem_alloc); +- mptcp_rmem_uncharge(sk, len); +-} +- +-void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) +-{ +- skb_orphan(skb); +- skb->sk = sk; +- skb->destructor = mptcp_rfree; +- atomic_add(skb->truesize, &sk->sk_rmem_alloc); +- mptcp_rmem_charge(sk, skb->truesize); +-} +- + /* "inspired" by tcp_data_queue_ofo(), main differences: + * - use mptcp seqs + * - don't cope with sacks +@@ -315,25 +266,7 @@ merge_right: + + end: + skb_condense(skb); +- mptcp_set_owner_r(skb, sk); +-} +- +-static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) +-{ +- struct mptcp_sock *msk = mptcp_sk(sk); +- int amt, amount; +- +- if (size <= msk->rmem_fwd_alloc) +- return true; +- +- size -= msk->rmem_fwd_alloc; +- amt = sk_mem_pages(size); +- amount = amt << PAGE_SHIFT; +- if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) +- return false; +- +- mptcp_rmem_fwd_alloc_add(sk, amount); +- return true; ++ skb_set_owner_r(skb, sk); + } + + static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, +@@ -351,7 +284,7 @@ static bool __mptcp_move_skb(struct mptc + skb_orphan(skb); + + /* try to fetch required memory from subflow */ +- if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { ++ if (!sk_rmem_schedule(sk, skb, skb->truesize)) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); + goto drop; + } +@@ -375,7 +308,7 @@ static bool __mptcp_move_skb(struct mptc + if (tail && mptcp_try_coalesce(sk, 
tail, skb)) + return true; + +- mptcp_set_owner_r(skb, sk); ++ skb_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); + return true; + } else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) { +@@ -2016,9 +1949,10 @@ static int __mptcp_recvmsg_mskq(struct s + } + + if (!(flags & MSG_PEEK)) { +- /* we will bulk release the skb memory later */ ++ /* avoid the indirect call, we know the destructor is sock_wfree */ + skb->destructor = NULL; +- WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); ++ atomic_sub(skb->truesize, &sk->sk_rmem_alloc); ++ sk_mem_uncharge(sk, skb->truesize); + __skb_unlink(skb, &sk->sk_receive_queue); + __kfree_skb(skb); + msk->bytes_consumed += count; +@@ -2132,18 +2066,6 @@ new_measure: + msk->rcvq_space.time = mstamp; + } + +-static void __mptcp_update_rmem(struct sock *sk) +-{ +- struct mptcp_sock *msk = mptcp_sk(sk); +- +- if (!msk->rmem_released) +- return; +- +- atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); +- mptcp_rmem_uncharge(sk, msk->rmem_released); +- WRITE_ONCE(msk->rmem_released, 0); +-} +- + static bool __mptcp_move_skbs(struct sock *sk) + { + struct mptcp_subflow_context *subflow; +@@ -2167,7 +2089,6 @@ static bool __mptcp_move_skbs(struct soc + break; + + slowpath = lock_sock_fast(ssk); +- __mptcp_update_rmem(sk); + done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); + + if (unlikely(ssk->sk_err)) +@@ -2175,12 +2096,7 @@ static bool __mptcp_move_skbs(struct soc + unlock_sock_fast(ssk, slowpath); + } while (!done); + +- ret = moved > 0; +- if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) || +- !skb_queue_empty(&sk->sk_receive_queue)) { +- __mptcp_update_rmem(sk); +- ret |= __mptcp_ofo_queue(msk); +- } ++ ret = moved > 0 || __mptcp_ofo_queue(msk); + if (ret) + mptcp_check_data_fin((struct sock *)msk); + return ret; +@@ -2859,8 +2775,6 @@ static void __mptcp_init_sock(struct soc + INIT_WORK(&msk->work, mptcp_worker); + msk->out_of_order_queue = RB_ROOT; + msk->first_pending = NULL; 
+- WRITE_ONCE(msk->rmem_fwd_alloc, 0); +- WRITE_ONCE(msk->rmem_released, 0); + msk->timer_ival = TCP_RTO_MIN; + msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; + +@@ -3088,8 +3002,6 @@ static void __mptcp_destroy_sock(struct + + sk->sk_prot->destroy(sk); + +- WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc)); +- WARN_ON_ONCE(msk->rmem_released); + sk_stream_kill_queues(sk); + xfrm_sk_free_policy(sk); + +@@ -3458,8 +3370,6 @@ void mptcp_destroy_common(struct mptcp_s + /* move all the rx fwd alloc into the sk_mem_reclaim_final in + * inet_sock_destruct() will dispose it + */ +- sk_forward_alloc_add(sk, msk->rmem_fwd_alloc); +- WRITE_ONCE(msk->rmem_fwd_alloc, 0); + mptcp_token_destroy(msk); + mptcp_pm_free_anno_list(msk); + mptcp_free_local_addr_list(msk); +@@ -3552,8 +3462,6 @@ static void mptcp_release_cb(struct sock + if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) + __mptcp_sync_sndbuf(sk); + } +- +- __mptcp_update_rmem(sk); + } + + /* MP_JOIN client subflow must wait for 4th ack before sending any data: +@@ -3729,12 +3637,6 @@ static void mptcp_shutdown(struct sock * + __mptcp_wr_shutdown(sk); + } + +-static int mptcp_forward_alloc_get(const struct sock *sk) +-{ +- return READ_ONCE(sk->sk_forward_alloc) + +- READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc); +-} +- + static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) + { + const struct sock *sk = (void *)msk; +@@ -3893,7 +3795,6 @@ static struct proto mptcp_prot = { + .hash = mptcp_hash, + .unhash = mptcp_unhash, + .get_port = mptcp_get_port, +- .forward_alloc_get = mptcp_forward_alloc_get, + .stream_memory_free = mptcp_stream_memory_free, + .sockets_allocated = &mptcp_sockets_allocated, + +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -280,7 +280,6 @@ struct mptcp_sock { + u64 rcv_data_fin_seq; + u64 bytes_retrans; + u64 bytes_consumed; +- int rmem_fwd_alloc; + int snd_burst; + int old_wspace; + u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; +@@ -295,7 +294,6 @@ struct 
mptcp_sock { + u32 last_ack_recv; + unsigned long timer_ival; + u32 token; +- int rmem_released; + unsigned long flags; + unsigned long cb_flags; + bool recovery; /* closing subflow write queue reinjected */ +@@ -392,7 +390,7 @@ static inline void msk_owned_by_me(const + */ + static inline int __mptcp_rmem(const struct sock *sk) + { +- return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released); ++ return atomic_read(&sk->sk_rmem_alloc); + } + + static inline int mptcp_win_from_space(const struct sock *sk, int space) diff --git a/queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch b/queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch new file mode 100644 index 0000000000..9f247ab561 --- /dev/null +++ b/queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch @@ -0,0 +1,119 @@ +From stable+bounces-192098-greg=kroah.com@vger.kernel.org Mon Nov 3 08:27:48 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 18:27:35 -0500 +Subject: mptcp: fix MSG_PEEK stream corruption +To: stable@vger.kernel.org +Cc: Paolo Abeni , Geliang Tang , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin +Message-ID: <20251102232735.3652847-4-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 8e04ce45a8db7a080220e86e249198fa676b83dc ] + +If a MSG_PEEK | MSG_WAITALL read operation consumes all the bytes in the +receive queue and recvmsg() needs to wait for more data - i.e. it's a +blocking one - upon arrival of the next packet the MPTCP protocol will +start again copying the oldest data present in the receive queue, +corrupting the data stream. + +Address the issue by explicitly tracking the peeked sequence number, +restarting from the last peeked byte. 
+ +Fixes: ca4fb892579f ("mptcp: add MSG_PEEK support") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Abeni +Reviewed-by: Geliang Tang +Tested-by: Geliang Tang +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-2-38ffff5a9ec8@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 38 +++++++++++++++++++++++++------------- + 1 file changed, 25 insertions(+), 13 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -1907,22 +1907,36 @@ do_error: + + static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); + +-static int __mptcp_recvmsg_mskq(struct sock *sk, +- struct msghdr *msg, +- size_t len, int flags, ++static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, ++ size_t len, int flags, int copied_total, + struct scm_timestamping_internal *tss, + int *cmsg_flags) + { + struct mptcp_sock *msk = mptcp_sk(sk); + struct sk_buff *skb, *tmp; ++ int total_data_len = 0; + int copied = 0; + + skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { +- u32 offset = MPTCP_SKB_CB(skb)->offset; ++ u32 delta, offset = MPTCP_SKB_CB(skb)->offset; + u32 data_len = skb->len - offset; +- u32 count = min_t(size_t, len - copied, data_len); ++ u32 count; + int err; + ++ if (flags & MSG_PEEK) { ++ /* skip already peeked skbs */ ++ if (total_data_len + data_len <= copied_total) { ++ total_data_len += data_len; ++ continue; ++ } ++ ++ /* skip the already peeked data in the current skb */ ++ delta = copied_total - total_data_len; ++ offset += delta; ++ data_len -= delta; ++ } ++ ++ count = min_t(size_t, len - copied, data_len); + if (!(flags & MSG_TRUNC)) { + err = skb_copy_datagram_msg(skb, offset, msg, count); + if (unlikely(err < 0)) { +@@ -1939,16 +1953,14 @@ static int __mptcp_recvmsg_mskq(struct s + + copied += count; + +- if (count < data_len) { +- if (!(flags & MSG_PEEK)) { 
++ if (!(flags & MSG_PEEK)) { ++ msk->bytes_consumed += count; ++ if (count < data_len) { + MPTCP_SKB_CB(skb)->offset += count; + MPTCP_SKB_CB(skb)->map_seq += count; +- msk->bytes_consumed += count; ++ break; + } +- break; +- } + +- if (!(flags & MSG_PEEK)) { + /* avoid the indirect call, we know the destructor is sock_rfree */ + skb->destructor = NULL; + skb->sk = NULL; +@@ -1956,7 +1968,6 @@ static int __mptcp_recvmsg_mskq(struct s + sk_mem_uncharge(sk, skb->truesize); + __skb_unlink(skb, &sk->sk_receive_queue); + skb_attempt_defer_free(skb); +- msk->bytes_consumed += count; + } + + if (copied >= len) +@@ -2154,7 +2165,8 @@ static int mptcp_recvmsg(struct sock *sk + while (copied < len) { + int err, bytes_read; + +- bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags); ++ bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, ++ copied, &tss, &cmsg_flags); + if (unlikely(bytes_read < 0)) { + if (!copied) + copied = bytes_read; diff --git a/queue-6.12/mptcp-leverage-skb-deferral-free.patch b/queue-6.12/mptcp-leverage-skb-deferral-free.patch new file mode 100644 index 0000000000..a099c64a93 --- /dev/null +++ b/queue-6.12/mptcp-leverage-skb-deferral-free.patch @@ -0,0 +1,50 @@ +From stable+bounces-192097-greg=kroah.com@vger.kernel.org Mon Nov 3 08:27:49 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 18:27:34 -0500 +Subject: mptcp: leverage skb deferral free +To: stable@vger.kernel.org +Cc: Paolo Abeni , Geliang Tang , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin +Message-ID: <20251102232735.3652847-3-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit 9aa59323f2709370cb4f01acbba599a9167f317b ] + +Usage of the skb deferral API is straight-forward; with multiple +subflows actives this allow moving part of the received application +load into multiple CPUs. + +Also fix a typo in the related comment. 
+ +Reviewed-by: Geliang Tang +Tested-by: Geliang Tang +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Paolo Abeni +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250927-net-next-mptcp-rcv-path-imp-v1-1-5da266aa9c1a@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -1949,12 +1949,13 @@ static int __mptcp_recvmsg_mskq(struct s + } + + if (!(flags & MSG_PEEK)) { +- /* avoid the indirect call, we know the destructor is sock_wfree */ ++ /* avoid the indirect call, we know the destructor is sock_rfree */ + skb->destructor = NULL; ++ skb->sk = NULL; + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_uncharge(sk, skb->truesize); + __skb_unlink(skb, &sk->sk_receive_queue); +- __kfree_skb(skb); ++ skb_attempt_defer_free(skb); + msk->bytes_consumed += count; + } + diff --git a/queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch b/queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch new file mode 100644 index 0000000000..d6d527201a --- /dev/null +++ b/queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch @@ -0,0 +1,359 @@ +From stable+bounces-192095-greg=kroah.com@vger.kernel.org Mon Nov 3 08:27:43 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 18:27:32 -0500 +Subject: mptcp: move the whole rx path under msk socket lock protection +To: stable@vger.kernel.org +Cc: Paolo Abeni , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin +Message-ID: <20251102232735.3652847-1-sashal@kernel.org> + +From: Paolo Abeni + +[ Upstream commit bc68b0efa1bf923cef1294a631d8e7416c7e06e4 ] + +After commit c2e6048fa1cf ("mptcp: fix race in release_cb") we can +move the whole MPTCP rx path under 
the socket lock leveraging the +release_cb. + +We can drop a bunch of spin_lock pairs in the receive functions, use +a single receive queue and invoke __mptcp_move_skbs only when subflows +ask for it. + +This will allow more cleanup in the next patch. + +Some changes are worth specific mention: + +The msk rcvbuf update now always happens under both the msk and the +subflow socket lock: we can drop a bunch of ONCE annotation and +consolidate the checks. + +When the skbs move is delayed at msk release callback time, even the +msk rcvbuf update is delayed; additionally take care of such action in +__mptcp_move_skbs(). + +Signed-off-by: Paolo Abeni +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250218-net-next-mptcp-rx-path-refactor-v1-3-4a47d90d7998@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/fastopen.c | 1 + net/mptcp/protocol.c | 123 ++++++++++++++++++++++++--------------------------- + net/mptcp/protocol.h | 2 + 3 files changed, 60 insertions(+), 66 deletions(-) + +--- a/net/mptcp/fastopen.c ++++ b/net/mptcp/fastopen.c +@@ -49,6 +49,7 @@ void mptcp_fastopen_subflow_synack_set_p + MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + + mptcp_data_lock(sk); ++ DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk)); + + mptcp_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -658,18 +658,6 @@ static bool __mptcp_move_skbs_from_subfl + bool more_data_avail; + struct tcp_sock *tp; + bool done = false; +- int sk_rbuf; +- +- sk_rbuf = READ_ONCE(sk->sk_rcvbuf); +- +- if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { +- int ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); +- +- if (unlikely(ssk_rbuf > sk_rbuf)) { +- WRITE_ONCE(sk->sk_rcvbuf, ssk_rbuf); +- sk_rbuf = ssk_rbuf; +- } +- } + + 
pr_debug("msk=%p ssk=%p\n", msk, ssk); + tp = tcp_sk(ssk); +@@ -737,7 +725,7 @@ static bool __mptcp_move_skbs_from_subfl + WRITE_ONCE(tp->copied_seq, seq); + more_data_avail = mptcp_subflow_data_available(ssk); + +- if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) { ++ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { + done = true; + break; + } +@@ -861,11 +849,30 @@ static bool move_skbs_to_msk(struct mptc + return moved > 0; + } + ++static void __mptcp_rcvbuf_update(struct sock *sk, struct sock *ssk) ++{ ++ if (unlikely(ssk->sk_rcvbuf > sk->sk_rcvbuf)) ++ WRITE_ONCE(sk->sk_rcvbuf, ssk->sk_rcvbuf); ++} ++ ++static void __mptcp_data_ready(struct sock *sk, struct sock *ssk) ++{ ++ struct mptcp_sock *msk = mptcp_sk(sk); ++ ++ __mptcp_rcvbuf_update(sk, ssk); ++ ++ /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ ++ if (__mptcp_rmem(sk) > sk->sk_rcvbuf) ++ return; ++ ++ /* Wake-up the reader only for in-sequence data */ ++ if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) ++ sk->sk_data_ready(sk); ++} ++ + void mptcp_data_ready(struct sock *sk, struct sock *ssk) + { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); +- struct mptcp_sock *msk = mptcp_sk(sk); +- int sk_rbuf, ssk_rbuf; + + /* The peer can send data while we are shutting down this + * subflow at msk destruction time, but we must avoid enqueuing +@@ -874,19 +881,11 @@ void mptcp_data_ready(struct sock *sk, s + if (unlikely(subflow->disposable)) + return; + +- ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); +- sk_rbuf = READ_ONCE(sk->sk_rcvbuf); +- if (unlikely(ssk_rbuf > sk_rbuf)) +- sk_rbuf = ssk_rbuf; +- +- /* over limit? 
can't append more skbs to msk, Also, no need to wake-up*/ +- if (__mptcp_rmem(sk) > sk_rbuf) +- return; +- +- /* Wake-up the reader only for in-sequence data */ + mptcp_data_lock(sk); +- if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) +- sk->sk_data_ready(sk); ++ if (!sock_owned_by_user(sk)) ++ __mptcp_data_ready(sk, ssk); ++ else ++ __set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags); + mptcp_data_unlock(sk); + } + +@@ -1975,16 +1974,17 @@ do_error: + + static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); + +-static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, ++static int __mptcp_recvmsg_mskq(struct sock *sk, + struct msghdr *msg, + size_t len, int flags, + struct scm_timestamping_internal *tss, + int *cmsg_flags) + { ++ struct mptcp_sock *msk = mptcp_sk(sk); + struct sk_buff *skb, *tmp; + int copied = 0; + +- skb_queue_walk_safe(&msk->receive_queue, skb, tmp) { ++ skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { + u32 offset = MPTCP_SKB_CB(skb)->offset; + u32 data_len = skb->len - offset; + u32 count = min_t(size_t, len - copied, data_len); +@@ -2019,7 +2019,7 @@ static int __mptcp_recvmsg_mskq(struct m + /* we will bulk release the skb memory later */ + skb->destructor = NULL; + WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); +- __skb_unlink(skb, &msk->receive_queue); ++ __skb_unlink(skb, &sk->sk_receive_queue); + __kfree_skb(skb); + msk->bytes_consumed += count; + } +@@ -2144,54 +2144,46 @@ static void __mptcp_update_rmem(struct s + WRITE_ONCE(msk->rmem_released, 0); + } + +-static void __mptcp_splice_receive_queue(struct sock *sk) ++static bool __mptcp_move_skbs(struct sock *sk) + { ++ struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); +- +- skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue); +-} +- +-static bool __mptcp_move_skbs(struct mptcp_sock *msk) +-{ +- struct sock *sk = (struct sock *)msk; + unsigned int moved = 0; + bool ret, done; + ++ /* 
verify we can move any data from the subflow, eventually updating */ ++ if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) ++ mptcp_for_each_subflow(msk, subflow) ++ __mptcp_rcvbuf_update(sk, subflow->tcp_sock); ++ ++ if (__mptcp_rmem(sk) > sk->sk_rcvbuf) ++ return false; ++ + do { + struct sock *ssk = mptcp_subflow_recv_lookup(msk); + bool slowpath; + +- /* we can have data pending in the subflows only if the msk +- * receive buffer was full at subflow_data_ready() time, +- * that is an unlikely slow path. +- */ +- if (likely(!ssk)) ++ if (unlikely(!ssk)) + break; + + slowpath = lock_sock_fast(ssk); +- mptcp_data_lock(sk); + __mptcp_update_rmem(sk); + done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); +- mptcp_data_unlock(sk); + + if (unlikely(ssk->sk_err)) + __mptcp_error_report(sk); + unlock_sock_fast(ssk, slowpath); + } while (!done); + +- /* acquire the data lock only if some input data is pending */ + ret = moved > 0; + if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) || +- !skb_queue_empty_lockless(&sk->sk_receive_queue)) { +- mptcp_data_lock(sk); ++ !skb_queue_empty(&sk->sk_receive_queue)) { + __mptcp_update_rmem(sk); + ret |= __mptcp_ofo_queue(msk); +- __mptcp_splice_receive_queue(sk); +- mptcp_data_unlock(sk); + } + if (ret) + mptcp_check_data_fin((struct sock *)msk); +- return !skb_queue_empty(&msk->receive_queue); ++ return ret; + } + + static unsigned int mptcp_inq_hint(const struct sock *sk) +@@ -2199,7 +2191,7 @@ static unsigned int mptcp_inq_hint(const + const struct mptcp_sock *msk = mptcp_sk(sk); + const struct sk_buff *skb; + +- skb = skb_peek(&msk->receive_queue); ++ skb = skb_peek(&sk->sk_receive_queue); + if (skb) { + u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq; + +@@ -2245,7 +2237,7 @@ static int mptcp_recvmsg(struct sock *sk + while (copied < len) { + int err, bytes_read; + +- bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags); ++ bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, 
flags, &tss, &cmsg_flags); + if (unlikely(bytes_read < 0)) { + if (!copied) + copied = bytes_read; +@@ -2254,7 +2246,7 @@ static int mptcp_recvmsg(struct sock *sk + + copied += bytes_read; + +- if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) ++ if (skb_queue_empty(&sk->sk_receive_queue) && __mptcp_move_skbs(sk)) + continue; + + /* only the MPTCP socket status is relevant here. The exit +@@ -2280,7 +2272,7 @@ static int mptcp_recvmsg(struct sock *sk + /* race breaker: the shutdown could be after the + * previous receive queue check + */ +- if (__mptcp_move_skbs(msk)) ++ if (__mptcp_move_skbs(sk)) + continue; + break; + } +@@ -2324,9 +2316,8 @@ out_err: + } + } + +- pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n", +- msk, skb_queue_empty_lockless(&sk->sk_receive_queue), +- skb_queue_empty(&msk->receive_queue), copied); ++ pr_debug("msk=%p rx queue empty=%d copied=%d\n", ++ msk, skb_queue_empty(&sk->sk_receive_queue), copied); + + release_sock(sk); + return copied; +@@ -2866,7 +2857,6 @@ static void __mptcp_init_sock(struct soc + INIT_LIST_HEAD(&msk->join_list); + INIT_LIST_HEAD(&msk->rtx_queue); + INIT_WORK(&msk->work, mptcp_worker); +- __skb_queue_head_init(&msk->receive_queue); + msk->out_of_order_queue = RB_ROOT; + msk->first_pending = NULL; + WRITE_ONCE(msk->rmem_fwd_alloc, 0); +@@ -3462,12 +3452,8 @@ void mptcp_destroy_common(struct mptcp_s + mptcp_for_each_subflow_safe(msk, subflow, tmp) + __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags); + +- /* move to sk_receive_queue, sk_stream_kill_queues will purge it */ +- mptcp_data_lock(sk); +- skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue); + __skb_queue_purge(&sk->sk_receive_queue); + skb_rbtree_purge(&msk->out_of_order_queue); +- mptcp_data_unlock(sk); + + /* move all the rx fwd alloc into the sk_mem_reclaim_final in + * inet_sock_destruct() will dispose it +@@ -3507,7 +3493,8 @@ void __mptcp_check_push(struct sock *sk, + + #define 
MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \ + BIT(MPTCP_RETRANSMIT) | \ +- BIT(MPTCP_FLUSH_JOIN_LIST)) ++ BIT(MPTCP_FLUSH_JOIN_LIST) | \ ++ BIT(MPTCP_DEQUEUE)) + + /* processes deferred events and flush wmem */ + static void mptcp_release_cb(struct sock *sk) +@@ -3541,6 +3528,11 @@ static void mptcp_release_cb(struct sock + __mptcp_push_pending(sk, 0); + if (flags & BIT(MPTCP_RETRANSMIT)) + __mptcp_retrans(sk); ++ if ((flags & BIT(MPTCP_DEQUEUE)) && __mptcp_move_skbs(sk)) { ++ /* notify ack seq update */ ++ mptcp_cleanup_rbuf(msk, 0); ++ sk->sk_data_ready(sk); ++ } + + cond_resched(); + spin_lock_bh(&sk->sk_lock.slock); +@@ -3783,7 +3775,8 @@ static int mptcp_ioctl(struct sock *sk, + return -EINVAL; + + lock_sock(sk); +- __mptcp_move_skbs(msk); ++ if (__mptcp_move_skbs(sk)) ++ mptcp_cleanup_rbuf(msk, 0); + *karg = mptcp_inq_hint(sk); + release_sock(sk); + break; +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -124,6 +124,7 @@ + #define MPTCP_FLUSH_JOIN_LIST 5 + #define MPTCP_SYNC_STATE 6 + #define MPTCP_SYNC_SNDBUF 7 ++#define MPTCP_DEQUEUE 8 + + struct mptcp_skb_cb { + u64 map_seq; +@@ -324,7 +325,6 @@ struct mptcp_sock { + struct work_struct work; + struct sk_buff *ooo_last_skb; + struct rb_root out_of_order_queue; +- struct sk_buff_head receive_queue; + struct list_head conn_list; + struct list_head rtx_queue; + struct mptcp_data_frag *first_pending; diff --git a/queue-6.12/net-phy-add-phy_disable_eee.patch b/queue-6.12/net-phy-add-phy_disable_eee.patch new file mode 100644 index 0000000000..956ee480fe --- /dev/null +++ b/queue-6.12/net-phy-add-phy_disable_eee.patch @@ -0,0 +1,62 @@ +From stable+bounces-192063-greg=kroah.com@vger.kernel.org Sun Nov 2 23:47:24 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 09:46:45 -0500 +Subject: net: phy: add phy_disable_eee +To: stable@vger.kernel.org +Cc: Heiner Kallweit , Andrew Lunn , Jakub Kicinski , Sasha Levin +Message-ID: <20251102144646.3457653-1-sashal@kernel.org> + +From: Heiner Kallweit + 
+[ Upstream commit b55498ff14bd14860d48dc8d2a0b6889b218c408 ] + +If a MAC driver doesn't support EEE, then the PHY shouldn't advertise it. +Add phy_disable_eee() for this purpose. + +Signed-off-by: Heiner Kallweit +Reviewed-by: Andrew Lunn +Link: https://patch.msgid.link/fd51738c-dcd6-4d61-b8c5-faa6ac0f1026@gmail.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 84a905290cb4 ("net: phy: dp83867: Disable EEE support as not implemented") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy_device.c | 16 ++++++++++++++++ + include/linux/phy.h | 1 + + 2 files changed, 17 insertions(+) + +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -3046,6 +3046,22 @@ void phy_support_eee(struct phy_device * + EXPORT_SYMBOL(phy_support_eee); + + /** ++ * phy_disable_eee - Disable EEE for the PHY ++ * @phydev: Target phy_device struct ++ * ++ * This function is used by MAC drivers for MAC's which don't support EEE. ++ * It disables EEE on the PHY layer. 
++ */ ++void phy_disable_eee(struct phy_device *phydev) ++{ ++ linkmode_zero(phydev->supported_eee); ++ linkmode_zero(phydev->advertising_eee); ++ phydev->eee_cfg.tx_lpi_enabled = false; ++ phydev->eee_cfg.eee_enabled = false; ++} ++EXPORT_SYMBOL_GPL(phy_disable_eee); ++ ++/** + * phy_support_sym_pause - Enable support of symmetrical pause + * @phydev: target phy_device struct + * +--- a/include/linux/phy.h ++++ b/include/linux/phy.h +@@ -2030,6 +2030,7 @@ void phy_advertise_eee_all(struct phy_de + void phy_support_sym_pause(struct phy_device *phydev); + void phy_support_asym_pause(struct phy_device *phydev); + void phy_support_eee(struct phy_device *phydev); ++void phy_disable_eee(struct phy_device *phydev); + void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, + bool autoneg); + void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); diff --git a/queue-6.12/net-phy-dp83867-disable-eee-support-as-not-implemented.patch b/queue-6.12/net-phy-dp83867-disable-eee-support-as-not-implemented.patch new file mode 100644 index 0000000000..f0a65a8194 --- /dev/null +++ b/queue-6.12/net-phy-dp83867-disable-eee-support-as-not-implemented.patch @@ -0,0 +1,54 @@ +From stable+bounces-192064-greg=kroah.com@vger.kernel.org Sun Nov 2 23:47:24 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 09:46:46 -0500 +Subject: net: phy: dp83867: Disable EEE support as not implemented +To: stable@vger.kernel.org +Cc: Emanuele Ghidoli , Andrew Lunn , Jakub Kicinski , Sasha Levin +Message-ID: <20251102144646.3457653-2-sashal@kernel.org> + +From: Emanuele Ghidoli + +[ Upstream commit 84a905290cb4c3d9a71a9e3b2f2e02e031e7512f ] + +While the DP83867 PHYs report EEE capability through their feature +registers, the actual hardware does not support EEE (see Links). +When the connected MAC enables EEE, it causes link instability and +communication failures. + +The issue is reproducible with an iMX8MP and the relevant stmmac ethernet port. 
+Since the introduction of phylink-managed EEE support in the stmmac driver, +EEE is now enabled by default, leading to issues on systems using the +DP83867 PHY. + +Call phy_disable_eee during phy initialization to prevent EEE from being +enabled on DP83867 PHYs. + +Link: https://e2e.ti.com/support/interface-group/interface/f/interface-forum/1445244/dp83867ir-dp83867-disable-eee-lpi +Link: https://e2e.ti.com/support/interface-group/interface/f/interface-forum/658638/dp83867ir-eee-energy-efficient-ethernet +Fixes: 2a10154abcb7 ("net: phy: dp83867: Add TI dp83867 phy") +Cc: stable@vger.kernel.org +Signed-off-by: Emanuele Ghidoli +Reviewed-by: Andrew Lunn +Link: https://patch.msgid.link/20251023144857.529566-1-ghidoliemanuele@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/dp83867.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/net/phy/dp83867.c ++++ b/drivers/net/phy/dp83867.c +@@ -792,6 +792,12 @@ static int dp83867_config_init(struct ph + return ret; + } + ++ /* Although the DP83867 reports EEE capability through the ++ * MDIO_PCS_EEE_ABLE and MDIO_AN_EEE_ADV registers, the feature ++ * is not actually implemented in hardware. 
++ */ ++ phy_disable_eee(phydev); ++ + if (phy_interface_is_rgmii(phydev) || + phydev->interface == PHY_INTERFACE_MODE_SGMII) { + val = phy_read(phydev, MII_DP83867_PHYCTRL); diff --git a/queue-6.12/sched_ext-mark-scx_bpf_dsq_move_set_-with-kf_rcu.patch b/queue-6.12/sched_ext-mark-scx_bpf_dsq_move_set_-with-kf_rcu.patch new file mode 100644 index 0000000000..eb07310471 --- /dev/null +++ b/queue-6.12/sched_ext-mark-scx_bpf_dsq_move_set_-with-kf_rcu.patch @@ -0,0 +1,50 @@ +From stable+bounces-192062-greg=kroah.com@vger.kernel.org Sun Nov 2 23:44:26 2025 +From: Sasha Levin +Date: Sun, 2 Nov 2025 09:44:17 -0500 +Subject: sched_ext: Mark scx_bpf_dsq_move_set_[slice|vtime]() with KF_RCU +To: stable@vger.kernel.org +Cc: Tejun Heo , Andrea Righi , Sasha Levin +Message-ID: <20251102144417.3456382-1-sashal@kernel.org> + +From: Tejun Heo + +[ Upstream commit 54e96258a6930909b690fd7e8889749231ba8085 ] + +scx_bpf_dsq_move_set_slice() and scx_bpf_dsq_move_set_vtime() take a DSQ +iterator argument which has to be valid. Mark them with KF_RCU. 
+ +Fixes: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()") +Cc: stable@vger.kernel.org # v6.12+ +Acked-by: Andrea Righi +Signed-off-by: Tejun Heo +[ scx_bpf_dsq_move_set_* => scx_bpf_dispatch_from_dsq_set_* ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/ext.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/kernel/sched/ext.c ++++ b/kernel/sched/ext.c +@@ -6493,8 +6493,8 @@ BTF_KFUNCS_START(scx_kfunc_ids_dispatch) + BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots) + BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel) + BTF_ID_FLAGS(func, scx_bpf_consume) +-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice) +-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime) ++BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice, KF_RCU) ++BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime, KF_RCU) + BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU) + BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU) + BTF_KFUNCS_END(scx_kfunc_ids_dispatch) +@@ -6593,8 +6593,8 @@ __bpf_kfunc_end_defs(); + + BTF_KFUNCS_START(scx_kfunc_ids_unlocked) + BTF_ID_FLAGS(func, scx_bpf_create_dsq, KF_SLEEPABLE) +-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice) +-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime) ++BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice, KF_RCU) ++BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime, KF_RCU) + BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU) + BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU) + BTF_KFUNCS_END(scx_kfunc_ids_unlocked) diff --git a/queue-6.12/series b/queue-6.12/series index 9c52f9aed4..994974f522 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -70,3 +70,14 @@ drm-mediatek-fix-device-use-after-free-on-unbind.patch drm-ast-clear-preserved-bits-from-register-output-value.patch drm-amd-check-that-vpe-has-reached-dpm0-in-idle-handler.patch 
drm-amd-display-fix-incorrect-return-of-vblank-enable-on-unconfigured-crtc.patch +acpi-fan-add-fan-speed-reporting-for-fans-with-only-_fst.patch +acpi-fan-use-platform-device-for-devres-related-actions.patch +net-phy-add-phy_disable_eee.patch +net-phy-dp83867-disable-eee-support-as-not-implemented.patch +sched_ext-mark-scx_bpf_dsq_move_set_-with-kf_rcu.patch +cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch +cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch +mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch +mptcp-cleanup-mem-accounting.patch +mptcp-leverage-skb-deferral-free.patch +mptcp-fix-msg_peek-stream-corruption.patch -- 2.47.3