From: Greg Kroah-Hartman Date: Fri, 17 Oct 2025 07:43:42 +0000 (+0200) Subject: 6.6-stable patches X-Git-Tag: v5.15.195~34 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5883bbe37f68496418c2c8b00a5a6d3b7225802b;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: acpi-battery-add-synchronization-between-interface-updates.patch acpi-battery-allocate-driver-data-through-devm_-apis.patch acpi-battery-check-for-error-code-from-devm_mutex_init-call.patch acpi-battery-initialize-mutexes-through-devm_-apis.patch acpi-property-add-code-comments-explaining-what-is-going-on.patch acpi-property-disregard-references-in-data-only-subnode-lists.patch acpi-property-do-not-pass-null-handles-to-acpi_attach_data.patch arm64-kprobes-call-set_memory_rox-for-kprobe-page.patch arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch btrfs-fix-the-incorrect-max_bytes-value-for-find_lock_delalloc_range.patch ipmi-fix-handling-of-messages-with-provided-receive-message-pointer.patch ipmi-rework-user-message-limit-handling.patch ksmbd-add-max-ip-connections-parameter.patch kvm-svm-emulate-perf_cntr_global_status_set-for-perfmonv2.patch kvm-svm-skip-fastpath-emulation-on-vm-exit-if-next-rip-isn-t-valid.patch media-mc-clear-minor-number-before-put-device.patch mfd-intel_soc_pmic_chtdc_ti-drop-unneeded-assignment-for-cache_type.patch mfd-intel_soc_pmic_chtdc_ti-fix-invalid-regmap-config-max_register-value.patch mfd-intel_soc_pmic_chtdc_ti-set-use_single_read-regmap_config-flag.patch misc-fastrpc-add-missing-dev_err-newlines.patch misc-fastrpc-save-actual-dma-size-in-fastrpc_map-structure.patch mm-ksm-fix-incorrect-ksm-counter-handling-in-mm_struct-during-fork.patch pci-endpoint-pci-epf-test-add-null-check-for-dma-channels-before-release.patch pci-endpoint-remove-surplus-return-statement-from-pci_epf_test_clean_dma_chan.patch rseq-protect-event-mask-against-membarrier-ipi.patch s390-bpf-centralize-frame-offset-calculations.patch 
s390-bpf-change-seen_reg-to-a-mask.patch s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch squashfs-add-additional-inode-sanity-checking.patch squashfs-reject-negative-file-sizes-in-squashfs_read_inode.patch tracing-fix-race-condition-in-kprobe-initialization-causing-null-pointer-dereference.patch --- diff --git a/queue-6.6/acpi-battery-add-synchronization-between-interface-updates.patch b/queue-6.6/acpi-battery-add-synchronization-between-interface-updates.patch new file mode 100644 index 0000000000..756fec56a2 --- /dev/null +++ b/queue-6.6/acpi-battery-add-synchronization-between-interface-updates.patch @@ -0,0 +1,217 @@ +From stable+bounces-186158-greg=kroah.com@vger.kernel.org Thu Oct 16 15:54:23 2025 +From: Sasha Levin +Date: Thu, 16 Oct 2025 09:54:12 -0400 +Subject: ACPI: battery: Add synchronization between interface updates +To: stable@vger.kernel.org +Cc: "Rafael J. Wysocki" , GuangFei Luo , Sasha Levin +Message-ID: <20251016135412.3299634-4-sashal@kernel.org> + +From: "Rafael J. Wysocki" + +[ Upstream commit 399dbcadc01ebf0035f325eaa8c264f8b5cd0a14 ] + +There is no synchronization between different code paths in the ACPI +battery driver that update its sysfs interface or its power supply +class device interface. In some cases this results to functional +failures due to race conditions. + +One example of this is when two ACPI notifications: + + - ACPI_BATTERY_NOTIFY_STATUS (0x80) + - ACPI_BATTERY_NOTIFY_INFO (0x81) + +are triggered (by the platform firmware) in a row with a little delay +in between after removing and reinserting a laptop battery. 
Both +notifications cause acpi_battery_update() to be called and if the delay +between them is sufficiently small, sysfs_add_battery() can be re-entered +before battery->bat is set which leads to a duplicate sysfs entry error: + + sysfs: cannot create duplicate filename '/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0C0A:00/power_supply/BAT1' + CPU: 1 UID: 0 PID: 185 Comm: kworker/1:4 Kdump: loaded Not tainted 6.12.38+deb13-amd64 #1 Debian 6.12.38-1 + Hardware name: Gateway NV44 /SJV40-MV , BIOS V1.3121 04/08/2009 + Workqueue: kacpi_notify acpi_os_execute_deferred + Call Trace: + + dump_stack_lvl+0x5d/0x80 + sysfs_warn_dup.cold+0x17/0x23 + sysfs_create_dir_ns+0xce/0xe0 + kobject_add_internal+0xba/0x250 + kobject_add+0x96/0xc0 + ? get_device_parent+0xde/0x1e0 + device_add+0xe2/0x870 + __power_supply_register.part.0+0x20f/0x3f0 + ? wake_up_q+0x4e/0x90 + sysfs_add_battery+0xa4/0x1d0 [battery] + acpi_battery_update+0x19e/0x290 [battery] + acpi_battery_notify+0x50/0x120 [battery] + acpi_ev_notify_dispatch+0x49/0x70 + acpi_os_execute_deferred+0x1a/0x30 + process_one_work+0x177/0x330 + worker_thread+0x251/0x390 + ? __pfx_worker_thread+0x10/0x10 + kthread+0xd2/0x100 + ? __pfx_kthread+0x10/0x10 + ret_from_fork+0x34/0x50 + ? __pfx_kthread+0x10/0x10 + ret_from_fork_asm+0x1a/0x30 + + kobject: kobject_add_internal failed for BAT1 with -EEXIST, don't try to register things with the same name in the same directory. + +There are also other scenarios in which analogous issues may occur. + +Address this by using a common lock in all of the code paths leading +to updates of driver interfaces: ACPI Notify () handler, system resume +callback and post-resume notification, device addition and removal. + +This new lock replaces sysfs_lock that has been used only in +sysfs_remove_battery() which now is going to be always called under +the new lock, so it doesn't need any internal locking any more. 
+ +Fixes: 10666251554c ("ACPI: battery: Install Notify() handler directly") +Closes: https://lore.kernel.org/linux-acpi/20250910142653.313360-1-luogf2025@163.com/ +Reported-by: GuangFei Luo +Tested-by: GuangFei Luo +Cc: 6.6+ # 6.6+ +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/battery.c | 43 +++++++++++++++++++++++++++++-------------- + 1 file changed, 29 insertions(+), 14 deletions(-) + +--- a/drivers/acpi/battery.c ++++ b/drivers/acpi/battery.c +@@ -94,7 +94,7 @@ enum { + + struct acpi_battery { + struct mutex lock; +- struct mutex sysfs_lock; ++ struct mutex update_lock; + struct power_supply *bat; + struct power_supply_desc bat_desc; + struct acpi_device *device; +@@ -888,15 +888,12 @@ static int sysfs_add_battery(struct acpi + + static void sysfs_remove_battery(struct acpi_battery *battery) + { +- mutex_lock(&battery->sysfs_lock); +- if (!battery->bat) { +- mutex_unlock(&battery->sysfs_lock); ++ if (!battery->bat) + return; +- } ++ + battery_hook_remove_battery(battery); + power_supply_unregister(battery->bat); + battery->bat = NULL; +- mutex_unlock(&battery->sysfs_lock); + } + + static void find_battery(const struct dmi_header *dm, void *private) +@@ -1056,6 +1053,9 @@ static void acpi_battery_notify(acpi_han + + if (!battery) + return; ++ ++ guard(mutex)(&battery->update_lock); ++ + old = battery->bat; + /* + * On Acer Aspire V5-573G notifications are sometimes triggered too +@@ -1078,21 +1078,22 @@ static void acpi_battery_notify(acpi_han + } + + static int battery_notify(struct notifier_block *nb, +- unsigned long mode, void *_unused) ++ unsigned long mode, void *_unused) + { + struct acpi_battery *battery = container_of(nb, struct acpi_battery, + pm_nb); +- int result; + +- switch (mode) { +- case PM_POST_HIBERNATION: +- case PM_POST_SUSPEND: ++ if (mode == PM_POST_SUSPEND || mode == PM_POST_HIBERNATION) { ++ guard(mutex)(&battery->update_lock); ++ + if (!acpi_battery_present(battery)) 
+ return 0; + + if (battery->bat) { + acpi_battery_refresh(battery); + } else { ++ int result; ++ + result = acpi_battery_get_info(battery); + if (result) + return result; +@@ -1104,7 +1105,6 @@ static int battery_notify(struct notifie + + acpi_battery_init_alarm(battery); + acpi_battery_get_state(battery); +- break; + } + + return 0; +@@ -1182,6 +1182,8 @@ static int acpi_battery_update_retry(str + { + int retry, ret; + ++ guard(mutex)(&battery->update_lock); ++ + for (retry = 5; retry; retry--) { + ret = acpi_battery_update(battery, false); + if (!ret) +@@ -1192,6 +1194,13 @@ static int acpi_battery_update_retry(str + return ret; + } + ++static void sysfs_battery_cleanup(struct acpi_battery *battery) ++{ ++ guard(mutex)(&battery->update_lock); ++ ++ sysfs_remove_battery(battery); ++} ++ + static int acpi_battery_add(struct acpi_device *device) + { + int result = 0; +@@ -1214,7 +1223,7 @@ static int acpi_battery_add(struct acpi_ + if (result) + return result; + +- result = devm_mutex_init(&device->dev, &battery->sysfs_lock); ++ result = devm_mutex_init(&device->dev, &battery->update_lock); + if (result) + return result; + +@@ -1244,7 +1253,7 @@ fail_pm: + device_init_wakeup(&device->dev, 0); + unregister_pm_notifier(&battery->pm_nb); + fail: +- sysfs_remove_battery(battery); ++ sysfs_battery_cleanup(battery); + + return result; + } +@@ -1263,6 +1272,9 @@ static void acpi_battery_remove(struct a + + device_init_wakeup(&device->dev, 0); + unregister_pm_notifier(&battery->pm_nb); ++ ++ guard(mutex)(&battery->update_lock); ++ + sysfs_remove_battery(battery); + } + +@@ -1280,6 +1292,9 @@ static int acpi_battery_resume(struct de + return -EINVAL; + + battery->update_time = 0; ++ ++ guard(mutex)(&battery->update_lock); ++ + acpi_battery_update(battery, true); + return 0; + } diff --git a/queue-6.6/acpi-battery-allocate-driver-data-through-devm_-apis.patch b/queue-6.6/acpi-battery-allocate-driver-data-through-devm_-apis.patch new file mode 100644 index 
0000000000..e416857885 --- /dev/null +++ b/queue-6.6/acpi-battery-allocate-driver-data-through-devm_-apis.patch @@ -0,0 +1,51 @@ +From stable+bounces-186156-greg=kroah.com@vger.kernel.org Thu Oct 16 15:54:20 2025 +From: Sasha Levin +Date: Thu, 16 Oct 2025 09:54:09 -0400 +Subject: ACPI: battery: allocate driver data through devm_ APIs +To: stable@vger.kernel.org +Cc: "Thomas Weißschuh" , "Rafael J. Wysocki" , "Sasha Levin" +Message-ID: <20251016135412.3299634-1-sashal@kernel.org> + +From: Thomas Weißschuh + +[ Upstream commit 909dfc60692331e1599d5e28a8f08a611f353aef ] + +Simplify the cleanup logic a bit. + +Signed-off-by: Thomas Weißschuh +Link: https://patch.msgid.link/20240904-acpi-battery-cleanups-v1-2-a3bf74f22d40@weissschuh.net +Signed-off-by: Rafael J. Wysocki +Stable-dep-of: 399dbcadc01e ("ACPI: battery: Add synchronization between interface updates") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/battery.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/acpi/battery.c ++++ b/drivers/acpi/battery.c +@@ -1203,7 +1203,7 @@ static int acpi_battery_add(struct acpi_ + if (device->dep_unmet) + return -EPROBE_DEFER; + +- battery = kzalloc(sizeof(struct acpi_battery), GFP_KERNEL); ++ battery = devm_kzalloc(&device->dev, sizeof(*battery), GFP_KERNEL); + if (!battery) + return -ENOMEM; + battery->device = device; +@@ -1241,7 +1241,6 @@ fail: + sysfs_remove_battery(battery); + mutex_destroy(&battery->lock); + mutex_destroy(&battery->sysfs_lock); +- kfree(battery); + + return result; + } +@@ -1264,7 +1263,6 @@ static void acpi_battery_remove(struct a + + mutex_destroy(&battery->lock); + mutex_destroy(&battery->sysfs_lock); +- kfree(battery); + } + + #ifdef CONFIG_PM_SLEEP diff --git a/queue-6.6/acpi-battery-check-for-error-code-from-devm_mutex_init-call.patch b/queue-6.6/acpi-battery-check-for-error-code-from-devm_mutex_init-call.patch new file mode 100644 index 0000000000..0930af4c1e --- /dev/null +++ 
b/queue-6.6/acpi-battery-check-for-error-code-from-devm_mutex_init-call.patch @@ -0,0 +1,49 @@ +From stable+bounces-186159-greg=kroah.com@vger.kernel.org Thu Oct 16 15:54:26 2025 +From: Sasha Levin +Date: Thu, 16 Oct 2025 09:54:11 -0400 +Subject: ACPI: battery: Check for error code from devm_mutex_init() call +To: stable@vger.kernel.org +Cc: "Andy Shevchenko" , "Thomas Weißschuh" , "Rafael J. Wysocki" , "Sasha Levin" +Message-ID: <20251016135412.3299634-3-sashal@kernel.org> + +From: Andy Shevchenko + +[ Upstream commit 815daedc318b2f9f1b956d0631377619a0d69d96 ] + +Even if it's not critical, the avoidance of checking the error code +from devm_mutex_init() call today diminishes the point of using devm +variant of it. Tomorrow it may even leak something. Add the missed +check. + +Fixes: 0710c1ce5045 ("ACPI: battery: initialize mutexes through devm_ APIs") +Signed-off-by: Andy Shevchenko +Reviewed-by: Thomas Weißschuh +Link: https://patch.msgid.link/20241030162754.2110946-1-andriy.shevchenko@linux.intel.com +[ rjw: Added 2 empty code lines ] +Signed-off-by: Rafael J. 
Wysocki +Stable-dep-of: 399dbcadc01e ("ACPI: battery: Add synchronization between interface updates") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/battery.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/drivers/acpi/battery.c ++++ b/drivers/acpi/battery.c +@@ -1210,8 +1210,14 @@ static int acpi_battery_add(struct acpi_ + strcpy(acpi_device_name(device), ACPI_BATTERY_DEVICE_NAME); + strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS); + device->driver_data = battery; +- devm_mutex_init(&device->dev, &battery->lock); +- devm_mutex_init(&device->dev, &battery->sysfs_lock); ++ result = devm_mutex_init(&device->dev, &battery->lock); ++ if (result) ++ return result; ++ ++ result = devm_mutex_init(&device->dev, &battery->sysfs_lock); ++ if (result) ++ return result; ++ + if (acpi_has_method(battery->device->handle, "_BIX")) + set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); + diff --git a/queue-6.6/acpi-battery-initialize-mutexes-through-devm_-apis.patch b/queue-6.6/acpi-battery-initialize-mutexes-through-devm_-apis.patch new file mode 100644 index 0000000000..cf0df07437 --- /dev/null +++ b/queue-6.6/acpi-battery-initialize-mutexes-through-devm_-apis.patch @@ -0,0 +1,56 @@ +From stable+bounces-186157-greg=kroah.com@vger.kernel.org Thu Oct 16 15:54:22 2025 +From: Sasha Levin +Date: Thu, 16 Oct 2025 09:54:10 -0400 +Subject: ACPI: battery: initialize mutexes through devm_ APIs +To: stable@vger.kernel.org +Cc: "Thomas Weißschuh" , "Rafael J. Wysocki" , "Sasha Levin" +Message-ID: <20251016135412.3299634-2-sashal@kernel.org> + +From: Thomas Weißschuh + +[ Upstream commit 0710c1ce50455ed0db91bffa0eebbaa4f69b1773 ] + +Simplify the cleanup logic a bit. + +Signed-off-by: Thomas Weißschuh +Link: https://patch.msgid.link/20240904-acpi-battery-cleanups-v1-3-a3bf74f22d40@weissschuh.net +Signed-off-by: Rafael J. 
Wysocki +Stable-dep-of: 399dbcadc01e ("ACPI: battery: Add synchronization between interface updates") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/battery.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +--- a/drivers/acpi/battery.c ++++ b/drivers/acpi/battery.c +@@ -1210,8 +1210,8 @@ static int acpi_battery_add(struct acpi_ + strcpy(acpi_device_name(device), ACPI_BATTERY_DEVICE_NAME); + strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS); + device->driver_data = battery; +- mutex_init(&battery->lock); +- mutex_init(&battery->sysfs_lock); ++ devm_mutex_init(&device->dev, &battery->lock); ++ devm_mutex_init(&device->dev, &battery->sysfs_lock); + if (acpi_has_method(battery->device->handle, "_BIX")) + set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); + +@@ -1239,8 +1239,6 @@ fail_pm: + unregister_pm_notifier(&battery->pm_nb); + fail: + sysfs_remove_battery(battery); +- mutex_destroy(&battery->lock); +- mutex_destroy(&battery->sysfs_lock); + + return result; + } +@@ -1260,9 +1258,6 @@ static void acpi_battery_remove(struct a + device_init_wakeup(&device->dev, 0); + unregister_pm_notifier(&battery->pm_nb); + sysfs_remove_battery(battery); +- +- mutex_destroy(&battery->lock); +- mutex_destroy(&battery->sysfs_lock); + } + + #ifdef CONFIG_PM_SLEEP diff --git a/queue-6.6/acpi-property-add-code-comments-explaining-what-is-going-on.patch b/queue-6.6/acpi-property-add-code-comments-explaining-what-is-going-on.patch new file mode 100644 index 0000000000..bae62bd6a8 --- /dev/null +++ b/queue-6.6/acpi-property-add-code-comments-explaining-what-is-going-on.patch @@ -0,0 +1,115 @@ +From stable+bounces-186200-greg=kroah.com@vger.kernel.org Thu Oct 16 21:28:42 2025 +From: Sasha Levin +Date: Thu, 16 Oct 2025 15:28:31 -0400 +Subject: ACPI: property: Add code comments explaining what is going on +To: stable@vger.kernel.org +Cc: "Rafael J. 
Wysocki" , Sakari Ailus , Sasha Levin +Message-ID: <20251016192832.3384290-3-sashal@kernel.org> + +From: "Rafael J. Wysocki" + +[ Upstream commit 737c3a09dcf69ba2814f3674947ccaec1861c985 ] + +In some places in the ACPI device properties handling code, it is +unclear why the code is what it is. Some assumptions are not documented +and some pieces of code are based on knowledge that is not mentioned +anywhere. + +Add code comments explaining these things. + +Signed-off-by: Rafael J. Wysocki +Reviewed-by: Sakari Ailus +Tested-by: Sakari Ailus +Stable-dep-of: baf60d5cb8bc ("ACPI: property: Do not pass NULL handles to acpi_attach_data()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/property.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 44 insertions(+), 2 deletions(-) + +--- a/drivers/acpi/property.c ++++ b/drivers/acpi/property.c +@@ -96,7 +96,18 @@ static bool acpi_nondev_subnode_extract( + if (handle) + acpi_get_parent(handle, &scope); + ++ /* ++ * Extract properties from the _DSD-equivalent package pointed to by ++ * desc and use scope (if not NULL) for the completion of relative ++ * pathname segments. ++ * ++ * The extracted properties will be held in the new data node dn. ++ */ + result = acpi_extract_properties(scope, desc, &dn->data); ++ /* ++ * Look for subnodes in the _DSD-equivalent package pointed to by desc ++ * and create child nodes of dn if there are any. ++ */ + if (acpi_enumerate_nondev_subnodes(scope, desc, &dn->data, &dn->fwnode)) + result = true; + +@@ -121,6 +132,12 @@ static bool acpi_nondev_subnode_ok(acpi_ + acpi_handle handle; + acpi_status status; + ++ /* ++ * If the scope is unknown, the _DSD-equivalent package being parsed ++ * was embedded in an outer _DSD-equivalent package as a result of ++ * direct evaluation of an object pointed to by a reference. In that ++ * case, using a pathname as the target object pointer is invalid. 
++ */ + if (!scope) + return false; + +@@ -150,6 +167,10 @@ static bool acpi_add_nondev_subnodes(acp + bool ret = false; + int i; + ++ /* ++ * Every element in the links package is expected to represent a link ++ * to a non-device node in a tree containing device-specific data. ++ */ + for (i = 0; i < links->package.count; i++) { + union acpi_object *link, *desc; + bool result; +@@ -159,17 +180,38 @@ static bool acpi_add_nondev_subnodes(acp + if (link->package.count != 2) + continue; + +- /* The first one must be a string. */ ++ /* The first one (the key) must be a string. */ + if (link->package.elements[0].type != ACPI_TYPE_STRING) + continue; + +- /* The second one may be a string or a package. */ ++ /* The second one (the target) may be a string or a package. */ + switch (link->package.elements[1].type) { + case ACPI_TYPE_STRING: ++ /* ++ * The string is expected to be a full pathname or a ++ * pathname segment relative to the given scope. That ++ * pathname is expected to point to an object returning ++ * a package that contains _DSD-equivalent information. ++ */ + result = acpi_nondev_subnode_ok(scope, link, list, + parent); + break; + case ACPI_TYPE_PACKAGE: ++ /* ++ * This happens when a reference is used in AML to ++ * point to the target. Since the target is expected ++ * to be a named object, a reference to it will cause it ++ * to be avaluated in place and its return package will ++ * be embedded in the links package at the location of ++ * the reference. ++ * ++ * The target package is expected to contain _DSD- ++ * equivalent information, but the scope in which it ++ * is located in the original AML is unknown. Thus ++ * it cannot contain pathname segments represented as ++ * strings because there is no way to build full ++ * pathnames out of them. 
++ */ + desc = &link->package.elements[1]; + result = acpi_nondev_subnode_extract(desc, NULL, link, + list, parent); diff --git a/queue-6.6/acpi-property-disregard-references-in-data-only-subnode-lists.patch b/queue-6.6/acpi-property-disregard-references-in-data-only-subnode-lists.patch new file mode 100644 index 0000000000..b5a0265daa --- /dev/null +++ b/queue-6.6/acpi-property-disregard-references-in-data-only-subnode-lists.patch @@ -0,0 +1,138 @@ +From stable+bounces-186199-greg=kroah.com@vger.kernel.org Thu Oct 16 21:28:41 2025 +From: Sasha Levin +Date: Thu, 16 Oct 2025 15:28:30 -0400 +Subject: ACPI: property: Disregard references in data-only subnode lists +To: stable@vger.kernel.org +Cc: "Rafael J. Wysocki" , Sakari Ailus , Sasha Levin +Message-ID: <20251016192832.3384290-2-sashal@kernel.org> + +From: "Rafael J. Wysocki" + +[ Upstream commit d06118fe9b03426484980ed4c189a8c7b99fa631 ] + +Data-only subnode links following the ACPI data subnode GUID in a _DSD +package are expected to point to named objects returning _DSD-equivalent +packages. If a reference to such an object is used in the target field +of any of those links, that object will be evaluated in place (as a +named object) and its return data will be embedded in the outer _DSD +package. + +For this reason, it is not expected to see a subnode link with the +target field containing a local reference (that would mean pointing +to a device or another object that cannot be evaluated in place and +therefore cannot return a _DSD-equivalent package). + +Accordingly, simplify the code parsing data-only subnode links to +simply print a message when it encounters a local reference in the +target field of one of those links. + +Moreover, since acpi_nondev_subnode_data_ok() would only have one +caller after the change above, fold it into that caller. + +Link: https://lore.kernel.org/linux-acpi/CAJZ5v0jVeSrDO6hrZhKgRZrH=FpGD4vNUjFD8hV9WwN9TLHjzQ@mail.gmail.com/ +Signed-off-by: Rafael J. 
Wysocki +Reviewed-by: Sakari Ailus +Tested-by: Sakari Ailus +Stable-dep-of: baf60d5cb8bc ("ACPI: property: Do not pass NULL handles to acpi_attach_data()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/property.c | 51 ++++++++++++++++++++---------------------------- + 1 file changed, 22 insertions(+), 29 deletions(-) + +--- a/drivers/acpi/property.c ++++ b/drivers/acpi/property.c +@@ -112,32 +112,12 @@ static bool acpi_nondev_subnode_extract( + return false; + } + +-static bool acpi_nondev_subnode_data_ok(acpi_handle handle, +- const union acpi_object *link, +- struct list_head *list, +- struct fwnode_handle *parent) +-{ +- struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; +- acpi_status status; +- +- status = acpi_evaluate_object_typed(handle, NULL, NULL, &buf, +- ACPI_TYPE_PACKAGE); +- if (ACPI_FAILURE(status)) +- return false; +- +- if (acpi_nondev_subnode_extract(buf.pointer, handle, link, list, +- parent)) +- return true; +- +- ACPI_FREE(buf.pointer); +- return false; +-} +- + static bool acpi_nondev_subnode_ok(acpi_handle scope, + const union acpi_object *link, + struct list_head *list, + struct fwnode_handle *parent) + { ++ struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER }; + acpi_handle handle; + acpi_status status; + +@@ -149,7 +129,17 @@ static bool acpi_nondev_subnode_ok(acpi_ + if (ACPI_FAILURE(status)) + return false; + +- return acpi_nondev_subnode_data_ok(handle, link, list, parent); ++ status = acpi_evaluate_object_typed(handle, NULL, NULL, &buf, ++ ACPI_TYPE_PACKAGE); ++ if (ACPI_FAILURE(status)) ++ return false; ++ ++ if (acpi_nondev_subnode_extract(buf.pointer, handle, link, list, ++ parent)) ++ return true; ++ ++ ACPI_FREE(buf.pointer); ++ return false; + } + + static bool acpi_add_nondev_subnodes(acpi_handle scope, +@@ -162,7 +152,6 @@ static bool acpi_add_nondev_subnodes(acp + + for (i = 0; i < links->package.count; i++) { + union acpi_object *link, *desc; +- acpi_handle handle; + bool result; + + link = 
&links->package.elements[i]; +@@ -174,22 +163,26 @@ static bool acpi_add_nondev_subnodes(acp + if (link->package.elements[0].type != ACPI_TYPE_STRING) + continue; + +- /* The second one may be a string, a reference or a package. */ ++ /* The second one may be a string or a package. */ + switch (link->package.elements[1].type) { + case ACPI_TYPE_STRING: + result = acpi_nondev_subnode_ok(scope, link, list, + parent); + break; +- case ACPI_TYPE_LOCAL_REFERENCE: +- handle = link->package.elements[1].reference.handle; +- result = acpi_nondev_subnode_data_ok(handle, link, list, +- parent); +- break; + case ACPI_TYPE_PACKAGE: + desc = &link->package.elements[1]; + result = acpi_nondev_subnode_extract(desc, NULL, link, + list, parent); + break; ++ case ACPI_TYPE_LOCAL_REFERENCE: ++ /* ++ * It is not expected to see any local references in ++ * the links package because referencing a named object ++ * should cause it to be evaluated in place. ++ */ ++ acpi_handle_info(scope, "subnode %s: Unexpected reference\n", ++ link->package.elements[0].string.pointer); ++ fallthrough; + default: + result = false; + break; diff --git a/queue-6.6/acpi-property-do-not-pass-null-handles-to-acpi_attach_data.patch b/queue-6.6/acpi-property-do-not-pass-null-handles-to-acpi_attach_data.patch new file mode 100644 index 0000000000..17c802d546 --- /dev/null +++ b/queue-6.6/acpi-property-do-not-pass-null-handles-to-acpi_attach_data.patch @@ -0,0 +1,77 @@ +From stable+bounces-186201-greg=kroah.com@vger.kernel.org Thu Oct 16 21:28:46 2025 +From: Sasha Levin +Date: Thu, 16 Oct 2025 15:28:32 -0400 +Subject: ACPI: property: Do not pass NULL handles to acpi_attach_data() +To: stable@vger.kernel.org +Cc: "Rafael J. Wysocki" , Sakari Ailus , Sasha Levin +Message-ID: <20251016192832.3384290-4-sashal@kernel.org> + +From: "Rafael J. 
Wysocki" + +[ Upstream commit baf60d5cb8bc6b85511c5df5f0ad7620bb66d23c ] + +In certain circumstances, the ACPI handle of a data-only node may be +NULL, in which case it does not make sense to attempt to attach that +node to an ACPI namespace object, so update the code to avoid attempts +to do so. + +This prevents confusing and unuseful error messages from being printed. + +Also document the fact that the ACPI handle of a data-only node may be +NULL and when that happens in a code comment. In addition, make +acpi_add_nondev_subnodes() print a diagnostic message for each data-only +node with an unknown ACPI namespace scope. + +Fixes: 1d52f10917a7 ("ACPI: property: Tie data nodes to acpi handles") +Cc: 6.0+ # 6.0+ +Signed-off-by: Rafael J. Wysocki +Reviewed-by: Sakari Ailus +Tested-by: Sakari Ailus +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/property.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/drivers/acpi/property.c ++++ b/drivers/acpi/property.c +@@ -112,6 +112,10 @@ static bool acpi_nondev_subnode_extract( + result = true; + + if (result) { ++ /* ++ * This will be NULL if the desc package is embedded in an outer ++ * _DSD-equivalent package and its scope cannot be determined. ++ */ + dn->handle = handle; + dn->data.pointer = desc; + list_add_tail(&dn->sibling, list); +@@ -212,6 +216,8 @@ static bool acpi_add_nondev_subnodes(acp + * strings because there is no way to build full + * pathnames out of them. 
+ */ ++ acpi_handle_debug(scope, "subnode %s: Unknown scope\n", ++ link->package.elements[0].string.pointer); + desc = &link->package.elements[1]; + result = acpi_nondev_subnode_extract(desc, NULL, link, + list, parent); +@@ -384,6 +390,9 @@ static void acpi_untie_nondev_subnodes(s + struct acpi_data_node *dn; + + list_for_each_entry(dn, &data->subnodes, sibling) { ++ if (!dn->handle) ++ continue; ++ + acpi_detach_data(dn->handle, acpi_nondev_subnode_tag); + + acpi_untie_nondev_subnodes(&dn->data); +@@ -398,6 +407,9 @@ static bool acpi_tie_nondev_subnodes(str + acpi_status status; + bool ret; + ++ if (!dn->handle) ++ continue; ++ + status = acpi_attach_data(dn->handle, acpi_nondev_subnode_tag, dn); + if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) { + acpi_handle_err(dn->handle, "Can't tag data node\n"); diff --git a/queue-6.6/arm64-kprobes-call-set_memory_rox-for-kprobe-page.patch b/queue-6.6/arm64-kprobes-call-set_memory_rox-for-kprobe-page.patch new file mode 100644 index 0000000000..99629a739a --- /dev/null +++ b/queue-6.6/arm64-kprobes-call-set_memory_rox-for-kprobe-page.patch @@ -0,0 +1,47 @@ +From stable+bounces-186205-greg=kroah.com@vger.kernel.org Thu Oct 16 21:57:45 2025 +From: Sasha Levin +Date: Thu, 16 Oct 2025 15:57:28 -0400 +Subject: arm64: kprobes: call set_memory_rox() for kprobe page +To: stable@vger.kernel.org +Cc: Yang Shi , Catalin Marinas , Will Deacon , Sasha Levin +Message-ID: <20251016195728.3396584-1-sashal@kernel.org> + +From: Yang Shi + +[ Upstream commit 195a1b7d8388c0ec2969a39324feb8bebf9bb907 ] + +The kprobe page is allocated by execmem allocator with ROX permission. +It needs to call set_memory_rox() to set proper permission for the +direct map too. It was missed. 
+ +Fixes: 10d5e97c1bf8 ("arm64: use PAGE_KERNEL_ROX directly in alloc_insn_page") +Cc: +Signed-off-by: Yang Shi +Reviewed-by: Catalin Marinas +Signed-off-by: Will Deacon +[ kept existing __vmalloc_node_range() instead of upstream's execmem_alloc() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/probes/kprobes.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/arch/arm64/kernel/probes/kprobes.c ++++ b/arch/arm64/kernel/probes/kprobes.c +@@ -131,9 +131,15 @@ int __kprobes arch_prepare_kprobe(struct + + void *alloc_insn_page(void) + { +- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, ++ void *addr; ++ ++ addr = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, __builtin_return_address(0)); ++ if (!addr) ++ return NULL; ++ set_memory_rox((unsigned long)addr, 1); ++ return addr; + } + + /* arm kprobe: install breakpoint in text */ diff --git a/queue-6.6/arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch b/queue-6.6/arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch new file mode 100644 index 0000000000..f24cce016e --- /dev/null +++ b/queue-6.6/arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch @@ -0,0 +1,91 @@ +From stable+bounces-186231-greg=kroah.com@vger.kernel.org Fri Oct 17 03:25:31 2025 +From: Sasha Levin +Date: Thu, 16 Oct 2025 21:25:22 -0400 +Subject: arm64: mte: Do not flag the zero page as PG_mte_tagged +To: stable@vger.kernel.org +Cc: Catalin Marinas , Gergely Kovacs , Will Deacon , David Hildenbrand , Lance Yang , Sasha Levin +Message-ID: <20251017012522.3571144-1-sashal@kernel.org> + +From: Catalin Marinas + +[ Upstream commit f620d66af3165838bfa845dcf9f5f9b4089bf508 ] + +Commit 68d54ceeec0e ("arm64: mte: Allow PTRACE_PEEKMTETAGS access to the +zero page") attempted to fix ptrace() reading of tags from the zero page +by marking it as PG_mte_tagged during 
cpu_enable_mte(). The same commit +also changed the ptrace() tag access permission check to the VM_MTE vma +flag while turning the page flag test into a WARN_ON_ONCE(). + +Attempting to set the PG_mte_tagged flag early with +CONFIG_DEFERRED_STRUCT_PAGE_INIT enabled may either hang (after commit +d77e59a8fccd "arm64: mte: Lock a page for MTE tag initialisation") or +have the flags cleared later during page_alloc_init_late(). In addition, +pages_identical() -> memcmp_pages() will reject any comparison with the +zero page as it is marked as tagged. + +Partially revert the above commit to avoid setting PG_mte_tagged on the +zero page. Update the __access_remote_tags() warning on untagged pages +to ignore the zero page since it is known to have the tags initialised. + +Note that all user mapping of the zero page are marked as pte_special(). +The arm64 set_pte_at() will not call mte_sync_tags() on such pages, so +PG_mte_tagged will remain cleared. + +Signed-off-by: Catalin Marinas +Fixes: 68d54ceeec0e ("arm64: mte: Allow PTRACE_PEEKMTETAGS access to the zero page") +Reported-by: Gergely Kovacs +Cc: stable@vger.kernel.org # 5.10.x +Cc: Will Deacon +Cc: David Hildenbrand +Cc: Lance Yang +Acked-by: Lance Yang +Reviewed-by: David Hildenbrand +Tested-by: Lance Yang +Signed-off-by: Will Deacon +[ Adjust context ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/cpufeature.c | 10 +++++++--- + arch/arm64/kernel/mte.c | 3 ++- + 2 files changed, 9 insertions(+), 4 deletions(-) + +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -2174,17 +2174,21 @@ static void bti_enable(const struct arm6 + #ifdef CONFIG_ARM64_MTE + static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) + { ++ static bool cleared_zero_page = false; ++ + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); + + mte_cpu_setup(); + + /* + * Clear the tags in the zero page. 
This needs to be done via the +- * linear map which has the Tagged attribute. ++ * linear map which has the Tagged attribute. Since this page is ++ * always mapped as pte_special(), set_pte_at() will not attempt to ++ * clear the tags or set PG_mte_tagged. + */ +- if (try_page_mte_tagging(ZERO_PAGE(0))) { ++ if (!cleared_zero_page) { ++ cleared_zero_page = true; + mte_clear_page_tags(lm_alias(empty_zero_page)); +- set_page_mte_tagged(ZERO_PAGE(0)); + } + + kasan_init_hw_tags_cpu(); +--- a/arch/arm64/kernel/mte.c ++++ b/arch/arm64/kernel/mte.c +@@ -428,7 +428,8 @@ static int __access_remote_tags(struct m + put_page(page); + break; + } +- WARN_ON_ONCE(!page_mte_tagged(page)); ++ ++ WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page)); + + /* limit access to the end of the page */ + offset = offset_in_page(addr); diff --git a/queue-6.6/btrfs-fix-the-incorrect-max_bytes-value-for-find_lock_delalloc_range.patch b/queue-6.6/btrfs-fix-the-incorrect-max_bytes-value-for-find_lock_delalloc_range.patch new file mode 100644 index 0000000000..df95585bf3 --- /dev/null +++ b/queue-6.6/btrfs-fix-the-incorrect-max_bytes-value-for-find_lock_delalloc_range.patch @@ -0,0 +1,147 @@ +From stable+bounces-185824-greg=kroah.com@vger.kernel.org Wed Oct 15 16:22:38 2025 +From: Sasha Levin +Date: Wed, 15 Oct 2025 10:20:22 -0400 +Subject: btrfs: fix the incorrect max_bytes value for find_lock_delalloc_range() +To: stable@vger.kernel.org +Cc: Qu Wenruo , David Sterba , Sasha Levin +Message-ID: <20251015142022.1428901-1-sashal@kernel.org> + +From: Qu Wenruo + +[ Upstream commit 7b26da407420e5054e3f06c5d13271697add9423 ] + +[BUG] +With my local branch to enable bs > ps support for btrfs, sometimes I +hit the following ASSERT() inside submit_one_sector(): + + ASSERT(block_start != EXTENT_MAP_HOLE); + +Please note that it's not yet possible to hit this ASSERT() in the wild +yet, as it requires btrfs bs > ps support, which is not even in the +development branch. 
+ +But on the other hand, there is also a very low chance to hit above +ASSERT() with bs < ps cases, so this is an existing bug affect not only +the incoming bs > ps support but also the existing bs < ps support. + +[CAUSE] +Firstly that ASSERT() means we're trying to submit a dirty block but +without a real extent map nor ordered extent map backing it. + +Furthermore with extra debugging, the folio triggering such ASSERT() is +always larger than the fs block size in my bs > ps case. +(8K block size, 4K page size) + +After some more debugging, the ASSERT() is trigger by the following +sequence: + + extent_writepage() + | We got a 32K folio (4 fs blocks) at file offset 0, and the fs block + | size is 8K, page size is 4K. + | And there is another 8K folio at file offset 32K, which is also + | dirty. + | So the filemap layout looks like the following: + | + | "||" is the filio boundary in the filemap. + | "//| is the dirty range. + | + | 0 8K 16K 24K 32K 40K + | |////////| |//////////////////////||////////| + | + |- writepage_delalloc() + | |- find_lock_delalloc_range() for [0, 8K) + | | Now range [0, 8K) is properly locked. + | | + | |- find_lock_delalloc_range() for [16K, 40K) + | | |- btrfs_find_delalloc_range() returned range [16K, 40K) + | | |- lock_delalloc_folios() locked folio 0 successfully + | | | + | | | The filemap range [32K, 40K) got dropped from filemap. + | | | + | | |- lock_delalloc_folios() failed with -EAGAIN on folio 32K + | | | As the folio at 32K is dropped. + | | | + | | |- loops = 1; + | | |- max_bytes = PAGE_SIZE; + | | |- goto again; + | | | This will re-do the lookup for dirty delalloc ranges. + | | | + | | |- btrfs_find_delalloc_range() called with @max_bytes == 4K + | | | This is smaller than block size, so + | | | btrfs_find_delalloc_range() is unable to return any range. + | | \- return false; + | | + | \- Now only range [0, 8K) has an OE for it, but for dirty range + | [16K, 32K) it's dirty without an OE. 
+ | This breaks the assumption that writepage_delalloc() will find + | and lock all dirty ranges inside the folio. + | + |- extent_writepage_io() + |- submit_one_sector() for [0, 8K) + | Succeeded + | + |- submit_one_sector() for [16K, 24K) + Triggering the ASSERT(), as there is no OE, and the original + extent map is a hole. + +Please note that, this also exposed the same problem for bs < ps +support. E.g. with 64K page size and 4K block size. + +If we fail to lock a folio and fall back into the "loops = 1;" +branch, we will re-do the search using 64K as max_bytes. +Which may fail again to lock the next folio, and exit early without +handling all dirty blocks inside the folio. + +[FIX] +Instead of using the fixed size PAGE_SIZE as @max_bytes, use +@sectorsize, so that we are guaranteed to find and lock any remaining +blocks inside the folio. + +And since we're here, add an extra ASSERT() +before calling btrfs_find_delalloc_range() to make sure the @max_bytes is +no smaller than a block to avoid a false negative. + +Cc: stable@vger.kernel.org # 5.15+ +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +[ adapted folio terminology and API calls to page-based equivalents ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -364,6 +364,13 @@ again: + /* step one, find a bunch of delalloc bytes starting at start */ + delalloc_start = *start; + delalloc_end = 0; ++ ++ /* ++ * If @max_bytes is smaller than a block, btrfs_find_delalloc_range() can ++ * return early without handling any dirty ranges. 
++ */ ++ ASSERT(max_bytes >= fs_info->sectorsize); ++ + found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end, + max_bytes, &cached_state); + if (!found || delalloc_end <= *start || delalloc_start > orig_end) { +@@ -394,13 +401,14 @@ again: + delalloc_start, delalloc_end); + ASSERT(!ret || ret == -EAGAIN); + if (ret == -EAGAIN) { +- /* some of the pages are gone, lets avoid looping by +- * shortening the size of the delalloc range we're searching ++ /* ++ * Some of the pages are gone, lets avoid looping by ++ * shortening the size of the delalloc range we're searching. + */ + free_extent_state(cached_state); + cached_state = NULL; + if (!loops) { +- max_bytes = PAGE_SIZE; ++ max_bytes = fs_info->sectorsize; + loops = 1; + goto again; + } else { diff --git a/queue-6.6/ipmi-fix-handling-of-messages-with-provided-receive-message-pointer.patch b/queue-6.6/ipmi-fix-handling-of-messages-with-provided-receive-message-pointer.patch new file mode 100644 index 0000000000..1b6bb22716 --- /dev/null +++ b/queue-6.6/ipmi-fix-handling-of-messages-with-provided-receive-message-pointer.patch @@ -0,0 +1,50 @@ +From stable+bounces-186187-greg=kroah.com@vger.kernel.org Thu Oct 16 20:50:24 2025 +From: Corey Minyard +Date: Thu, 16 Oct 2025 13:50:06 -0500 +Subject: ipmi: Fix handling of messages with provided receive message pointer +To: stable@vger.kernel.org +Cc: Guenter Roeck , Eric Dumazet , Greg Thelen , Corey Minyard +Message-ID: <20251016185006.1876032-2-corey@minyard.net> + +From: Guenter Roeck + +commit e2c69490dda5d4c9f1bfbb2898989c8f3530e354 upstream + +Prior to commit b52da4054ee0 ("ipmi: Rework user message limit handling"), +i_ipmi_request() used to increase the user reference counter if the receive +message is provided by the caller of IPMI API functions. This is no longer +the case. However, ipmi_free_recv_msg() is still called and decreases the +reference counter. 
This results in the reference counter reaching zero, +the user data pointer is released, and all kinds of interesting crashes are +seen. + +Fix the problem by increasing user reference counter if the receive message +has been provided by the caller. + +Fixes: b52da4054ee0 ("ipmi: Rework user message limit handling") +Reported-by: Eric Dumazet +Cc: Eric Dumazet +Cc: Greg Thelen +Signed-off-by: Guenter Roeck +Message-ID: <20251006201857.3433837-1-linux@roeck-us.net> +Signed-off-by: Corey Minyard +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/ipmi/ipmi_msghandler.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/char/ipmi/ipmi_msghandler.c ++++ b/drivers/char/ipmi/ipmi_msghandler.c +@@ -2311,8 +2311,11 @@ static int i_ipmi_request(struct ipmi_us + if (supplied_recv) { + recv_msg = supplied_recv; + recv_msg->user = user; +- if (user) ++ if (user) { + atomic_inc(&user->nr_msgs); ++ /* The put happens when the message is freed. */ ++ kref_get(&user->refcount); ++ } + } else { + recv_msg = ipmi_alloc_recv_msg(user); + if (IS_ERR(recv_msg)) diff --git a/queue-6.6/ipmi-rework-user-message-limit-handling.patch b/queue-6.6/ipmi-rework-user-message-limit-handling.patch new file mode 100644 index 0000000000..ab66c47bad --- /dev/null +++ b/queue-6.6/ipmi-rework-user-message-limit-handling.patch @@ -0,0 +1,646 @@ +From stable+bounces-186186-greg=kroah.com@vger.kernel.org Thu Oct 16 20:50:22 2025 +From: Corey Minyard +Date: Thu, 16 Oct 2025 13:50:05 -0500 +Subject: ipmi: Rework user message limit handling +To: stable@vger.kernel.org +Cc: Corey Minyard , Gilles BULOZ +Message-ID: <20251016185006.1876032-1-corey@minyard.net> + +From: Corey Minyard + +commit b52da4054ee0bf9ecb44996f2c83236ff50b3812 upstream + +This patch required quite a bit of work to backport due to a number +of unrelated changes that do not make sense to backport. This has +been run against my test suite and passes all tests. 
+ +The limit on the number of user messages had a number of issues, +improper counting in some cases and a use after free. + +Restructure how this is all done to handle more in the receive message +allocation routine, so all refcounting and user message limit counts +are done in that routine. It's a lot cleaner and safer. + +Reported-by: Gilles BULOZ +Closes: https://lore.kernel.org/lkml/aLsw6G0GyqfpKs2S@mail.minyard.net/ +Fixes: 8e76741c3d8b ("ipmi: Add a limit on the number of users that may use IPMI") +Cc: # 4.19 +Signed-off-by: Corey Minyard +Tested-by: Gilles BULOZ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/ipmi/ipmi_msghandler.c | 415 +++++++++++++++++------------------- + 1 file changed, 198 insertions(+), 217 deletions(-) + +--- a/drivers/char/ipmi/ipmi_msghandler.c ++++ b/drivers/char/ipmi/ipmi_msghandler.c +@@ -39,7 +39,9 @@ + + #define IPMI_DRIVER_VERSION "39.2" + +-static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void); ++static struct ipmi_recv_msg *ipmi_alloc_recv_msg(struct ipmi_user *user); ++static void ipmi_set_recv_msg_user(struct ipmi_recv_msg *msg, ++ struct ipmi_user *user); + static int ipmi_init_msghandler(void); + static void smi_recv_tasklet(struct tasklet_struct *t); + static void handle_new_recv_msgs(struct ipmi_smi *intf); +@@ -939,13 +941,11 @@ static int deliver_response(struct ipmi_ + * risk. At this moment, simply skip it in that case. 
+ */ + ipmi_free_recv_msg(msg); +- atomic_dec(&msg->user->nr_msgs); + } else { + int index; + struct ipmi_user *user = acquire_ipmi_user(msg->user, &index); + + if (user) { +- atomic_dec(&user->nr_msgs); + user->handler->ipmi_recv_hndl(msg, user->handler_data); + release_ipmi_user(user, index); + } else { +@@ -1634,8 +1634,7 @@ int ipmi_set_gets_events(struct ipmi_use + spin_unlock_irqrestore(&intf->events_lock, flags); + + list_for_each_entry_safe(msg, msg2, &msgs, link) { +- msg->user = user; +- kref_get(&user->refcount); ++ ipmi_set_recv_msg_user(msg, user); + deliver_local_response(intf, msg); + } + +@@ -2309,22 +2308,15 @@ static int i_ipmi_request(struct ipmi_us + struct ipmi_recv_msg *recv_msg; + int rv = 0; + +- if (user) { +- if (atomic_add_return(1, &user->nr_msgs) > max_msgs_per_user) { +- /* Decrement will happen at the end of the routine. */ +- rv = -EBUSY; +- goto out; +- } +- } +- +- if (supplied_recv) ++ if (supplied_recv) { + recv_msg = supplied_recv; +- else { +- recv_msg = ipmi_alloc_recv_msg(); +- if (recv_msg == NULL) { +- rv = -ENOMEM; +- goto out; +- } ++ recv_msg->user = user; ++ if (user) ++ atomic_inc(&user->nr_msgs); ++ } else { ++ recv_msg = ipmi_alloc_recv_msg(user); ++ if (IS_ERR(recv_msg)) ++ return PTR_ERR(recv_msg); + } + recv_msg->user_msg_data = user_msg_data; + +@@ -2335,8 +2327,7 @@ static int i_ipmi_request(struct ipmi_us + if (smi_msg == NULL) { + if (!supplied_recv) + ipmi_free_recv_msg(recv_msg); +- rv = -ENOMEM; +- goto out; ++ return -ENOMEM; + } + } + +@@ -2346,10 +2337,6 @@ static int i_ipmi_request(struct ipmi_us + goto out_err; + } + +- recv_msg->user = user; +- if (user) +- /* The put happens when the message is freed. 
*/ +- kref_get(&user->refcount); + recv_msg->msgid = msgid; + /* + * Store the message to send in the receive message so timeout +@@ -2378,8 +2365,10 @@ static int i_ipmi_request(struct ipmi_us + + if (rv) { + out_err: +- ipmi_free_smi_msg(smi_msg); +- ipmi_free_recv_msg(recv_msg); ++ if (!supplied_smi) ++ ipmi_free_smi_msg(smi_msg); ++ if (!supplied_recv) ++ ipmi_free_recv_msg(recv_msg); + } else { + dev_dbg(intf->si_dev, "Send: %*ph\n", + smi_msg->data_size, smi_msg->data); +@@ -2388,9 +2377,6 @@ out_err: + } + rcu_read_unlock(); + +-out: +- if (rv && user) +- atomic_dec(&user->nr_msgs); + return rv; + } + +@@ -3883,7 +3869,7 @@ static int handle_ipmb_get_msg_cmd(struc + unsigned char chan; + struct ipmi_user *user = NULL; + struct ipmi_ipmb_addr *ipmb_addr; +- struct ipmi_recv_msg *recv_msg; ++ struct ipmi_recv_msg *recv_msg = NULL; + + if (msg->rsp_size < 10) { + /* Message not big enough, just ignore it. */ +@@ -3904,9 +3890,8 @@ static int handle_ipmb_get_msg_cmd(struc + rcvr = find_cmd_rcvr(intf, netfn, cmd, chan); + if (rcvr) { + user = rcvr->user; +- kref_get(&user->refcount); +- } else +- user = NULL; ++ recv_msg = ipmi_alloc_recv_msg(user); ++ } + rcu_read_unlock(); + + if (user == NULL) { +@@ -3941,47 +3926,41 @@ static int handle_ipmb_get_msg_cmd(struc + rv = -1; + } + rcu_read_unlock(); +- } else { +- recv_msg = ipmi_alloc_recv_msg(); +- if (!recv_msg) { +- /* +- * We couldn't allocate memory for the +- * message, so requeue it for handling +- * later. +- */ +- rv = 1; +- kref_put(&user->refcount, free_user); +- } else { +- /* Extract the source address from the data. */ +- ipmb_addr = (struct ipmi_ipmb_addr *) &recv_msg->addr; +- ipmb_addr->addr_type = IPMI_IPMB_ADDR_TYPE; +- ipmb_addr->slave_addr = msg->rsp[6]; +- ipmb_addr->lun = msg->rsp[7] & 3; +- ipmb_addr->channel = msg->rsp[3] & 0xf; ++ } else if (!IS_ERR(recv_msg)) { ++ /* Extract the source address from the data. 
*/ ++ ipmb_addr = (struct ipmi_ipmb_addr *) &recv_msg->addr; ++ ipmb_addr->addr_type = IPMI_IPMB_ADDR_TYPE; ++ ipmb_addr->slave_addr = msg->rsp[6]; ++ ipmb_addr->lun = msg->rsp[7] & 3; ++ ipmb_addr->channel = msg->rsp[3] & 0xf; + +- /* +- * Extract the rest of the message information +- * from the IPMB header. +- */ +- recv_msg->user = user; +- recv_msg->recv_type = IPMI_CMD_RECV_TYPE; +- recv_msg->msgid = msg->rsp[7] >> 2; +- recv_msg->msg.netfn = msg->rsp[4] >> 2; +- recv_msg->msg.cmd = msg->rsp[8]; +- recv_msg->msg.data = recv_msg->msg_data; ++ /* ++ * Extract the rest of the message information ++ * from the IPMB header. ++ */ ++ recv_msg->recv_type = IPMI_CMD_RECV_TYPE; ++ recv_msg->msgid = msg->rsp[7] >> 2; ++ recv_msg->msg.netfn = msg->rsp[4] >> 2; ++ recv_msg->msg.cmd = msg->rsp[8]; ++ recv_msg->msg.data = recv_msg->msg_data; + +- /* +- * We chop off 10, not 9 bytes because the checksum +- * at the end also needs to be removed. +- */ +- recv_msg->msg.data_len = msg->rsp_size - 10; +- memcpy(recv_msg->msg_data, &msg->rsp[9], +- msg->rsp_size - 10); +- if (deliver_response(intf, recv_msg)) +- ipmi_inc_stat(intf, unhandled_commands); +- else +- ipmi_inc_stat(intf, handled_commands); +- } ++ /* ++ * We chop off 10, not 9 bytes because the checksum ++ * at the end also needs to be removed. ++ */ ++ recv_msg->msg.data_len = msg->rsp_size - 10; ++ memcpy(recv_msg->msg_data, &msg->rsp[9], ++ msg->rsp_size - 10); ++ if (deliver_response(intf, recv_msg)) ++ ipmi_inc_stat(intf, unhandled_commands); ++ else ++ ipmi_inc_stat(intf, handled_commands); ++ } else { ++ /* ++ * We couldn't allocate memory for the message, so ++ * requeue it for handling later. 
++ */ ++ rv = 1; + } + + return rv; +@@ -3994,7 +3973,7 @@ static int handle_ipmb_direct_rcv_cmd(st + int rv = 0; + struct ipmi_user *user = NULL; + struct ipmi_ipmb_direct_addr *daddr; +- struct ipmi_recv_msg *recv_msg; ++ struct ipmi_recv_msg *recv_msg = NULL; + unsigned char netfn = msg->rsp[0] >> 2; + unsigned char cmd = msg->rsp[3]; + +@@ -4003,9 +3982,8 @@ static int handle_ipmb_direct_rcv_cmd(st + rcvr = find_cmd_rcvr(intf, netfn, cmd, 0); + if (rcvr) { + user = rcvr->user; +- kref_get(&user->refcount); +- } else +- user = NULL; ++ recv_msg = ipmi_alloc_recv_msg(user); ++ } + rcu_read_unlock(); + + if (user == NULL) { +@@ -4032,44 +4010,38 @@ static int handle_ipmb_direct_rcv_cmd(st + rv = -1; + } + rcu_read_unlock(); +- } else { +- recv_msg = ipmi_alloc_recv_msg(); +- if (!recv_msg) { +- /* +- * We couldn't allocate memory for the +- * message, so requeue it for handling +- * later. +- */ +- rv = 1; +- kref_put(&user->refcount, free_user); +- } else { +- /* Extract the source address from the data. */ +- daddr = (struct ipmi_ipmb_direct_addr *)&recv_msg->addr; +- daddr->addr_type = IPMI_IPMB_DIRECT_ADDR_TYPE; +- daddr->channel = 0; +- daddr->slave_addr = msg->rsp[1]; +- daddr->rs_lun = msg->rsp[0] & 3; +- daddr->rq_lun = msg->rsp[2] & 3; ++ } else if (!IS_ERR(recv_msg)) { ++ /* Extract the source address from the data. */ ++ daddr = (struct ipmi_ipmb_direct_addr *)&recv_msg->addr; ++ daddr->addr_type = IPMI_IPMB_DIRECT_ADDR_TYPE; ++ daddr->channel = 0; ++ daddr->slave_addr = msg->rsp[1]; ++ daddr->rs_lun = msg->rsp[0] & 3; ++ daddr->rq_lun = msg->rsp[2] & 3; + +- /* +- * Extract the rest of the message information +- * from the IPMB header. 
+- */ +- recv_msg->user = user; +- recv_msg->recv_type = IPMI_CMD_RECV_TYPE; +- recv_msg->msgid = (msg->rsp[2] >> 2); +- recv_msg->msg.netfn = msg->rsp[0] >> 2; +- recv_msg->msg.cmd = msg->rsp[3]; +- recv_msg->msg.data = recv_msg->msg_data; +- +- recv_msg->msg.data_len = msg->rsp_size - 4; +- memcpy(recv_msg->msg_data, msg->rsp + 4, +- msg->rsp_size - 4); +- if (deliver_response(intf, recv_msg)) +- ipmi_inc_stat(intf, unhandled_commands); +- else +- ipmi_inc_stat(intf, handled_commands); +- } ++ /* ++ * Extract the rest of the message information ++ * from the IPMB header. ++ */ ++ recv_msg->recv_type = IPMI_CMD_RECV_TYPE; ++ recv_msg->msgid = (msg->rsp[2] >> 2); ++ recv_msg->msg.netfn = msg->rsp[0] >> 2; ++ recv_msg->msg.cmd = msg->rsp[3]; ++ recv_msg->msg.data = recv_msg->msg_data; ++ ++ recv_msg->msg.data_len = msg->rsp_size - 4; ++ memcpy(recv_msg->msg_data, msg->rsp + 4, ++ msg->rsp_size - 4); ++ if (deliver_response(intf, recv_msg)) ++ ipmi_inc_stat(intf, unhandled_commands); ++ else ++ ipmi_inc_stat(intf, handled_commands); ++ } else { ++ /* ++ * We couldn't allocate memory for the message, so ++ * requeue it for handling later. ++ */ ++ rv = 1; + } + + return rv; +@@ -4183,7 +4155,7 @@ static int handle_lan_get_msg_cmd(struct + unsigned char chan; + struct ipmi_user *user = NULL; + struct ipmi_lan_addr *lan_addr; +- struct ipmi_recv_msg *recv_msg; ++ struct ipmi_recv_msg *recv_msg = NULL; + + if (msg->rsp_size < 12) { + /* Message not big enough, just ignore it. */ +@@ -4204,9 +4176,8 @@ static int handle_lan_get_msg_cmd(struct + rcvr = find_cmd_rcvr(intf, netfn, cmd, chan); + if (rcvr) { + user = rcvr->user; +- kref_get(&user->refcount); +- } else +- user = NULL; ++ recv_msg = ipmi_alloc_recv_msg(user); ++ } + rcu_read_unlock(); + + if (user == NULL) { +@@ -4218,49 +4189,44 @@ static int handle_lan_get_msg_cmd(struct + * them to be freed. 
+ */ + rv = 0; +- } else { +- recv_msg = ipmi_alloc_recv_msg(); +- if (!recv_msg) { +- /* +- * We couldn't allocate memory for the +- * message, so requeue it for handling later. +- */ +- rv = 1; +- kref_put(&user->refcount, free_user); +- } else { +- /* Extract the source address from the data. */ +- lan_addr = (struct ipmi_lan_addr *) &recv_msg->addr; +- lan_addr->addr_type = IPMI_LAN_ADDR_TYPE; +- lan_addr->session_handle = msg->rsp[4]; +- lan_addr->remote_SWID = msg->rsp[8]; +- lan_addr->local_SWID = msg->rsp[5]; +- lan_addr->lun = msg->rsp[9] & 3; +- lan_addr->channel = msg->rsp[3] & 0xf; +- lan_addr->privilege = msg->rsp[3] >> 4; ++ } else if (!IS_ERR(recv_msg)) { ++ /* Extract the source address from the data. */ ++ lan_addr = (struct ipmi_lan_addr *) &recv_msg->addr; ++ lan_addr->addr_type = IPMI_LAN_ADDR_TYPE; ++ lan_addr->session_handle = msg->rsp[4]; ++ lan_addr->remote_SWID = msg->rsp[8]; ++ lan_addr->local_SWID = msg->rsp[5]; ++ lan_addr->lun = msg->rsp[9] & 3; ++ lan_addr->channel = msg->rsp[3] & 0xf; ++ lan_addr->privilege = msg->rsp[3] >> 4; + +- /* +- * Extract the rest of the message information +- * from the IPMB header. +- */ +- recv_msg->user = user; +- recv_msg->recv_type = IPMI_CMD_RECV_TYPE; +- recv_msg->msgid = msg->rsp[9] >> 2; +- recv_msg->msg.netfn = msg->rsp[6] >> 2; +- recv_msg->msg.cmd = msg->rsp[10]; +- recv_msg->msg.data = recv_msg->msg_data; ++ /* ++ * Extract the rest of the message information ++ * from the IPMB header. ++ */ ++ recv_msg->recv_type = IPMI_CMD_RECV_TYPE; ++ recv_msg->msgid = msg->rsp[9] >> 2; ++ recv_msg->msg.netfn = msg->rsp[6] >> 2; ++ recv_msg->msg.cmd = msg->rsp[10]; ++ recv_msg->msg.data = recv_msg->msg_data; + +- /* +- * We chop off 12, not 11 bytes because the checksum +- * at the end also needs to be removed. 
+- */ +- recv_msg->msg.data_len = msg->rsp_size - 12; +- memcpy(recv_msg->msg_data, &msg->rsp[11], +- msg->rsp_size - 12); +- if (deliver_response(intf, recv_msg)) +- ipmi_inc_stat(intf, unhandled_commands); +- else +- ipmi_inc_stat(intf, handled_commands); +- } ++ /* ++ * We chop off 12, not 11 bytes because the checksum ++ * at the end also needs to be removed. ++ */ ++ recv_msg->msg.data_len = msg->rsp_size - 12; ++ memcpy(recv_msg->msg_data, &msg->rsp[11], ++ msg->rsp_size - 12); ++ if (deliver_response(intf, recv_msg)) ++ ipmi_inc_stat(intf, unhandled_commands); ++ else ++ ipmi_inc_stat(intf, handled_commands); ++ } else { ++ /* ++ * We couldn't allocate memory for the message, so ++ * requeue it for handling later. ++ */ ++ rv = 1; + } + + return rv; +@@ -4282,7 +4248,7 @@ static int handle_oem_get_msg_cmd(struct + unsigned char chan; + struct ipmi_user *user = NULL; + struct ipmi_system_interface_addr *smi_addr; +- struct ipmi_recv_msg *recv_msg; ++ struct ipmi_recv_msg *recv_msg = NULL; + + /* + * We expect the OEM SW to perform error checking +@@ -4311,9 +4277,8 @@ static int handle_oem_get_msg_cmd(struct + rcvr = find_cmd_rcvr(intf, netfn, cmd, chan); + if (rcvr) { + user = rcvr->user; +- kref_get(&user->refcount); +- } else +- user = NULL; ++ recv_msg = ipmi_alloc_recv_msg(user); ++ } + rcu_read_unlock(); + + if (user == NULL) { +@@ -4326,48 +4291,42 @@ static int handle_oem_get_msg_cmd(struct + */ + + rv = 0; +- } else { +- recv_msg = ipmi_alloc_recv_msg(); +- if (!recv_msg) { +- /* +- * We couldn't allocate memory for the +- * message, so requeue it for handling +- * later. +- */ +- rv = 1; +- kref_put(&user->refcount, free_user); +- } else { +- /* +- * OEM Messages are expected to be delivered via +- * the system interface to SMS software. 
We might +- * need to visit this again depending on OEM +- * requirements +- */ +- smi_addr = ((struct ipmi_system_interface_addr *) +- &recv_msg->addr); +- smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; +- smi_addr->channel = IPMI_BMC_CHANNEL; +- smi_addr->lun = msg->rsp[0] & 3; +- +- recv_msg->user = user; +- recv_msg->user_msg_data = NULL; +- recv_msg->recv_type = IPMI_OEM_RECV_TYPE; +- recv_msg->msg.netfn = msg->rsp[0] >> 2; +- recv_msg->msg.cmd = msg->rsp[1]; +- recv_msg->msg.data = recv_msg->msg_data; ++ } else if (!IS_ERR(recv_msg)) { ++ /* ++ * OEM Messages are expected to be delivered via ++ * the system interface to SMS software. We might ++ * need to visit this again depending on OEM ++ * requirements ++ */ ++ smi_addr = ((struct ipmi_system_interface_addr *) ++ &recv_msg->addr); ++ smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; ++ smi_addr->channel = IPMI_BMC_CHANNEL; ++ smi_addr->lun = msg->rsp[0] & 3; ++ ++ recv_msg->user_msg_data = NULL; ++ recv_msg->recv_type = IPMI_OEM_RECV_TYPE; ++ recv_msg->msg.netfn = msg->rsp[0] >> 2; ++ recv_msg->msg.cmd = msg->rsp[1]; ++ recv_msg->msg.data = recv_msg->msg_data; + +- /* +- * The message starts at byte 4 which follows the +- * Channel Byte in the "GET MESSAGE" command +- */ +- recv_msg->msg.data_len = msg->rsp_size - 4; +- memcpy(recv_msg->msg_data, &msg->rsp[4], +- msg->rsp_size - 4); +- if (deliver_response(intf, recv_msg)) +- ipmi_inc_stat(intf, unhandled_commands); +- else +- ipmi_inc_stat(intf, handled_commands); +- } ++ /* ++ * The message starts at byte 4 which follows the ++ * Channel Byte in the "GET MESSAGE" command ++ */ ++ recv_msg->msg.data_len = msg->rsp_size - 4; ++ memcpy(recv_msg->msg_data, &msg->rsp[4], ++ msg->rsp_size - 4); ++ if (deliver_response(intf, recv_msg)) ++ ipmi_inc_stat(intf, unhandled_commands); ++ else ++ ipmi_inc_stat(intf, handled_commands); ++ } else { ++ /* ++ * We couldn't allocate memory for the message, so ++ * requeue it for handling later. 
++ */ ++ rv = 1; + } + + return rv; +@@ -4426,8 +4385,8 @@ static int handle_read_event_rsp(struct + if (!user->gets_events) + continue; + +- recv_msg = ipmi_alloc_recv_msg(); +- if (!recv_msg) { ++ recv_msg = ipmi_alloc_recv_msg(user); ++ if (IS_ERR(recv_msg)) { + rcu_read_unlock(); + list_for_each_entry_safe(recv_msg, recv_msg2, &msgs, + link) { +@@ -4446,8 +4405,6 @@ static int handle_read_event_rsp(struct + deliver_count++; + + copy_event_into_recv_msg(recv_msg, msg); +- recv_msg->user = user; +- kref_get(&user->refcount); + list_add_tail(&recv_msg->link, &msgs); + } + srcu_read_unlock(&intf->users_srcu, index); +@@ -4463,8 +4420,8 @@ static int handle_read_event_rsp(struct + * No one to receive the message, put it in queue if there's + * not already too many things in the queue. + */ +- recv_msg = ipmi_alloc_recv_msg(); +- if (!recv_msg) { ++ recv_msg = ipmi_alloc_recv_msg(NULL); ++ if (IS_ERR(recv_msg)) { + /* + * We couldn't allocate memory for the + * message, so requeue it for handling +@@ -5156,27 +5113,51 @@ static void free_recv_msg(struct ipmi_re + kfree(msg); + } + +-static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) ++static struct ipmi_recv_msg *ipmi_alloc_recv_msg(struct ipmi_user *user) + { + struct ipmi_recv_msg *rv; + ++ if (user) { ++ if (atomic_add_return(1, &user->nr_msgs) > max_msgs_per_user) { ++ atomic_dec(&user->nr_msgs); ++ return ERR_PTR(-EBUSY); ++ } ++ } ++ + rv = kmalloc(sizeof(struct ipmi_recv_msg), GFP_ATOMIC); +- if (rv) { +- rv->user = NULL; +- rv->done = free_recv_msg; +- atomic_inc(&recv_msg_inuse_count); ++ if (!rv) { ++ if (user) ++ atomic_dec(&user->nr_msgs); ++ return ERR_PTR(-ENOMEM); + } ++ ++ rv->user = user; ++ rv->done = free_recv_msg; ++ if (user) ++ kref_get(&user->refcount); ++ atomic_inc(&recv_msg_inuse_count); + return rv; + } + + void ipmi_free_recv_msg(struct ipmi_recv_msg *msg) + { +- if (msg->user && !oops_in_progress) ++ if (msg->user && !oops_in_progress) { ++ atomic_dec(&msg->user->nr_msgs); + 
kref_put(&msg->user->refcount, free_user); ++ } + msg->done(msg); + } + EXPORT_SYMBOL(ipmi_free_recv_msg); + ++static void ipmi_set_recv_msg_user(struct ipmi_recv_msg *msg, ++ struct ipmi_user *user) ++{ ++ WARN_ON_ONCE(msg->user); /* User should not be set. */ ++ msg->user = user; ++ atomic_inc(&user->nr_msgs); ++ kref_get(&user->refcount); ++} ++ + static atomic_t panic_done_count = ATOMIC_INIT(0); + + static void dummy_smi_done_handler(struct ipmi_smi_msg *msg) diff --git a/queue-6.6/ksmbd-add-max-ip-connections-parameter.patch b/queue-6.6/ksmbd-add-max-ip-connections-parameter.patch new file mode 100644 index 0000000000..c969a1a975 --- /dev/null +++ b/queue-6.6/ksmbd-add-max-ip-connections-parameter.patch @@ -0,0 +1,128 @@ +From stable+bounces-185686-greg=kroah.com@vger.kernel.org Tue Oct 14 16:51:09 2025 +From: Sasha Levin +Date: Tue, 14 Oct 2025 10:46:46 -0400 +Subject: ksmbd: add max ip connections parameter +To: stable@vger.kernel.org +Cc: Namjae Jeon , Steve French , Sasha Levin +Message-ID: <20251014144646.91722-1-sashal@kernel.org> + +From: Namjae Jeon + +[ Upstream commit d8b6dc9256762293048bf122fc11c4e612d0ef5d ] + +This parameter sets the maximum number of connections per IP address. +The default is 8. 
+ +Cc: stable@vger.kernel.org +Fixes: c0d41112f1a5 ("ksmbd: extend the connection limiting mechanism to support IPv6") +Signed-off-by: Namjae Jeon +Signed-off-by: Steve French +[ Adjust reserved room ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/server/ksmbd_netlink.h | 5 +++-- + fs/smb/server/server.h | 1 + + fs/smb/server/transport_ipc.c | 3 +++ + fs/smb/server/transport_tcp.c | 28 +++++++++++++++++----------- + 4 files changed, 24 insertions(+), 13 deletions(-) + +--- a/fs/smb/server/ksmbd_netlink.h ++++ b/fs/smb/server/ksmbd_netlink.h +@@ -108,10 +108,11 @@ struct ksmbd_startup_request { + __u32 smb2_max_credits; /* MAX credits */ + __u32 smbd_max_io_size; /* smbd read write size */ + __u32 max_connections; /* Number of maximum simultaneous connections */ +- __u32 reserved[126]; /* Reserved room */ ++ __u32 max_ip_connections; /* Number of maximum connection per ip address */ ++ __u32 reserved[125]; /* Reserved room */ + __u32 ifc_list_sz; /* interfaces list size */ + __s8 ____payload[]; +-}; ++} __packed; + + #define KSMBD_STARTUP_CONFIG_INTERFACES(s) ((s)->____payload) + +--- a/fs/smb/server/server.h ++++ b/fs/smb/server/server.h +@@ -43,6 +43,7 @@ struct ksmbd_server_config { + unsigned int auth_mechs; + unsigned int max_connections; + unsigned int max_inflight_req; ++ unsigned int max_ip_connections; + + char *conf[SERVER_CONF_WORK_GROUP + 1]; + }; +--- a/fs/smb/server/transport_ipc.c ++++ b/fs/smb/server/transport_ipc.c +@@ -321,6 +321,9 @@ static int ipc_server_config_on_startup( + if (req->max_connections) + server_conf.max_connections = req->max_connections; + ++ if (req->max_ip_connections) ++ server_conf.max_ip_connections = req->max_ip_connections; ++ + ret = ksmbd_set_netbios_name(req->netbios_name); + ret |= ksmbd_set_server_string(req->server_string); + ret |= ksmbd_set_work_group(req->work_group); +--- a/fs/smb/server/transport_tcp.c ++++ b/fs/smb/server/transport_tcp.c +@@ -240,6 +240,7 @@ static int 
ksmbd_kthread_fn(void *p) + struct interface *iface = (struct interface *)p; + struct ksmbd_conn *conn; + int ret; ++ unsigned int max_ip_conns; + + while (!kthread_should_stop()) { + mutex_lock(&iface->sock_release_lock); +@@ -257,34 +258,39 @@ static int ksmbd_kthread_fn(void *p) + continue; + } + ++ if (!server_conf.max_ip_connections) ++ goto skip_max_ip_conns_limit; ++ + /* + * Limits repeated connections from clients with the same IP. + */ ++ max_ip_conns = 0; + down_read(&conn_list_lock); +- list_for_each_entry(conn, &conn_list, conns_list) ++ list_for_each_entry(conn, &conn_list, conns_list) { + #if IS_ENABLED(CONFIG_IPV6) + if (client_sk->sk->sk_family == AF_INET6) { + if (memcmp(&client_sk->sk->sk_v6_daddr, +- &conn->inet6_addr, 16) == 0) { +- ret = -EAGAIN; +- break; +- } ++ &conn->inet6_addr, 16) == 0) ++ max_ip_conns++; + } else if (inet_sk(client_sk->sk)->inet_daddr == +- conn->inet_addr) { +- ret = -EAGAIN; +- break; +- } ++ conn->inet_addr) ++ max_ip_conns++; + #else + if (inet_sk(client_sk->sk)->inet_daddr == +- conn->inet_addr) { ++ conn->inet_addr) ++ max_ip_conns++; ++#endif ++ if (server_conf.max_ip_connections <= max_ip_conns) { + ret = -EAGAIN; + break; + } +-#endif ++ } + up_read(&conn_list_lock); + if (ret == -EAGAIN) + continue; + ++skip_max_ip_conns_limit: ++ + if (server_conf.max_connections && + atomic_inc_return(&active_num_conn) >= server_conf.max_connections) { + pr_info_ratelimited("Limit the maximum number of connections(%u)\n", diff --git a/queue-6.6/kvm-svm-emulate-perf_cntr_global_status_set-for-perfmonv2.patch b/queue-6.6/kvm-svm-emulate-perf_cntr_global_status_set-for-perfmonv2.patch new file mode 100644 index 0000000000..c42b38d4da --- /dev/null +++ b/queue-6.6/kvm-svm-emulate-perf_cntr_global_status_set-for-perfmonv2.patch @@ -0,0 +1,89 @@ +From stable+bounces-186094-greg=kroah.com@vger.kernel.org Thu Oct 16 15:03:11 2025 +From: Sasha Levin +Date: Thu, 16 Oct 2025 09:00:21 -0400 +Subject: KVM: SVM: Emulate 
PERF_CNTR_GLOBAL_STATUS_SET for PerfMonV2 +To: stable@vger.kernel.org +Cc: Sean Christopherson , Sandipan Das , Sasha Levin +Message-ID: <20251016130021.3283271-1-sashal@kernel.org> + +From: Sean Christopherson + +[ Upstream commit 68e61f6fd65610e73b17882f86fedfd784d99229 ] + +Emulate PERF_CNTR_GLOBAL_STATUS_SET when PerfMonV2 is enumerated to the +guest, as the MSR is supposed to exist in all AMD v2 PMUs. + +Fixes: 4a2771895ca6 ("KVM: x86/svm/pmu: Add AMD PerfMonV2 support") +Cc: stable@vger.kernel.org +Cc: Sandipan Das +Link: https://lore.kernel.org/r/20250711172746.1579423-1-seanjc@google.com +Signed-off-by: Sean Christopherson +[ changed global_status_rsvd field to global_status_mask ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/msr-index.h | 1 + + arch/x86/kvm/pmu.c | 5 +++++ + arch/x86/kvm/svm/pmu.c | 1 + + arch/x86/kvm/x86.c | 2 ++ + 4 files changed, 9 insertions(+) + +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -661,6 +661,7 @@ + #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 + #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 + #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 ++#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303 + + /* AMD Last Branch Record MSRs */ + #define MSR_AMD64_LBR_SELECT 0xc000010e +--- a/arch/x86/kvm/pmu.c ++++ b/arch/x86/kvm/pmu.c +@@ -588,6 +588,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcp + msr_info->data = pmu->global_ctrl; + break; + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: ++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + msr_info->data = 0; + break; +@@ -649,6 +650,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcp + if (!msr_info->host_initiated) + pmu->global_status &= ~data; + break; ++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: ++ if (!msr_info->host_initiated) ++ pmu->global_status |= data & ~pmu->global_status_mask; ++ break; + default: + kvm_pmu_mark_pmc_in_use(vcpu, 
msr_info->index); + return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info); +--- a/arch/x86/kvm/svm/pmu.c ++++ b/arch/x86/kvm/svm/pmu.c +@@ -117,6 +117,7 @@ static bool amd_is_valid_msr(struct kvm_ + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: ++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: + return pmu->version > 1; + default: + if (msr > MSR_F15H_PERF_CTR5 && +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1495,6 +1495,7 @@ static const u32 msrs_to_save_pmu[] = { + MSR_AMD64_PERF_CNTR_GLOBAL_CTL, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, ++ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET, + }; + + static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + +@@ -7194,6 +7195,7 @@ static void kvm_probe_msr_to_save(u32 ms + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: ++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: + if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) + return; + break; diff --git a/queue-6.6/kvm-svm-skip-fastpath-emulation-on-vm-exit-if-next-rip-isn-t-valid.patch b/queue-6.6/kvm-svm-skip-fastpath-emulation-on-vm-exit-if-next-rip-isn-t-valid.patch new file mode 100644 index 0000000000..7a5419d805 --- /dev/null +++ b/queue-6.6/kvm-svm-skip-fastpath-emulation-on-vm-exit-if-next-rip-isn-t-valid.patch @@ -0,0 +1,94 @@ +From stable+bounces-185687-greg=kroah.com@vger.kernel.org Tue Oct 14 16:48:58 2025 +From: Sasha Levin +Date: Tue, 14 Oct 2025 10:48:51 -0400 +Subject: KVM: SVM: Skip fastpath emulation on VM-Exit if next RIP isn't valid +To: stable@vger.kernel.org +Cc: Sean Christopherson , Sasha Levin +Message-ID: <20251014144851.94249-1-sashal@kernel.org> + +From: Sean Christopherson + +[ Upstream commit 0910dd7c9ad45a2605c45fd2bf3d1bcac087687c ] + +Skip the WRMSR and HLT fastpaths in SVM's VM-Exit handler if the next RIP +isn't valid, e.g. 
because KVM is running with nrips=false. SVM must +decode and emulate to skip the instruction if the CPU doesn't provide the +next RIP, and getting the instruction bytes to decode requires reading +guest memory. Reading guest memory through the emulator can fault, i.e. +can sleep, which is disallowed since the fastpath handlers run with IRQs +disabled. + + BUG: sleeping function called from invalid context at ./include/linux/uaccess.h:106 + in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 32611, name: qemu + preempt_count: 1, expected: 0 + INFO: lockdep is turned off. + irq event stamp: 30580 + hardirqs last enabled at (30579): [] vcpu_run+0x1787/0x1db0 [kvm] + hardirqs last disabled at (30580): [] __schedule+0x1e2/0xed0 + softirqs last enabled at (30570): [] fpu_swap_kvm_fpstate+0x44/0x210 + softirqs last disabled at (30568): [] fpu_swap_kvm_fpstate+0x44/0x210 + CPU: 298 UID: 0 PID: 32611 Comm: qemu Tainted: G U 6.16.0-smp--e6c618b51cfe-sleep #782 NONE + Tainted: [U]=USER + Hardware name: Google Astoria-Turin/astoria, BIOS 0.20241223.2-0 01/17/2025 + Call Trace: + + dump_stack_lvl+0x7d/0xb0 + __might_resched+0x271/0x290 + __might_fault+0x28/0x80 + kvm_vcpu_read_guest_page+0x8d/0xc0 [kvm] + kvm_fetch_guest_virt+0x92/0xc0 [kvm] + __do_insn_fetch_bytes+0xf3/0x1e0 [kvm] + x86_decode_insn+0xd1/0x1010 [kvm] + x86_emulate_instruction+0x105/0x810 [kvm] + __svm_skip_emulated_instruction+0xc4/0x140 [kvm_amd] + handle_fastpath_invd+0xc4/0x1a0 [kvm] + vcpu_run+0x11a1/0x1db0 [kvm] + kvm_arch_vcpu_ioctl_run+0x5cc/0x730 [kvm] + kvm_vcpu_ioctl+0x578/0x6a0 [kvm] + __se_sys_ioctl+0x6d/0xb0 + do_syscall_64+0x8a/0x2c0 + entry_SYSCALL_64_after_hwframe+0x4b/0x53 + RIP: 0033:0x7f479d57a94b + + +Note, this is essentially a reapply of commit 5c30e8101e8d ("KVM: SVM: +Skip WRMSR fastpath on VM-Exit if next RIP isn't valid"), but with +different justification (KVM now grabs SRCU when skipping the instruction +for other reasons). 
+ +Fixes: b439eb8ab578 ("Revert "KVM: SVM: Skip WRMSR fastpath on VM-Exit if next RIP isn't valid"") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20250805190526.1453366-2-seanjc@google.com +Signed-off-by: Sean Christopherson +[ adapted switch-based MSR/HLT fastpath to if-based MSR-only check ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -4156,11 +4156,20 @@ static int svm_vcpu_pre_run(struct kvm_v + + static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) + { ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ /* ++ * Next RIP must be provided as IRQs are disabled, and accessing guest ++ * memory to decode the instruction might fault, i.e. might sleep. ++ */ ++ if (!nrips || !svm->vmcb->control.next_rip) ++ return EXIT_FASTPATH_NONE; ++ + if (is_guest_mode(vcpu)) + return EXIT_FASTPATH_NONE; + +- if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && +- to_svm(vcpu)->vmcb->control.exit_info_1) ++ if (svm->vmcb->control.exit_code == SVM_EXIT_MSR && ++ svm->vmcb->control.exit_info_1) + return handle_fastpath_set_msr_irqoff(vcpu); + + return EXIT_FASTPATH_NONE; diff --git a/queue-6.6/media-mc-clear-minor-number-before-put-device.patch b/queue-6.6/media-mc-clear-minor-number-before-put-device.patch new file mode 100644 index 0000000000..4e2255605d --- /dev/null +++ b/queue-6.6/media-mc-clear-minor-number-before-put-device.patch @@ -0,0 +1,51 @@ +From stable+bounces-185518-greg=kroah.com@vger.kernel.org Mon Oct 13 23:45:09 2025 +From: Sasha Levin +Date: Mon, 13 Oct 2025 17:44:57 -0400 +Subject: media: mc: Clear minor number before put device +To: stable@vger.kernel.org +Cc: Edward Adam Davis , syzbot+031d0cfd7c362817963f@syzkaller.appspotmail.com, Sakari Ailus , Hans Verkuil , Sasha Levin +Message-ID: <20251013214457.3636880-1-sashal@kernel.org> + +From: 
Edward Adam Davis + +[ Upstream commit 8cfc8cec1b4da88a47c243a11f384baefd092a50 ] + +The device minor should not be cleared after the device is released. + +Fixes: 9e14868dc952 ("media: mc: Clear minor number reservation at unregistration time") +Cc: stable@vger.kernel.org +Reported-by: syzbot+031d0cfd7c362817963f@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=031d0cfd7c362817963f +Tested-by: syzbot+031d0cfd7c362817963f@syzkaller.appspotmail.com +Signed-off-by: Edward Adam Davis +Signed-off-by: Sakari Ailus +Signed-off-by: Hans Verkuil +[ moved clear_bit from media_devnode_release callback to media_devnode_unregister before put_device ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/mc/mc-devnode.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +--- a/drivers/media/mc/mc-devnode.c ++++ b/drivers/media/mc/mc-devnode.c +@@ -50,11 +50,6 @@ static void media_devnode_release(struct + { + struct media_devnode *devnode = to_media_devnode(cd); + +- mutex_lock(&media_devnode_lock); +- /* Mark device node number as free */ +- clear_bit(devnode->minor, media_devnode_nums); +- mutex_unlock(&media_devnode_lock); +- + /* Release media_devnode and perform other cleanups as needed. 
*/ + if (devnode->release) + devnode->release(devnode); +@@ -283,6 +278,7 @@ void media_devnode_unregister(struct med + /* Delete the cdev on this minor as well */ + cdev_device_del(&devnode->cdev, &devnode->dev); + devnode->media_dev = NULL; ++ clear_bit(devnode->minor, media_devnode_nums); + mutex_unlock(&media_devnode_lock); + + put_device(&devnode->dev); diff --git a/queue-6.6/mfd-intel_soc_pmic_chtdc_ti-drop-unneeded-assignment-for-cache_type.patch b/queue-6.6/mfd-intel_soc_pmic_chtdc_ti-drop-unneeded-assignment-for-cache_type.patch new file mode 100644 index 0000000000..3ce9d7c542 --- /dev/null +++ b/queue-6.6/mfd-intel_soc_pmic_chtdc_ti-drop-unneeded-assignment-for-cache_type.patch @@ -0,0 +1,39 @@ +From stable+bounces-185536-greg=kroah.com@vger.kernel.org Tue Oct 14 00:48:21 2025 +From: Sasha Levin +Date: Mon, 13 Oct 2025 18:48:11 -0400 +Subject: mfd: intel_soc_pmic_chtdc_ti: Drop unneeded assignment for cache_type +To: stable@vger.kernel.org +Cc: Andy Shevchenko , Hans de Goede , Lee Jones , Sasha Levin +Message-ID: <20251013224812.3682599-2-sashal@kernel.org> + +From: Andy Shevchenko + +[ Upstream commit 9eb99c08508714906db078b5efbe075329a3fb06 ] + +REGCACHE_NONE is the default type of the cache when not provided. +Drop unneeded explicit assignment to it. + +Note, it's defined to 0, and if ever be redefined, it will break +literally a lot of the drivers, so it very unlikely to happen. 
+ +Signed-off-by: Andy Shevchenko +Reviewed-by: Hans de Goede +Link: https://lore.kernel.org/r/20250129152823.1802273-1-andriy.shevchenko@linux.intel.com +Signed-off-by: Lee Jones +Stable-dep-of: 64e0d839c589 ("mfd: intel_soc_pmic_chtdc_ti: Set use_single_read regmap_config flag") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mfd/intel_soc_pmic_chtdc_ti.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/mfd/intel_soc_pmic_chtdc_ti.c ++++ b/drivers/mfd/intel_soc_pmic_chtdc_ti.c +@@ -82,7 +82,6 @@ static const struct regmap_config chtdc_ + .reg_bits = 8, + .val_bits = 8, + .max_register = 0xff, +- .cache_type = REGCACHE_NONE, + }; + + static const struct regmap_irq chtdc_ti_irqs[] = { diff --git a/queue-6.6/mfd-intel_soc_pmic_chtdc_ti-fix-invalid-regmap-config-max_register-value.patch b/queue-6.6/mfd-intel_soc_pmic_chtdc_ti-fix-invalid-regmap-config-max_register-value.patch new file mode 100644 index 0000000000..6d0574b36e --- /dev/null +++ b/queue-6.6/mfd-intel_soc_pmic_chtdc_ti-fix-invalid-regmap-config-max_register-value.patch @@ -0,0 +1,40 @@ +From stable+bounces-185535-greg=kroah.com@vger.kernel.org Tue Oct 14 00:48:20 2025 +From: Sasha Levin +Date: Mon, 13 Oct 2025 18:48:10 -0400 +Subject: mfd: intel_soc_pmic_chtdc_ti: Fix invalid regmap-config max_register value +To: stable@vger.kernel.org +Cc: Hans de Goede , Andy Shevchenko , Lee Jones , Sasha Levin +Message-ID: <20251013224812.3682599-1-sashal@kernel.org> + +From: Hans de Goede + +[ Upstream commit 70e997e0107e5ed85c1a3ef2adfccbe351c29d71 ] + +The max_register = 128 setting in the regmap config is not valid. + +The Intel Dollar Cove TI PMIC has an eeprom unlock register at address 0x88 +and a number of EEPROM registers at 0xF?. Increase max_register to 0xff so +that these registers can be accessed. 
+ +Signed-off-by: Hans de Goede +Reviewed-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20241208150028.325349-1-hdegoede@redhat.com +Signed-off-by: Lee Jones +Stable-dep-of: 64e0d839c589 ("mfd: intel_soc_pmic_chtdc_ti: Set use_single_read regmap_config flag") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mfd/intel_soc_pmic_chtdc_ti.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/mfd/intel_soc_pmic_chtdc_ti.c ++++ b/drivers/mfd/intel_soc_pmic_chtdc_ti.c +@@ -81,7 +81,7 @@ static struct mfd_cell chtdc_ti_dev[] = + static const struct regmap_config chtdc_ti_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +- .max_register = 128, ++ .max_register = 0xff, + .cache_type = REGCACHE_NONE, + }; + diff --git a/queue-6.6/mfd-intel_soc_pmic_chtdc_ti-set-use_single_read-regmap_config-flag.patch b/queue-6.6/mfd-intel_soc_pmic_chtdc_ti-set-use_single_read-regmap_config-flag.patch new file mode 100644 index 0000000000..4925548924 --- /dev/null +++ b/queue-6.6/mfd-intel_soc_pmic_chtdc_ti-set-use_single_read-regmap_config-flag.patch @@ -0,0 +1,43 @@ +From stable+bounces-185537-greg=kroah.com@vger.kernel.org Tue Oct 14 00:48:22 2025 +From: Sasha Levin +Date: Mon, 13 Oct 2025 18:48:12 -0400 +Subject: mfd: intel_soc_pmic_chtdc_ti: Set use_single_read regmap_config flag +To: stable@vger.kernel.org +Cc: Hans de Goede , Andy Shevchenko , Lee Jones , Sasha Levin +Message-ID: <20251013224812.3682599-3-sashal@kernel.org> + +From: Hans de Goede + +[ Upstream commit 64e0d839c589f4f2ecd2e3e5bdb5cee6ba6bade9 ] + +Testing has shown that reading multiple registers at once (for 10-bit +ADC values) does not work. Set the use_single_read regmap_config flag +to make regmap split these for us. + +This should fix temperature opregion accesses done by +drivers/acpi/pmic/intel_pmic_chtdc_ti.c and is also necessary for +the upcoming drivers for the ADC and battery MFD cells. 
+ +Fixes: 6bac0606fdba ("mfd: Add support for Cherry Trail Dollar Cove TI PMIC") +Cc: stable@vger.kernel.org +Reviewed-by: Andy Shevchenko +Signed-off-by: Hans de Goede +Link: https://lore.kernel.org/r/20250804133240.312383-1-hansg@kernel.org +Signed-off-by: Lee Jones +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mfd/intel_soc_pmic_chtdc_ti.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/mfd/intel_soc_pmic_chtdc_ti.c ++++ b/drivers/mfd/intel_soc_pmic_chtdc_ti.c +@@ -82,6 +82,8 @@ static const struct regmap_config chtdc_ + .reg_bits = 8, + .val_bits = 8, + .max_register = 0xff, ++ /* The hardware does not support reading multiple registers at once */ ++ .use_single_read = true, + }; + + static const struct regmap_irq chtdc_ti_irqs[] = { diff --git a/queue-6.6/misc-fastrpc-add-missing-dev_err-newlines.patch b/queue-6.6/misc-fastrpc-add-missing-dev_err-newlines.patch new file mode 100644 index 0000000000..535b3139b6 --- /dev/null +++ b/queue-6.6/misc-fastrpc-add-missing-dev_err-newlines.patch @@ -0,0 +1,76 @@ +From sashal@kernel.org Tue Oct 14 17:12:29 2025 +From: Sasha Levin +Date: Tue, 14 Oct 2025 11:12:25 -0400 +Subject: misc: fastrpc: Add missing dev_err newlines +To: stable@vger.kernel.org +Cc: Ekansh Gupta , Dmitry Baryshkov , Caleb Connolly , Srinivas Kandagatla , Greg Kroah-Hartman , Sasha Levin +Message-ID: <20251014151226.111084-1-sashal@kernel.org> + +From: Ekansh Gupta + +[ Upstream commit a150c68ae6369ea65b786fefd0b8aa0b075c041a ] + +Few dev_err calls are missing newlines. This can result in unrelated +lines getting appended which might make logs difficult to understand. +Add trailing newlines to avoid this. 
+ +Signed-off-by: Ekansh Gupta +Reviewed-by: Dmitry Baryshkov +Reviewed-by: Caleb Connolly +Signed-off-by: Srinivas Kandagatla +Link: https://lore.kernel.org/r/20240705075900.424100-3-srinivas.kandagatla@linaro.org +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 8b5b456222fd ("misc: fastrpc: Save actual DMA size in fastrpc_map structure") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/misc/fastrpc.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/misc/fastrpc.c ++++ b/drivers/misc/fastrpc.c +@@ -325,7 +325,7 @@ static void fastrpc_free_map(struct kref + err = qcom_scm_assign_mem(map->phys, map->size, + &src_perms, &perm, 1); + if (err) { +- dev_err(map->fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d", ++ dev_err(map->fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d\n", + map->phys, map->size, err); + return; + } +@@ -808,7 +808,7 @@ static int fastrpc_map_attach(struct fas + map->attr = attr; + err = qcom_scm_assign_mem(map->phys, (u64)map->size, &src_perms, dst_perms, 2); + if (err) { +- dev_err(sess->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d", ++ dev_err(sess->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d\n", + map->phys, map->size, err); + goto map_err; + } +@@ -1240,7 +1240,7 @@ static bool is_session_rejected(struct f + * that does not support unsigned PD offload + */ + if (!fl->cctx->unsigned_support || !unsigned_pd_request) { +- dev_err(&fl->cctx->rpdev->dev, "Error: Untrusted application trying to offload to signed PD"); ++ dev_err(&fl->cctx->rpdev->dev, "Error: Untrusted application trying to offload to signed PD\n"); + return true; + } + } +@@ -1304,7 +1304,7 @@ static int fastrpc_init_create_static_pr + &src_perms, + fl->cctx->vmperms, fl->cctx->vmcount); + if (err) { +- dev_err(fl->sctx->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d", ++ dev_err(fl->sctx->dev, "Failed to assign 
memory with phys 0x%llx size 0x%llx err %d\n", + fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err); + goto err_map; + } +@@ -1358,7 +1358,7 @@ err_invoke: + (u64)fl->cctx->remote_heap->size, + &src_perms, &dst_perms, 1); + if (err) +- dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d", ++ dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d\n", + fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err); + } + err_map: diff --git a/queue-6.6/misc-fastrpc-save-actual-dma-size-in-fastrpc_map-structure.patch b/queue-6.6/misc-fastrpc-save-actual-dma-size-in-fastrpc_map-structure.patch new file mode 100644 index 0000000000..4968c724a2 --- /dev/null +++ b/queue-6.6/misc-fastrpc-save-actual-dma-size-in-fastrpc_map-structure.patch @@ -0,0 +1,116 @@ +From stable+bounces-185691-greg=kroah.com@vger.kernel.org Tue Oct 14 17:15:04 2025 +From: Sasha Levin +Date: Tue, 14 Oct 2025 11:12:26 -0400 +Subject: misc: fastrpc: Save actual DMA size in fastrpc_map structure +To: stable@vger.kernel.org +Cc: Ling Xu , stable@kernel.org, Dmitry Baryshkov , Ekansh Gupta , Dmitry Baryshkov , Srinivas Kandagatla , Greg Kroah-Hartman , Sasha Levin +Message-ID: <20251014151226.111084-2-sashal@kernel.org> + +From: Ling Xu + +[ Upstream commit 8b5b456222fd604079b5cf2af1f25ad690f54a25 ] + +For user passed fd buffer, map is created using DMA calls. The +map related information is stored in fastrpc_map structure. The +actual DMA size is not stored in the structure. Store the actual +size of buffer and check it against the user passed size. 
+ +Fixes: c68cfb718c8f ("misc: fastrpc: Add support for context Invoke method") +Cc: stable@kernel.org +Reviewed-by: Dmitry Baryshkov +Co-developed-by: Ekansh Gupta +Signed-off-by: Ekansh Gupta +Signed-off-by: Ling Xu +Reviewed-by: Dmitry Baryshkov +Signed-off-by: Srinivas Kandagatla +Link: https://lore.kernel.org/r/20250912131236.303102-2-srini@kernel.org +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/misc/fastrpc.c | 27 ++++++++++++++++++--------- + 1 file changed, 18 insertions(+), 9 deletions(-) + +--- a/drivers/misc/fastrpc.c ++++ b/drivers/misc/fastrpc.c +@@ -322,11 +322,11 @@ static void fastrpc_free_map(struct kref + + perm.vmid = QCOM_SCM_VMID_HLOS; + perm.perm = QCOM_SCM_PERM_RWX; +- err = qcom_scm_assign_mem(map->phys, map->size, ++ err = qcom_scm_assign_mem(map->phys, map->len, + &src_perms, &perm, 1); + if (err) { + dev_err(map->fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d\n", +- map->phys, map->size, err); ++ map->phys, map->len, err); + return; + } + } +@@ -752,7 +752,8 @@ static int fastrpc_map_attach(struct fas + struct fastrpc_session_ctx *sess = fl->sctx; + struct fastrpc_map *map = NULL; + struct sg_table *table; +- int err = 0; ++ struct scatterlist *sgl = NULL; ++ int err = 0, sgl_index = 0; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) +@@ -789,7 +790,15 @@ static int fastrpc_map_attach(struct fas + map->phys = sg_dma_address(map->table->sgl); + map->phys += ((u64)fl->sctx->sid << 32); + } +- map->size = len; ++ for_each_sg(map->table->sgl, sgl, map->table->nents, ++ sgl_index) ++ map->size += sg_dma_len(sgl); ++ if (len > map->size) { ++ dev_dbg(sess->dev, "Bad size passed len 0x%llx map size 0x%llx\n", ++ len, map->size); ++ err = -EINVAL; ++ goto map_err; ++ } + map->va = sg_virt(map->table->sgl); + map->len = len; + +@@ -806,10 +815,10 @@ static int fastrpc_map_attach(struct fas + dst_perms[1].vmid = fl->cctx->vmperms[0].vmid; + 
dst_perms[1].perm = QCOM_SCM_PERM_RWX; + map->attr = attr; +- err = qcom_scm_assign_mem(map->phys, (u64)map->size, &src_perms, dst_perms, 2); ++ err = qcom_scm_assign_mem(map->phys, (u64)map->len, &src_perms, dst_perms, 2); + if (err) { + dev_err(sess->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d\n", +- map->phys, map->size, err); ++ map->phys, map->len, err); + goto map_err; + } + } +@@ -2063,7 +2072,7 @@ static int fastrpc_req_mem_map(struct fa + args[0].length = sizeof(req_msg); + + pages.addr = map->phys; +- pages.size = map->size; ++ pages.size = map->len; + + args[1].ptr = (u64) (uintptr_t) &pages; + args[1].length = sizeof(pages); +@@ -2078,7 +2087,7 @@ static int fastrpc_req_mem_map(struct fa + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, &args[0]); + if (err) { + dev_err(dev, "mem mmap error, fd %d, vaddr %llx, size %lld\n", +- req.fd, req.vaddrin, map->size); ++ req.fd, req.vaddrin, map->len); + goto err_invoke; + } + +@@ -2091,7 +2100,7 @@ static int fastrpc_req_mem_map(struct fa + if (copy_to_user((void __user *)argp, &req, sizeof(req))) { + /* unmap the memory and release the buffer */ + req_unmap.vaddr = (uintptr_t) rsp_msg.vaddr; +- req_unmap.length = map->size; ++ req_unmap.length = map->len; + fastrpc_req_mem_unmap_impl(fl, &req_unmap); + return -EFAULT; + } diff --git a/queue-6.6/mm-ksm-fix-incorrect-ksm-counter-handling-in-mm_struct-during-fork.patch b/queue-6.6/mm-ksm-fix-incorrect-ksm-counter-handling-in-mm_struct-during-fork.patch new file mode 100644 index 0000000000..1a4811a85e --- /dev/null +++ b/queue-6.6/mm-ksm-fix-incorrect-ksm-counter-handling-in-mm_struct-during-fork.patch @@ -0,0 +1,101 @@ +From stable+bounces-185674-greg=kroah.com@vger.kernel.org Tue Oct 14 16:01:42 2025 +From: Sasha Levin +Date: Tue, 14 Oct 2025 10:01:32 -0400 +Subject: mm/ksm: fix incorrect KSM counter handling in mm_struct during fork +To: stable@vger.kernel.org +Cc: Donet Tom , Chengming Zhou , David Hildenbrand , Aboorva 
Devarajan , "Ritesh Harjani (IBM)" , Wei Yang , xu xin , Andrew Morton , Sasha Levin +Message-ID: <20251014140132.49794-1-sashal@kernel.org> + +From: Donet Tom + +[ Upstream commit 4d6fc29f36341d7795db1d1819b4c15fe9be7b23 ] + +Patch series "mm/ksm: Fix incorrect accounting of KSM counters during +fork", v3. + +The first patch in this series fixes the incorrect accounting of KSM +counters such as ksm_merging_pages, ksm_rmap_items, and the global +ksm_zero_pages during fork. + +The following patch add a selftest to verify the ksm_merging_pages counter +was updated correctly during fork. + +Test Results +============ +Without the first patch +----------------------- + # [RUN] test_fork_ksm_merging_page_count + not ok 10 ksm_merging_page in child: 32 + +With the first patch +-------------------- + # [RUN] test_fork_ksm_merging_page_count + ok 10 ksm_merging_pages is not inherited after fork + +This patch (of 2): + +Currently, the KSM-related counters in `mm_struct`, such as +`ksm_merging_pages`, `ksm_rmap_items`, and `ksm_zero_pages`, are inherited +by the child process during fork. This results in inconsistent +accounting. + +When a process uses KSM, identical pages are merged and an rmap item is +created for each merged page. The `ksm_merging_pages` and +`ksm_rmap_items` counters are updated accordingly. However, after a fork, +these counters are copied to the child while the corresponding rmap items +are not. As a result, when the child later triggers an unmerge, there are +no rmap items present in the child, so the counters remain stale, leading +to incorrect accounting. + +A similar issue exists with `ksm_zero_pages`, which maintains both a +global counter and a per-process counter. During fork, the per-process +counter is inherited by the child, but the global counter is not +incremented. Since the child also references zero pages, the global +counter should be updated as well. 
Otherwise, during zero-page unmerge, +both the global and per-process counters are decremented, causing the +global counter to become inconsistent. + +To fix this, ksm_merging_pages and ksm_rmap_items are reset to 0 during +fork, and the global ksm_zero_pages counter is updated with the +per-process ksm_zero_pages value inherited by the child. This ensures +that KSM statistics remain accurate and reflect the activity of each +process correctly. + +Link: https://lkml.kernel.org/r/cover.1758648700.git.donettom@linux.ibm.com +Link: https://lkml.kernel.org/r/7b9870eb67ccc0d79593940d9dbd4a0b39b5d396.1758648700.git.donettom@linux.ibm.com +Fixes: 7609385337a4 ("ksm: count ksm merging pages for each process") +Fixes: cb4df4cae4f2 ("ksm: count allocated ksm rmap_items for each process") +Fixes: e2942062e01d ("ksm: count all zero pages placed by KSM") +Signed-off-by: Donet Tom +Reviewed-by: Chengming Zhou +Acked-by: David Hildenbrand +Cc: Aboorva Devarajan +Cc: David Hildenbrand +Cc: Donet Tom +Cc: "Ritesh Harjani (IBM)" +Cc: Wei Yang +Cc: xu xin +Cc: [6.6+] +Signed-off-by: Andrew Morton +[ changed mm_flags_test() to test_bit() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ksm.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/include/linux/ksm.h ++++ b/include/linux/ksm.h +@@ -59,6 +59,12 @@ static inline int ksm_fork(struct mm_str + int ret; + + if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) { ++ long nr_ksm_zero_pages = atomic_long_read(&mm->ksm_zero_pages); ++ ++ mm->ksm_merging_pages = 0; ++ mm->ksm_rmap_items = 0; ++ atomic_long_add(nr_ksm_zero_pages, &ksm_zero_pages); ++ + ret = __ksm_enter(mm); + if (ret) + return ret; diff --git a/queue-6.6/pci-endpoint-pci-epf-test-add-null-check-for-dma-channels-before-release.patch b/queue-6.6/pci-endpoint-pci-epf-test-add-null-check-for-dma-channels-before-release.patch new file mode 100644 index 0000000000..9c700b2dfd --- /dev/null +++ 
b/queue-6.6/pci-endpoint-pci-epf-test-add-null-check-for-dma-channels-before-release.patch @@ -0,0 +1,75 @@ +From stable+bounces-185699-greg=kroah.com@vger.kernel.org Tue Oct 14 18:07:36 2025 +From: Sasha Levin +Date: Tue, 14 Oct 2025 12:06:18 -0400 +Subject: PCI: endpoint: pci-epf-test: Add NULL check for DMA channels before release +To: stable@vger.kernel.org +Cc: "Shin'ichiro Kawasaki" , "Manivannan Sadhasivam" , "Damien Le Moal" , "Krzysztof Wilczyński" , "Sasha Levin" +Message-ID: <20251014160618.158328-2-sashal@kernel.org> + +From: Shin'ichiro Kawasaki + +[ Upstream commit 85afa9ea122dd9d4a2ead104a951d318975dcd25 ] + +The fields dma_chan_tx and dma_chan_rx of the struct pci_epf_test can be +NULL even after EPF initialization. Then it is prudent to check that +they have non-NULL values before releasing the channels. Add the checks +in pci_epf_test_clean_dma_chan(). + +Without the checks, NULL pointer dereferences happen and they can lead +to a kernel panic in some cases: + + Unable to handle kernel NULL pointer dereference at virtual address 0000000000000050 + Call trace: + dma_release_channel+0x2c/0x120 (P) + pci_epf_test_epc_deinit+0x94/0xc0 [pci_epf_test] + pci_epc_deinit_notify+0x74/0xc0 + tegra_pcie_ep_pex_rst_irq+0x250/0x5d8 + irq_thread_fn+0x34/0xb8 + irq_thread+0x18c/0x2e8 + kthread+0x14c/0x210 + ret_from_fork+0x10/0x20 + +Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities") +Fixes: 5ebf3fc59bd2 ("PCI: endpoint: functions/pci-epf-test: Add DMA support to transfer data") +Signed-off-by: Shin'ichiro Kawasaki +[mani: trimmed the stack trace] +Signed-off-by: Manivannan Sadhasivam +Reviewed-by: Damien Le Moal +Reviewed-by: Krzysztof Wilczyński +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20250916025756.34807-1-shinichiro.kawasaki@wdc.com +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/endpoint/functions/pci-epf-test.c | 17 +++++++++++------ + 1 file changed, 11 
insertions(+), 6 deletions(-) + +--- a/drivers/pci/endpoint/functions/pci-epf-test.c ++++ b/drivers/pci/endpoint/functions/pci-epf-test.c +@@ -282,15 +282,20 @@ static void pci_epf_test_clean_dma_chan( + if (!epf_test->dma_supported) + return; + +- dma_release_channel(epf_test->dma_chan_tx); +- if (epf_test->dma_chan_tx == epf_test->dma_chan_rx) { ++ if (epf_test->dma_chan_tx) { ++ dma_release_channel(epf_test->dma_chan_tx); ++ if (epf_test->dma_chan_tx == epf_test->dma_chan_rx) { ++ epf_test->dma_chan_tx = NULL; ++ epf_test->dma_chan_rx = NULL; ++ return; ++ } + epf_test->dma_chan_tx = NULL; +- epf_test->dma_chan_rx = NULL; +- return; + } + +- dma_release_channel(epf_test->dma_chan_rx); +- epf_test->dma_chan_rx = NULL; ++ if (epf_test->dma_chan_rx) { ++ dma_release_channel(epf_test->dma_chan_rx); ++ epf_test->dma_chan_rx = NULL; ++ } + } + + static void pci_epf_test_print_rate(struct pci_epf_test *epf_test, diff --git a/queue-6.6/pci-endpoint-remove-surplus-return-statement-from-pci_epf_test_clean_dma_chan.patch b/queue-6.6/pci-endpoint-remove-surplus-return-statement-from-pci_epf_test_clean_dma_chan.patch new file mode 100644 index 0000000000..37a825e53e --- /dev/null +++ b/queue-6.6/pci-endpoint-remove-surplus-return-statement-from-pci_epf_test_clean_dma_chan.patch @@ -0,0 +1,48 @@ +From stable+bounces-185698-greg=kroah.com@vger.kernel.org Tue Oct 14 18:06:33 2025 +From: Sasha Levin +Date: Tue, 14 Oct 2025 12:06:17 -0400 +Subject: PCI: endpoint: Remove surplus return statement from pci_epf_test_clean_dma_chan() +To: stable@vger.kernel.org +Cc: "Wang Jiang" , "Krzysztof Wilczyński" , "Sasha Levin" +Message-ID: <20251014160618.158328-1-sashal@kernel.org> + +From: Wang Jiang + +[ Upstream commit 9b80bdb10aee04ce7289896e6bdad13e33972636 ] + +Remove a surplus return statement from the void function that has been +added in the commit commit 8353813c88ef ("PCI: endpoint: Enable DMA +tests for endpoints with DMA capabilities"). 
+ +Especially, as an empty return statements at the end of a void functions +serve little purpose. + +This fixes the following checkpatch.pl script warning: + + WARNING: void function return statements are not generally useful + #296: FILE: drivers/pci/endpoint/functions/pci-epf-test.c:296: + + return; + +} + +Link: https://lore.kernel.org/r/tencent_F250BEE2A65745A524E2EFE70CF615CA8F06@qq.com +Signed-off-by: Wang Jiang +[kwilczynski: commit log] +Signed-off-by: Krzysztof Wilczyński +Stable-dep-of: 85afa9ea122d ("PCI: endpoint: pci-epf-test: Add NULL check for DMA channels before release") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/endpoint/functions/pci-epf-test.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/pci/endpoint/functions/pci-epf-test.c ++++ b/drivers/pci/endpoint/functions/pci-epf-test.c +@@ -291,8 +291,6 @@ static void pci_epf_test_clean_dma_chan( + + dma_release_channel(epf_test->dma_chan_rx); + epf_test->dma_chan_rx = NULL; +- +- return; + } + + static void pci_epf_test_print_rate(struct pci_epf_test *epf_test, diff --git a/queue-6.6/rseq-protect-event-mask-against-membarrier-ipi.patch b/queue-6.6/rseq-protect-event-mask-against-membarrier-ipi.patch new file mode 100644 index 0000000000..dbcc421b11 --- /dev/null +++ b/queue-6.6/rseq-protect-event-mask-against-membarrier-ipi.patch @@ -0,0 +1,80 @@ +From stable+bounces-185872-greg=kroah.com@vger.kernel.org Thu Oct 16 03:31:59 2025 +From: Sasha Levin +Date: Wed, 15 Oct 2025 21:31:44 -0400 +Subject: rseq: Protect event mask against membarrier IPI +To: stable@vger.kernel.org +Cc: Thomas Gleixner , Boqun Feng , Mathieu Desnoyers , Sasha Levin +Message-ID: <20251016013144.1560902-1-sashal@kernel.org> + +From: Thomas Gleixner + +[ Upstream commit 6eb350a2233100a283f882c023e5ad426d0ed63b ] + +rseq_need_restart() reads and clears task::rseq_event_mask with preemption +disabled to guard against the scheduler. 
+ +But membarrier() uses an IPI and sets the PREEMPT bit in the event mask +from the IPI, which leaves that RMW operation unprotected. + +Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that. + +Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ") +Signed-off-by: Thomas Gleixner +Reviewed-by: Boqun Feng +Reviewed-by: Mathieu Desnoyers +Cc: stable@vger.kernel.org +[ Applied changes to include/linux/sched.h instead of include/linux/rseq.h ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 11 ++++++++--- + kernel/rseq.c | 10 +++++----- + 2 files changed, 13 insertions(+), 8 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -2343,6 +2343,12 @@ enum rseq_event_mask { + RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), + }; + ++#ifdef CONFIG_MEMBARRIER ++# define RSEQ_EVENT_GUARD irq ++#else ++# define RSEQ_EVENT_GUARD preempt ++#endif ++ + static inline void rseq_set_notify_resume(struct task_struct *t) + { + if (t->rseq) +@@ -2361,9 +2367,8 @@ static inline void rseq_handle_notify_re + static inline void rseq_signal_deliver(struct ksignal *ksig, + struct pt_regs *regs) + { +- preempt_disable(); +- __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask); +- preempt_enable(); ++ scoped_guard(RSEQ_EVENT_GUARD) ++ __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask); + rseq_handle_notify_resume(ksig, regs); + } + +--- a/kernel/rseq.c ++++ b/kernel/rseq.c +@@ -255,12 +255,12 @@ static int rseq_need_restart(struct task + + /* + * Load and clear event mask atomically with respect to +- * scheduler preemption. ++ * scheduler preemption and membarrier IPIs.
+ */ +- preempt_disable(); +- event_mask = t->rseq_event_mask; +- t->rseq_event_mask = 0; +- preempt_enable(); ++ scoped_guard(RSEQ_EVENT_GUARD) { ++ event_mask = t->rseq_event_mask; ++ t->rseq_event_mask = 0; ++ } + + return !!event_mask; + } diff --git a/queue-6.6/s390-bpf-centralize-frame-offset-calculations.patch b/queue-6.6/s390-bpf-centralize-frame-offset-calculations.patch new file mode 100644 index 0000000000..b94cb5d0ad --- /dev/null +++ b/queue-6.6/s390-bpf-centralize-frame-offset-calculations.patch @@ -0,0 +1,221 @@ +From stable+bounces-186208-greg=kroah.com@vger.kernel.org Thu Oct 16 23:55:26 2025 +From: Ilya Leoshkevich +Date: Thu, 16 Oct 2025 23:51:25 +0200 +Subject: s390/bpf: Centralize frame offset calculations +To: stable@vger.kernel.org +Cc: Ilya Leoshkevich , Alexei Starovoitov +Message-ID: <20251016215450.53494-3-iii@linux.ibm.com> + +From: Ilya Leoshkevich + +commit b2268d550d20ff860bddfe3a91b1aec00414689a upstream. + +The calculation of the distance from %r15 to the caller-allocated +portion of the stack frame is copy-pasted into multiple places in the +JIT code. + +Move it to bpf_jit_prog() and save the result into bpf_jit::frame_off, +so that the other parts of the JIT can use it. 
+ +Signed-off-by: Ilya Leoshkevich +Link: https://lore.kernel.org/r/20250624121501.50536-2-iii@linux.ibm.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/net/bpf_jit_comp.c | 56 +++++++++++++++++++------------------------ + 1 file changed, 26 insertions(+), 30 deletions(-) + +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -53,6 +53,7 @@ struct bpf_jit { + int excnt; /* Number of exception table entries */ + int prologue_plt_ret; /* Return address for prologue hotpatch PLT */ + int prologue_plt; /* Start of prologue hotpatch PLT */ ++ u32 frame_off; /* Offset of frame from %r15 */ + }; + + #define SEEN_MEM BIT(0) /* use mem[] for temporary storage */ +@@ -416,12 +417,9 @@ static void save_regs(struct bpf_jit *ji + /* + * Restore registers from "rs" (register start) to "re" (register end) on stack + */ +-static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth) ++static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re) + { +- u32 off = STK_OFF_R6 + (rs - 6) * 8; +- +- if (jit->seen & SEEN_STACK) +- off += STK_OFF + stack_depth; ++ u32 off = jit->frame_off + STK_OFF_R6 + (rs - 6) * 8; + + if (rs == re) + /* lg %rs,off(%r15) */ +@@ -465,8 +463,7 @@ static int get_end(u16 seen_regs, int st + * Save and restore clobbered registers (6-15) on stack. + * We save/restore registers in chunks with gap >= 2 registers. 
+ */ +-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth, +- u16 extra_regs) ++static void save_restore_regs(struct bpf_jit *jit, int op, u16 extra_regs) + { + u16 seen_regs = jit->seen_regs | extra_regs; + const int last = 15, save_restore_size = 6; +@@ -489,7 +486,7 @@ static void save_restore_regs(struct bpf + if (op == REGS_SAVE) + save_regs(jit, rs, re); + else +- restore_regs(jit, rs, re, stack_depth); ++ restore_regs(jit, rs, re); + re++; + } while (re <= last); + } +@@ -556,8 +553,7 @@ static void bpf_jit_plt(struct bpf_plt * + * Save registers and create stack frame if necessary. + * See stack frame layout description in "bpf_jit.h"! + */ +-static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, +- u32 stack_depth) ++static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp) + { + /* No-op for hotpatching */ + /* brcl 0,prologue_plt */ +@@ -579,7 +575,7 @@ static void bpf_jit_prologue(struct bpf_ + /* Tail calls have to skip above initialization */ + jit->tail_call_start = jit->prg; + /* Save registers */ +- save_restore_regs(jit, REGS_SAVE, stack_depth, 0); ++ save_restore_regs(jit, REGS_SAVE, 0); + /* Setup literal pool */ + if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) { + if (!is_first_pass(jit) && +@@ -599,8 +595,8 @@ static void bpf_jit_prologue(struct bpf_ + EMIT4(0xb9040000, REG_W1, REG_15); + /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ + EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); +- /* aghi %r15,-STK_OFF */ +- EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth)); ++ /* aghi %r15,-frame_off */ ++ EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off); + /* stg %w1,152(%r15) (backchain) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, + REG_15, 152); +@@ -647,13 +643,13 @@ static void call_r1(struct bpf_jit *jit) + /* + * Function epilogue + */ +-static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) ++static void bpf_jit_epilogue(struct bpf_jit *jit) 
+ { + jit->exit_ip = jit->prg; + /* Load exit code: lgr %r2,%b0 */ + EMIT4(0xb9040000, REG_2, BPF_REG_0); + /* Restore registers */ +- save_restore_regs(jit, REGS_RESTORE, stack_depth, 0); ++ save_restore_regs(jit, REGS_RESTORE, 0); + if (nospec_uses_trampoline()) { + jit->r14_thunk_ip = jit->prg; + /* Generate __s390_indirect_jump_r14 thunk */ +@@ -779,7 +775,7 @@ static int sign_extend(struct bpf_jit *j + * stack space for the large switch statement. + */ + static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, +- int i, bool extra_pass, u32 stack_depth) ++ int i, bool extra_pass) + { + struct bpf_insn *insn = &fp->insnsi[i]; + u32 dst_reg = insn->dst_reg; +@@ -1433,9 +1429,9 @@ static noinline int bpf_jit_insn(struct + * Note 2: We assume that the verifier does not let us call the + * main program, which clears the tail call counter on entry. + */ +- /* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */ ++ /* mvc STK_OFF_TCCNT(4,%r15),frame_off+STK_OFF_TCCNT(%r15) */ + _EMIT6(0xd203f000 | STK_OFF_TCCNT, +- 0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth)); ++ 0xf000 | (jit->frame_off + STK_OFF_TCCNT)); + + /* Sign-extend the kfunc arguments. 
*/ + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { +@@ -1486,10 +1482,7 @@ static noinline int bpf_jit_insn(struct + * goto out; + */ + +- if (jit->seen & SEEN_STACK) +- off = STK_OFF_TCCNT + STK_OFF + stack_depth; +- else +- off = STK_OFF_TCCNT; ++ off = jit->frame_off + STK_OFF_TCCNT; + /* lhi %w0,1 */ + EMIT4_IMM(0xa7080000, REG_W0, 1); + /* laal %w1,%w0,off(%r15) */ +@@ -1519,7 +1512,7 @@ static noinline int bpf_jit_insn(struct + /* + * Restore registers before calling function + */ +- save_restore_regs(jit, REGS_RESTORE, stack_depth, 0); ++ save_restore_regs(jit, REGS_RESTORE, 0); + + /* + * goto *(prog->bpf_func + tail_call_start); +@@ -1822,7 +1815,7 @@ static int bpf_set_addr(struct bpf_jit * + * Compile eBPF program into s390x code + */ + static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, +- bool extra_pass, u32 stack_depth) ++ bool extra_pass) + { + int i, insn_count, lit32_size, lit64_size; + +@@ -1830,19 +1823,23 @@ static int bpf_jit_prog(struct bpf_jit * + jit->lit64 = jit->lit64_start; + jit->prg = 0; + jit->excnt = 0; ++ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) ++ jit->frame_off = STK_OFF + round_up(fp->aux->stack_depth, 8); ++ else ++ jit->frame_off = 0; + +- bpf_jit_prologue(jit, fp, stack_depth); ++ bpf_jit_prologue(jit, fp); + if (bpf_set_addr(jit, 0) < 0) + return -1; + for (i = 0; i < fp->len; i += insn_count) { +- insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth); ++ insn_count = bpf_jit_insn(jit, fp, i, extra_pass); + if (insn_count < 0) + return -1; + /* Next instruction address */ + if (bpf_set_addr(jit, i + insn_count) < 0) + return -1; + } +- bpf_jit_epilogue(jit, stack_depth); ++ bpf_jit_epilogue(jit); + + lit32_size = jit->lit32 - jit->lit32_start; + lit64_size = jit->lit64 - jit->lit64_start; +@@ -1902,7 +1899,6 @@ static struct bpf_binary_header *bpf_jit + */ + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) + { +- u32 stack_depth = round_up(fp->aux->stack_depth, 8); + struct bpf_prog 
*tmp, *orig_fp = fp; + struct bpf_binary_header *header; + struct s390_jit_data *jit_data; +@@ -1955,7 +1951,7 @@ struct bpf_prog *bpf_int_jit_compile(str + * - 3: Calculate program size and addrs array + */ + for (pass = 1; pass <= 3; pass++) { +- if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { ++ if (bpf_jit_prog(&jit, fp, extra_pass)) { + fp = orig_fp; + goto free_addrs; + } +@@ -1969,7 +1965,7 @@ struct bpf_prog *bpf_int_jit_compile(str + goto free_addrs; + } + skip_init_ctx: +- if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { ++ if (bpf_jit_prog(&jit, fp, extra_pass)) { + bpf_jit_binary_free(header); + fp = orig_fp; + goto free_addrs; diff --git a/queue-6.6/s390-bpf-change-seen_reg-to-a-mask.patch b/queue-6.6/s390-bpf-change-seen_reg-to-a-mask.patch new file mode 100644 index 0000000000..fa61b6e5f0 --- /dev/null +++ b/queue-6.6/s390-bpf-change-seen_reg-to-a-mask.patch @@ -0,0 +1,141 @@ +From stable+bounces-186210-greg=kroah.com@vger.kernel.org Thu Oct 16 23:55:34 2025 +From: Ilya Leoshkevich +Date: Thu, 16 Oct 2025 23:51:24 +0200 +Subject: s390/bpf: Change seen_reg to a mask +To: stable@vger.kernel.org +Cc: Ilya Leoshkevich , Daniel Borkmann +Message-ID: <20251016215450.53494-2-iii@linux.ibm.com> + +From: Ilya Leoshkevich + +commit 7ba4f43e16de351fe9821de80e15d88c884b2967 upstream. + +Using a mask instead of an array saves a small amount of memory and +allows marking multiple registers as seen with a simple "or". Another +positive side-effect is that it speeds up verification with jitterbug. 
+ +Signed-off-by: Ilya Leoshkevich +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20240703005047.40915-2-iii@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/net/bpf_jit_comp.c | 32 ++++++++++++++++---------------- + 1 file changed, 16 insertions(+), 16 deletions(-) + +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -35,7 +35,7 @@ + + struct bpf_jit { + u32 seen; /* Flags to remember seen eBPF instructions */ +- u32 seen_reg[16]; /* Array to remember which registers are used */ ++ u16 seen_regs; /* Mask to remember which registers are used */ + u32 *addrs; /* Array with relative instruction addresses */ + u8 *prg_buf; /* Start of program */ + int size; /* Size of program and literal pool */ +@@ -118,8 +118,8 @@ static inline void reg_set_seen(struct b + { + u32 r1 = reg2hex[b1]; + +- if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1]) +- jit->seen_reg[r1] = 1; ++ if (r1 >= 6 && r1 <= 15) ++ jit->seen_regs |= (1 << r1); + } + + #define REG_SET_SEEN(b1) \ +@@ -127,8 +127,6 @@ static inline void reg_set_seen(struct b + reg_set_seen(jit, b1); \ + }) + +-#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]] +- + /* + * EMIT macros for code generation + */ +@@ -436,12 +434,12 @@ static void restore_regs(struct bpf_jit + /* + * Return first seen register (from start) + */ +-static int get_start(struct bpf_jit *jit, int start) ++static int get_start(u16 seen_regs, int start) + { + int i; + + for (i = start; i <= 15; i++) { +- if (jit->seen_reg[i]) ++ if (seen_regs & (1 << i)) + return i; + } + return 0; +@@ -450,15 +448,15 @@ static int get_start(struct bpf_jit *jit + /* + * Return last seen register (from start) (gap >= 2) + */ +-static int get_end(struct bpf_jit *jit, int start) ++static int get_end(u16 seen_regs, int start) + { + int i; + + for (i = start; i < 15; i++) { +- if (!jit->seen_reg[i] && !jit->seen_reg[i + 1]) ++ if (!(seen_regs & (3 << i))) + return i - 1; + } +- return jit->seen_reg[15] ? 
15 : 14; ++ return (seen_regs & (1 << 15)) ? 15 : 14; + } + + #define REGS_SAVE 1 +@@ -467,8 +465,10 @@ static int get_end(struct bpf_jit *jit, + * Save and restore clobbered registers (6-15) on stack. + * We save/restore registers in chunks with gap >= 2 registers. + */ +-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth) ++static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth, ++ u16 extra_regs) + { ++ u16 seen_regs = jit->seen_regs | extra_regs; + const int last = 15, save_restore_size = 6; + int re = 6, rs; + +@@ -482,10 +482,10 @@ static void save_restore_regs(struct bpf + } + + do { +- rs = get_start(jit, re); ++ rs = get_start(seen_regs, re); + if (!rs) + break; +- re = get_end(jit, rs + 1); ++ re = get_end(seen_regs, rs + 1); + if (op == REGS_SAVE) + save_regs(jit, rs, re); + else +@@ -579,7 +579,7 @@ static void bpf_jit_prologue(struct bpf_ + /* Tail calls have to skip above initialization */ + jit->tail_call_start = jit->prg; + /* Save registers */ +- save_restore_regs(jit, REGS_SAVE, stack_depth); ++ save_restore_regs(jit, REGS_SAVE, stack_depth, 0); + /* Setup literal pool */ + if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) { + if (!is_first_pass(jit) && +@@ -653,7 +653,7 @@ static void bpf_jit_epilogue(struct bpf_ + /* Load exit code: lgr %r2,%b0 */ + EMIT4(0xb9040000, REG_2, BPF_REG_0); + /* Restore registers */ +- save_restore_regs(jit, REGS_RESTORE, stack_depth); ++ save_restore_regs(jit, REGS_RESTORE, stack_depth, 0); + if (nospec_uses_trampoline()) { + jit->r14_thunk_ip = jit->prg; + /* Generate __s390_indirect_jump_r14 thunk */ +@@ -1519,7 +1519,7 @@ static noinline int bpf_jit_insn(struct + /* + * Restore registers before calling function + */ +- save_restore_regs(jit, REGS_RESTORE, stack_depth); ++ save_restore_regs(jit, REGS_RESTORE, stack_depth, 0); + + /* + * goto *(prog->bpf_func + tail_call_start); diff --git 
a/queue-6.6/s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch b/queue-6.6/s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch new file mode 100644 index 0000000000..c4e560b2fc --- /dev/null +++ b/queue-6.6/s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch @@ -0,0 +1,260 @@ +From stable+bounces-186209-greg=kroah.com@vger.kernel.org Thu Oct 16 23:55:25 2025 +From: Ilya Leoshkevich +Date: Thu, 16 Oct 2025 23:51:26 +0200 +Subject: s390/bpf: Describe the frame using a struct instead of constants +To: stable@vger.kernel.org +Cc: Ilya Leoshkevich , Alexei Starovoitov +Message-ID: <20251016215450.53494-4-iii@linux.ibm.com> + +From: Ilya Leoshkevich + +commit e26d523edf2a62b142d2dd2dd9b87f61ed92f33a upstream. + +Currently the caller-allocated portion of the stack frame is described +using constants, hardcoded values, and an ASCII drawing, making it +harder than necessary to ensure that everything is in sync. + +Declare a struct and use offsetof() and offsetofend() macros to refer +to various values stored within the frame. + +Signed-off-by: Ilya Leoshkevich +Link: https://lore.kernel.org/r/20250624121501.50536-3-iii@linux.ibm.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/net/bpf_jit.h | 55 ---------------------------------- + arch/s390/net/bpf_jit_comp.c | 69 +++++++++++++++++++++++++++++-------------- + 2 files changed, 47 insertions(+), 77 deletions(-) + delete mode 100644 arch/s390/net/bpf_jit.h + +--- a/arch/s390/net/bpf_jit.h ++++ /dev/null +@@ -1,55 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * BPF Jit compiler defines +- * +- * Copyright IBM Corp. 
2012,2015 +- * +- * Author(s): Martin Schwidefsky +- * Michael Holzheu +- */ +- +-#ifndef __ARCH_S390_NET_BPF_JIT_H +-#define __ARCH_S390_NET_BPF_JIT_H +- +-#ifndef __ASSEMBLY__ +- +-#include +-#include +- +-#endif /* __ASSEMBLY__ */ +- +-/* +- * Stackframe layout (packed stack): +- * +- * ^ high +- * +---------------+ | +- * | old backchain | | +- * +---------------+ | +- * | r15 - r6 | | +- * +---------------+ | +- * | 4 byte align | | +- * | tail_call_cnt | | +- * BFP -> +===============+ | +- * | | | +- * | BPF stack | | +- * | | | +- * R15+160 -> +---------------+ | +- * | new backchain | | +- * R15+152 -> +---------------+ | +- * | + 152 byte SA | | +- * R15 -> +---------------+ + low +- * +- * We get 160 bytes stack space from calling function, but only use +- * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt. +- * +- * The stack size used by the BPF program ("BPF stack" above) is passed +- * via "aux->stack_depth". +- */ +-#define STK_SPACE_ADD (160) +-#define STK_160_UNUSED (160 - 12 * 8) +-#define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED) +- +-#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */ +-#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */ +- +-#endif /* __ARCH_S390_NET_BPF_JIT_H */ +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -31,7 +31,6 @@ + #include + #include + #include +-#include "bpf_jit.h" + + struct bpf_jit { + u32 seen; /* Flags to remember seen eBPF instructions */ +@@ -53,7 +52,7 @@ struct bpf_jit { + int excnt; /* Number of exception table entries */ + int prologue_plt_ret; /* Return address for prologue hotpatch PLT */ + int prologue_plt; /* Start of prologue hotpatch PLT */ +- u32 frame_off; /* Offset of frame from %r15 */ ++ u32 frame_off; /* Offset of struct bpf_prog from %r15 */ + }; + + #define SEEN_MEM BIT(0) /* use mem[] for temporary storage */ +@@ -400,11 +399,25 @@ static void jit_fill_hole(void *area, un + } + + /* ++ * Caller-allocated part of the 
frame. ++ * Thanks to packed stack, its otherwise unused initial part can be used for ++ * the BPF stack and for the next frame. ++ */ ++struct prog_frame { ++ u64 unused[8]; ++ /* BPF stack starts here and grows towards 0 */ ++ u32 tail_call_cnt; ++ u32 pad; ++ u64 r6[10]; /* r6 - r15 */ ++ u64 backchain; ++} __packed; ++ ++/* + * Save registers from "rs" (register start) to "re" (register end) on stack + */ + static void save_regs(struct bpf_jit *jit, u32 rs, u32 re) + { +- u32 off = STK_OFF_R6 + (rs - 6) * 8; ++ u32 off = offsetof(struct prog_frame, r6) + (rs - 6) * 8; + + if (rs == re) + /* stg %rs,off(%r15) */ +@@ -419,7 +432,7 @@ static void save_regs(struct bpf_jit *ji + */ + static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re) + { +- u32 off = jit->frame_off + STK_OFF_R6 + (rs - 6) * 8; ++ u32 off = jit->frame_off + offsetof(struct prog_frame, r6) + (rs - 6) * 8; + + if (rs == re) + /* lg %rs,off(%r15) */ +@@ -551,10 +564,12 @@ static void bpf_jit_plt(struct bpf_plt * + * Emit function prologue + * + * Save registers and create stack frame if necessary. +- * See stack frame layout description in "bpf_jit.h"! ++ * Stack frame layout is described by struct prog_frame. + */ + static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp) + { ++ BUILD_BUG_ON(sizeof(struct prog_frame) != STACK_FRAME_OVERHEAD); ++ + /* No-op for hotpatching */ + /* brcl 0,prologue_plt */ + EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt); +@@ -562,8 +577,9 @@ static void bpf_jit_prologue(struct bpf_ + + if (fp->aux->func_idx == 0) { + /* Initialize the tail call counter in the main program. */ +- /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ +- _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT); ++ /* xc tail_call_cnt(4,%r15),tail_call_cnt(%r15) */ ++ _EMIT6(0xd703f000 | offsetof(struct prog_frame, tail_call_cnt), ++ 0xf000 | offsetof(struct prog_frame, tail_call_cnt)); + } else { + /* + * Skip the tail call counter initialization in subprograms. 
+@@ -593,13 +609,15 @@ static void bpf_jit_prologue(struct bpf_ + if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) { + /* lgr %w1,%r15 (backchain) */ + EMIT4(0xb9040000, REG_W1, REG_15); +- /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ +- EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); ++ /* la %bfp,unused_end(%r15) (BPF frame pointer) */ ++ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, ++ offsetofend(struct prog_frame, unused)); + /* aghi %r15,-frame_off */ + EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off); +- /* stg %w1,152(%r15) (backchain) */ ++ /* stg %w1,backchain(%r15) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, +- REG_15, 152); ++ REG_15, ++ offsetof(struct prog_frame, backchain)); + } + } + +@@ -1429,9 +1447,10 @@ static noinline int bpf_jit_insn(struct + * Note 2: We assume that the verifier does not let us call the + * main program, which clears the tail call counter on entry. + */ +- /* mvc STK_OFF_TCCNT(4,%r15),frame_off+STK_OFF_TCCNT(%r15) */ +- _EMIT6(0xd203f000 | STK_OFF_TCCNT, +- 0xf000 | (jit->frame_off + STK_OFF_TCCNT)); ++ /* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */ ++ _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt), ++ 0xf000 | (jit->frame_off + ++ offsetof(struct prog_frame, tail_call_cnt))); + + /* Sign-extend the kfunc arguments. 
*/ + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { +@@ -1482,7 +1501,8 @@ static noinline int bpf_jit_insn(struct + * goto out; + */ + +- off = jit->frame_off + STK_OFF_TCCNT; ++ off = jit->frame_off + ++ offsetof(struct prog_frame, tail_call_cnt); + /* lhi %w0,1 */ + EMIT4_IMM(0xa7080000, REG_W0, 1); + /* laal %w1,%w0,off(%r15) */ +@@ -1824,7 +1844,9 @@ static int bpf_jit_prog(struct bpf_jit * + jit->prg = 0; + jit->excnt = 0; + if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) +- jit->frame_off = STK_OFF + round_up(fp->aux->stack_depth, 8); ++ jit->frame_off = sizeof(struct prog_frame) - ++ offsetofend(struct prog_frame, unused) + ++ round_up(fp->aux->stack_depth, 8); + else + jit->frame_off = 0; + +@@ -2281,9 +2303,10 @@ static int __arch_prepare_bpf_trampoline + /* stg %r1,backchain_off(%r15) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15, + tjit->backchain_off); +- /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */ ++ /* mvc tccnt_off(4,%r15),stack_size+tail_call_cnt(%r15) */ + _EMIT6(0xd203f000 | tjit->tccnt_off, +- 0xf000 | (tjit->stack_size + STK_OFF_TCCNT)); ++ 0xf000 | (tjit->stack_size + ++ offsetof(struct prog_frame, tail_call_cnt))); + /* stmg %r2,%rN,fwd_reg_args_off(%r15) */ + if (nr_reg_args) + EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2, +@@ -2420,8 +2443,9 @@ static int __arch_prepare_bpf_trampoline + (nr_stack_args * sizeof(u64) - 1) << 16 | + tjit->stack_args_off, + 0xf000 | tjit->orig_stack_args_off); +- /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */ +- _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off); ++ /* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */ ++ _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt), ++ 0xf000 | tjit->tccnt_off); + /* lgr %r1,%r8 */ + EMIT4(0xb9040000, REG_1, REG_8); + /* %r1() */ +@@ -2478,8 +2502,9 @@ static int __arch_prepare_bpf_trampoline + if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET)) + EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15, + 
tjit->retval_off); +- /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */ +- _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT), ++ /* mvc stack_size+tail_call_cnt(4,%r15),tccnt_off(%r15) */ ++ _EMIT6(0xd203f000 | (tjit->stack_size + ++ offsetof(struct prog_frame, tail_call_cnt)), + 0xf000 | tjit->tccnt_off); + /* aghi %r15,stack_size */ + EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size); diff --git a/queue-6.6/s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch b/queue-6.6/s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch new file mode 100644 index 0000000000..a78fab6ee9 --- /dev/null +++ b/queue-6.6/s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch @@ -0,0 +1,75 @@ +From stable+bounces-186211-greg=kroah.com@vger.kernel.org Thu Oct 16 23:55:37 2025 +From: Ilya Leoshkevich +Date: Thu, 16 Oct 2025 23:51:27 +0200 +Subject: s390/bpf: Write back tail call counter for BPF_PSEUDO_CALL +To: stable@vger.kernel.org +Cc: Ilya Leoshkevich , Daniel Borkmann +Message-ID: <20251016215450.53494-5-iii@linux.ibm.com> + +From: Ilya Leoshkevich + +commit c861a6b147137d10b5ff88a2c492ba376cd1b8b0 upstream. + +The tailcall_bpf2bpf_hierarchy_1 test hangs on s390. Its call graph is +as follows: + + entry() + subprog_tail() + bpf_tail_call_static(0) -> entry + tail_call_start + subprog_tail() + bpf_tail_call_static(0) -> entry + tail_call_start + +entry() copies its tail call counter to the subprog_tail()'s frame, +which then increments it. However, the incremented result is discarded, +leading to an astronomically large number of tail calls. + +Fix by writing the incremented counter back to the entry()'s frame. 
+ +Fixes: dd691e847d28 ("s390/bpf: Implement bpf_jit_supports_subprog_tailcalls()") +Signed-off-by: Ilya Leoshkevich +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20250813121016.163375-3-iii@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/net/bpf_jit_comp.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -1439,13 +1439,6 @@ static noinline int bpf_jit_insn(struct + jit->seen |= SEEN_FUNC; + /* + * Copy the tail call counter to where the callee expects it. +- * +- * Note 1: The callee can increment the tail call counter, but +- * we do not load it back, since the x86 JIT does not do this +- * either. +- * +- * Note 2: We assume that the verifier does not let us call the +- * main program, which clears the tail call counter on entry. + */ + /* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */ + _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt), +@@ -1472,6 +1465,22 @@ static noinline int bpf_jit_insn(struct + call_r1(jit); + /* lgr %b0,%r2: load return value into %b0 */ + EMIT4(0xb9040000, BPF_REG_0, REG_2); ++ ++ /* ++ * Copy the potentially updated tail call counter back. 
++ */ ++ ++ if (insn->src_reg == BPF_PSEUDO_CALL) ++ /* ++ * mvc frame_off+tail_call_cnt(%r15), ++ * tail_call_cnt(4,%r15) ++ */ ++ _EMIT6(0xd203f000 | (jit->frame_off + ++ offsetof(struct prog_frame, ++ tail_call_cnt)), ++ 0xf000 | offsetof(struct prog_frame, ++ tail_call_cnt)); ++ + break; + } + case BPF_JMP | BPF_TAIL_CALL: { diff --git a/queue-6.6/s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch b/queue-6.6/s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch new file mode 100644 index 0000000000..52e2c82f5e --- /dev/null +++ b/queue-6.6/s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch @@ -0,0 +1,50 @@ +From stable+bounces-186212-greg=kroah.com@vger.kernel.org Thu Oct 16 23:55:42 2025 +From: Ilya Leoshkevich +Date: Thu, 16 Oct 2025 23:51:28 +0200 +Subject: s390/bpf: Write back tail call counter for BPF_TRAMP_F_CALL_ORIG +To: stable@vger.kernel.org +Cc: Ilya Leoshkevich , Daniel Borkmann +Message-ID: <20251016215450.53494-6-iii@linux.ibm.com> + +From: Ilya Leoshkevich + +commit bc3905a71f02511607d3ccf732360580209cac4c upstream. + +The tailcall_bpf2bpf_hierarchy_fentry test hangs on s390. Its call +graph is as follows: + + entry() + subprog_tail() + trampoline() + fentry() + the rest of subprog_tail() # via BPF_TRAMP_F_CALL_ORIG + return to entry() + +The problem is that the rest of subprog_tail() increments the tail call +counter, but the trampoline discards the incremented value. This +results in an astronomically large number of tail calls. + +Fix by making the trampoline write the incremented tail call counter +back. 
+ +Fixes: 528eb2cb87bc ("s390/bpf: Implement arch_prepare_bpf_trampoline()") +Signed-off-by: Ilya Leoshkevich +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20250813121016.163375-4-iii@linux.ibm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/net/bpf_jit_comp.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -2462,6 +2462,9 @@ static int __arch_prepare_bpf_trampoline + /* stg %r2,retval_off(%r15) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15, + tjit->retval_off); ++ /* mvc tccnt_off(%r15),tail_call_cnt(4,%r15) */ ++ _EMIT6(0xd203f000 | tjit->tccnt_off, ++ 0xf000 | offsetof(struct prog_frame, tail_call_cnt)); + + im->ip_after_call = jit->prg_buf + jit->prg; + diff --git a/queue-6.6/series b/queue-6.6/series index e5aea927f3..a06ba7a66f 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -154,3 +154,36 @@ ext4-validate-ea_ino-and-size-in-check_xattrs.patch acpica-allow-to-skip-global-lock-initialization.patch ext4-free-orphan-info-with-kvfree.patch lib-crypto-curve25519-hacl64-disable-kasan-with-clang-17-and-older.patch +squashfs-add-additional-inode-sanity-checking.patch +squashfs-reject-negative-file-sizes-in-squashfs_read_inode.patch +media-mc-clear-minor-number-before-put-device.patch +mfd-intel_soc_pmic_chtdc_ti-fix-invalid-regmap-config-max_register-value.patch +mfd-intel_soc_pmic_chtdc_ti-drop-unneeded-assignment-for-cache_type.patch +mfd-intel_soc_pmic_chtdc_ti-set-use_single_read-regmap_config-flag.patch +tracing-fix-race-condition-in-kprobe-initialization-causing-null-pointer-dereference.patch +mm-ksm-fix-incorrect-ksm-counter-handling-in-mm_struct-during-fork.patch +kvm-svm-skip-fastpath-emulation-on-vm-exit-if-next-rip-isn-t-valid.patch +ksmbd-add-max-ip-connections-parameter.patch +misc-fastrpc-add-missing-dev_err-newlines.patch +misc-fastrpc-save-actual-dma-size-in-fastrpc_map-structure.patch 
+pci-endpoint-remove-surplus-return-statement-from-pci_epf_test_clean_dma_chan.patch +pci-endpoint-pci-epf-test-add-null-check-for-dma-channels-before-release.patch +btrfs-fix-the-incorrect-max_bytes-value-for-find_lock_delalloc_range.patch +rseq-protect-event-mask-against-membarrier-ipi.patch +kvm-svm-emulate-perf_cntr_global_status_set-for-perfmonv2.patch +ipmi-rework-user-message-limit-handling.patch +ipmi-fix-handling-of-messages-with-provided-receive-message-pointer.patch +arm64-kprobes-call-set_memory_rox-for-kprobe-page.patch +arm64-mte-do-not-flag-the-zero-page-as-pg_mte_tagged.patch +acpi-battery-allocate-driver-data-through-devm_-apis.patch +acpi-battery-initialize-mutexes-through-devm_-apis.patch +acpi-battery-check-for-error-code-from-devm_mutex_init-call.patch +acpi-battery-add-synchronization-between-interface-updates.patch +acpi-property-disregard-references-in-data-only-subnode-lists.patch +acpi-property-add-code-comments-explaining-what-is-going-on.patch +acpi-property-do-not-pass-null-handles-to-acpi_attach_data.patch +s390-bpf-change-seen_reg-to-a-mask.patch +s390-bpf-centralize-frame-offset-calculations.patch +s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch +s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch +s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch diff --git a/queue-6.6/squashfs-add-additional-inode-sanity-checking.patch b/queue-6.6/squashfs-add-additional-inode-sanity-checking.patch new file mode 100644 index 0000000000..ac59eaffc4 --- /dev/null +++ b/queue-6.6/squashfs-add-additional-inode-sanity-checking.patch @@ -0,0 +1,90 @@ +From stable+bounces-185509-greg=kroah.com@vger.kernel.org Mon Oct 13 22:24:52 2025 +From: Sasha Levin +Date: Mon, 13 Oct 2025 16:24:43 -0400 +Subject: Squashfs: add additional inode sanity checking +To: stable@vger.kernel.org +Cc: Phillip Lougher , Andrew Morton , Sasha Levin +Message-ID: <20251013202444.3589382-1-sashal@kernel.org> + +From: Phillip 
Lougher + +[ Upstream commit 9ee94bfbe930a1b39df53fa2d7b31141b780eb5a ] + +Patch series "Squashfs: performance improvement and a sanity check". + +This patchset adds an additional sanity check when reading regular file +inodes, and adds support for SEEK_DATA/SEEK_HOLE lseek() whence values. + +This patch (of 2): + +Add an additional sanity check when reading regular file inodes. + +A regular file if the file size is an exact multiple of the filesystem +block size cannot have a fragment. This is because by definition a +fragment block stores tailends which are not a whole block in size. + +Link: https://lkml.kernel.org/r/20250923220652.568416-1-phillip@squashfs.org.uk +Link: https://lkml.kernel.org/r/20250923220652.568416-2-phillip@squashfs.org.uk +Signed-off-by: Phillip Lougher +Signed-off-by: Andrew Morton +Stable-dep-of: 9f1c14c1de1b ("Squashfs: reject negative file sizes in squashfs_read_inode()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/squashfs/inode.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/fs/squashfs/inode.c ++++ b/fs/squashfs/inode.c +@@ -140,8 +140,17 @@ int squashfs_read_inode(struct inode *in + if (err < 0) + goto failed_read; + ++ inode->i_size = le32_to_cpu(sqsh_ino->file_size); + frag = le32_to_cpu(sqsh_ino->fragment); + if (frag != SQUASHFS_INVALID_FRAG) { ++ /* ++ * the file cannot have a fragment (tailend) and have a ++ * file size a multiple of the block size ++ */ ++ if ((inode->i_size & (msblk->block_size - 1)) == 0) { ++ err = -EINVAL; ++ goto failed_read; ++ } + frag_offset = le32_to_cpu(sqsh_ino->offset); + frag_size = squashfs_frag_lookup(sb, frag, &frag_blk); + if (frag_size < 0) { +@@ -155,7 +164,6 @@ int squashfs_read_inode(struct inode *in + } + + set_nlink(inode, 1); +- inode->i_size = le32_to_cpu(sqsh_ino->file_size); + inode->i_fop = &generic_ro_fops; + inode->i_mode |= S_IFREG; + inode->i_blocks = ((inode->i_size - 1) >> 9) + 1; +@@ -184,8 +192,17 @@ int 
squashfs_read_inode(struct inode *in + if (err < 0) + goto failed_read; + ++ inode->i_size = le64_to_cpu(sqsh_ino->file_size); + frag = le32_to_cpu(sqsh_ino->fragment); + if (frag != SQUASHFS_INVALID_FRAG) { ++ /* ++ * the file cannot have a fragment (tailend) and have a ++ * file size a multiple of the block size ++ */ ++ if ((inode->i_size & (msblk->block_size - 1)) == 0) { ++ err = -EINVAL; ++ goto failed_read; ++ } + frag_offset = le32_to_cpu(sqsh_ino->offset); + frag_size = squashfs_frag_lookup(sb, frag, &frag_blk); + if (frag_size < 0) { +@@ -200,7 +217,6 @@ int squashfs_read_inode(struct inode *in + + xattr_id = le32_to_cpu(sqsh_ino->xattr); + set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); +- inode->i_size = le64_to_cpu(sqsh_ino->file_size); + inode->i_op = &squashfs_inode_ops; + inode->i_fop = &generic_ro_fops; + inode->i_mode |= S_IFREG; diff --git a/queue-6.6/squashfs-reject-negative-file-sizes-in-squashfs_read_inode.patch b/queue-6.6/squashfs-reject-negative-file-sizes-in-squashfs_read_inode.patch new file mode 100644 index 0000000000..83c29d7950 --- /dev/null +++ b/queue-6.6/squashfs-reject-negative-file-sizes-in-squashfs_read_inode.patch @@ -0,0 +1,48 @@ +From stable+bounces-185510-greg=kroah.com@vger.kernel.org Mon Oct 13 22:24:54 2025 +From: Sasha Levin +Date: Mon, 13 Oct 2025 16:24:44 -0400 +Subject: Squashfs: reject negative file sizes in squashfs_read_inode() +To: stable@vger.kernel.org +Cc: Phillip Lougher , syzbot+f754e01116421e9754b9@syzkaller.appspotmail.com, Amir Goldstein , Andrew Morton , Sasha Levin +Message-ID: <20251013202444.3589382-2-sashal@kernel.org> + +From: Phillip Lougher + +[ Upstream commit 9f1c14c1de1bdde395f6cc893efa4f80a2ae3b2b ] + +Syskaller reports a "WARNING in ovl_copy_up_file" in overlayfs. + +This warning is ultimately caused because the underlying Squashfs file +system returns a file with a negative file size. + +This commit checks for a negative file size and returns EINVAL. 
+ +[phillip@squashfs.org.uk: only need to check 64 bit quantity] + Link: https://lkml.kernel.org/r/20250926222305.110103-1-phillip@squashfs.org.uk +Link: https://lkml.kernel.org/r/20250926215935.107233-1-phillip@squashfs.org.uk +Fixes: 6545b246a2c8 ("Squashfs: inode operations") +Signed-off-by: Phillip Lougher +Reported-by: syzbot+f754e01116421e9754b9@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/68d580e5.a00a0220.303701.0019.GAE@google.com/ +Cc: Amir Goldstein +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/squashfs/inode.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/squashfs/inode.c ++++ b/fs/squashfs/inode.c +@@ -193,6 +193,10 @@ int squashfs_read_inode(struct inode *in + goto failed_read; + + inode->i_size = le64_to_cpu(sqsh_ino->file_size); ++ if (inode->i_size < 0) { ++ err = -EINVAL; ++ goto failed_read; ++ } + frag = le32_to_cpu(sqsh_ino->fragment); + if (frag != SQUASHFS_INVALID_FRAG) { + /* diff --git a/queue-6.6/tracing-fix-race-condition-in-kprobe-initialization-causing-null-pointer-dereference.patch b/queue-6.6/tracing-fix-race-condition-in-kprobe-initialization-causing-null-pointer-dereference.patch new file mode 100644 index 0000000000..20817f4131 --- /dev/null +++ b/queue-6.6/tracing-fix-race-condition-in-kprobe-initialization-causing-null-pointer-dereference.patch @@ -0,0 +1,274 @@ +From stable+bounces-185563-greg=kroah.com@vger.kernel.org Tue Oct 14 04:15:32 2025 +From: Sasha Levin +Date: Mon, 13 Oct 2025 22:15:24 -0400 +Subject: tracing: Fix race condition in kprobe initialization causing NULL pointer dereference +To: stable@vger.kernel.org +Cc: Yuan Chen , "Masami Hiramatsu (Google)" , Sasha Levin +Message-ID: <20251014021524.3834046-1-sashal@kernel.org> + +From: Yuan Chen + +[ Upstream commit 9cf9aa7b0acfde7545c1a1d912576e9bab28dc6f ] + +There is a critical race condition in kprobe initialization that can lead to +NULL pointer dereference and kernel crash. 
+ +[1135630.084782] Unable to handle kernel paging request at virtual address 0000710a04630000 +... +[1135630.260314] pstate: 404003c9 (nZcv DAIF +PAN -UAO) +[1135630.269239] pc : kprobe_perf_func+0x30/0x260 +[1135630.277643] lr : kprobe_dispatcher+0x44/0x60 +[1135630.286041] sp : ffffaeff4977fa40 +[1135630.293441] x29: ffffaeff4977fa40 x28: ffffaf015340e400 +[1135630.302837] x27: 0000000000000000 x26: 0000000000000000 +[1135630.312257] x25: ffffaf029ed108a8 x24: ffffaf015340e528 +[1135630.321705] x23: ffffaeff4977fc50 x22: ffffaeff4977fc50 +[1135630.331154] x21: 0000000000000000 x20: ffffaeff4977fc50 +[1135630.340586] x19: ffffaf015340e400 x18: 0000000000000000 +[1135630.349985] x17: 0000000000000000 x16: 0000000000000000 +[1135630.359285] x15: 0000000000000000 x14: 0000000000000000 +[1135630.368445] x13: 0000000000000000 x12: 0000000000000000 +[1135630.377473] x11: 0000000000000000 x10: 0000000000000000 +[1135630.386411] x9 : 0000000000000000 x8 : 0000000000000000 +[1135630.395252] x7 : 0000000000000000 x6 : 0000000000000000 +[1135630.403963] x5 : 0000000000000000 x4 : 0000000000000000 +[1135630.412545] x3 : 0000710a04630000 x2 : 0000000000000006 +[1135630.421021] x1 : ffffaeff4977fc50 x0 : 0000710a04630000 +[1135630.429410] Call trace: +[1135630.434828] kprobe_perf_func+0x30/0x260 +[1135630.441661] kprobe_dispatcher+0x44/0x60 +[1135630.448396] aggr_pre_handler+0x70/0xc8 +[1135630.454959] kprobe_breakpoint_handler+0x140/0x1e0 +[1135630.462435] brk_handler+0xbc/0xd8 +[1135630.468437] do_debug_exception+0x84/0x138 +[1135630.475074] el1_dbg+0x18/0x8c +[1135630.480582] security_file_permission+0x0/0xd0 +[1135630.487426] vfs_write+0x70/0x1c0 +[1135630.493059] ksys_write+0x5c/0xc8 +[1135630.498638] __arm64_sys_write+0x24/0x30 +[1135630.504821] el0_svc_common+0x78/0x130 +[1135630.510838] el0_svc_handler+0x38/0x78 +[1135630.516834] el0_svc+0x8/0x1b0 + +kernel/trace/trace_kprobe.c: 1308 +0xffff3df8995039ec : ldr x21, [x24,#120] +include/linux/compiler.h: 294 
+0xffff3df8995039f0 : ldr x1, [x21,x0] + +kernel/trace/trace_kprobe.c +1308: head = this_cpu_ptr(call->perf_events); +1309: if (hlist_empty(head)) +1310: return 0; + +crash> struct trace_event_call -o +struct trace_event_call { + ... + [120] struct hlist_head *perf_events; //(call->perf_event) + ... +} + +crash> struct trace_event_call ffffaf015340e528 +struct trace_event_call { + ... + perf_events = 0xffff0ad5fa89f088, //this value is correct, but x21 = 0 + ... +} + +Race Condition Analysis: + +The race occurs between kprobe activation and perf_events initialization: + + CPU0 CPU1 + ==== ==== + perf_kprobe_init + perf_trace_event_init + tp_event->perf_events = list;(1) + tp_event->class->reg (2)← KPROBE ACTIVE + Debug exception triggers + ... + kprobe_dispatcher + kprobe_perf_func (tk->tp.flags & TP_FLAG_PROFILE) + head = this_cpu_ptr(call->perf_events)(3) + (perf_events is still NULL) + +Problem: +1. CPU0 executes (1) assigning tp_event->perf_events = list +2. CPU0 executes (2) enabling kprobe functionality via class->reg() +3. CPU1 triggers and reaches kprobe_dispatcher +4. CPU1 checks TP_FLAG_PROFILE - condition passes (step 2 completed) +5. CPU1 calls kprobe_perf_func() and crashes at (3) because + call->perf_events is still NULL + +CPU1 sees that kprobe functionality is enabled but does not see that +perf_events has been assigned. + +Add pairing read and write memory barriers to guarantee that if CPU1 +sees that kprobe functionality is enabled, it must also see that +perf_events has been assigned. 
+ +Link: https://lore.kernel.org/all/20251001022025.44626-1-chenyuan_fl@163.com/ + +Fixes: 50d780560785 ("tracing/kprobes: Add probe handler dispatcher to support perf and ftrace concurrent use") +Cc: stable@vger.kernel.org +Signed-off-by: Yuan Chen +Signed-off-by: Masami Hiramatsu (Google) +[ Adjust context ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_fprobe.c | 11 +++++++---- + kernel/trace/trace_kprobe.c | 11 +++++++---- + kernel/trace/trace_probe.h | 9 +++++++-- + kernel/trace/trace_uprobe.c | 12 ++++++++---- + 4 files changed, 29 insertions(+), 14 deletions(-) + +--- a/kernel/trace/trace_fprobe.c ++++ b/kernel/trace/trace_fprobe.c +@@ -342,12 +342,14 @@ static int fentry_dispatcher(struct fpro + void *entry_data) + { + struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp); ++ unsigned int flags = trace_probe_load_flag(&tf->tp); + int ret = 0; + +- if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE)) ++ if (flags & TP_FLAG_TRACE) + fentry_trace_func(tf, entry_ip, regs); ++ + #ifdef CONFIG_PERF_EVENTS +- if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE)) ++ if (flags & TP_FLAG_PROFILE) + ret = fentry_perf_func(tf, entry_ip, regs); + #endif + return ret; +@@ -359,11 +361,12 @@ static void fexit_dispatcher(struct fpro + void *entry_data) + { + struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp); ++ unsigned int flags = trace_probe_load_flag(&tf->tp); + +- if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE)) ++ if (flags & TP_FLAG_TRACE) + fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data); + #ifdef CONFIG_PERF_EVENTS +- if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE)) ++ if (flags & TP_FLAG_PROFILE) + fexit_perf_func(tf, entry_ip, ret_ip, regs, entry_data); + #endif + } +--- a/kernel/trace/trace_kprobe.c ++++ b/kernel/trace/trace_kprobe.c +@@ -1713,14 +1713,15 @@ static int kprobe_register(struct trace_ + static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) + { + 
struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); ++ unsigned int flags = trace_probe_load_flag(&tk->tp); + int ret = 0; + + raw_cpu_inc(*tk->nhit); + +- if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) ++ if (flags & TP_FLAG_TRACE) + kprobe_trace_func(tk, regs); + #ifdef CONFIG_PERF_EVENTS +- if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE)) ++ if (flags & TP_FLAG_PROFILE) + ret = kprobe_perf_func(tk, regs); + #endif + return ret; +@@ -1732,6 +1733,7 @@ kretprobe_dispatcher(struct kretprobe_in + { + struct kretprobe *rp = get_kretprobe(ri); + struct trace_kprobe *tk; ++ unsigned int flags; + + /* + * There is a small chance that get_kretprobe(ri) returns NULL when +@@ -1744,10 +1746,11 @@ kretprobe_dispatcher(struct kretprobe_in + tk = container_of(rp, struct trace_kprobe, rp); + raw_cpu_inc(*tk->nhit); + +- if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) ++ flags = trace_probe_load_flag(&tk->tp); ++ if (flags & TP_FLAG_TRACE) + kretprobe_trace_func(tk, ri, regs); + #ifdef CONFIG_PERF_EVENTS +- if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE)) ++ if (flags & TP_FLAG_PROFILE) + kretprobe_perf_func(tk, ri, regs); + #endif + return 0; /* We don't tweak kernel, so just return 0 */ +--- a/kernel/trace/trace_probe.h ++++ b/kernel/trace/trace_probe.h +@@ -268,16 +268,21 @@ struct event_file_link { + struct list_head list; + }; + ++static inline unsigned int trace_probe_load_flag(struct trace_probe *tp) ++{ ++ return smp_load_acquire(&tp->event->flags); ++} ++ + static inline bool trace_probe_test_flag(struct trace_probe *tp, + unsigned int flag) + { +- return !!(tp->event->flags & flag); ++ return !!(trace_probe_load_flag(tp) & flag); + } + + static inline void trace_probe_set_flag(struct trace_probe *tp, + unsigned int flag) + { +- tp->event->flags |= flag; ++ smp_store_release(&tp->event->flags, tp->event->flags | flag); + } + + static inline void trace_probe_clear_flag(struct trace_probe *tp, +--- a/kernel/trace/trace_uprobe.c ++++ 
b/kernel/trace/trace_uprobe.c +@@ -1514,6 +1514,7 @@ static int uprobe_dispatcher(struct upro + struct trace_uprobe *tu; + struct uprobe_dispatch_data udd; + struct uprobe_cpu_buffer *ucb = NULL; ++ unsigned int flags; + int ret = 0; + + tu = container_of(con, struct trace_uprobe, consumer); +@@ -1527,11 +1528,12 @@ static int uprobe_dispatcher(struct upro + if (WARN_ON_ONCE(!uprobe_cpu_buffer)) + return 0; + +- if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) ++ flags = trace_probe_load_flag(&tu->tp); ++ if (flags & TP_FLAG_TRACE) + ret |= uprobe_trace_func(tu, regs, &ucb); + + #ifdef CONFIG_PERF_EVENTS +- if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) ++ if (flags & TP_FLAG_PROFILE) + ret |= uprobe_perf_func(tu, regs, &ucb); + #endif + uprobe_buffer_put(ucb); +@@ -1544,6 +1546,7 @@ static int uretprobe_dispatcher(struct u + struct trace_uprobe *tu; + struct uprobe_dispatch_data udd; + struct uprobe_cpu_buffer *ucb = NULL; ++ unsigned int flags; + + tu = container_of(con, struct trace_uprobe, consumer); + +@@ -1555,11 +1558,12 @@ static int uretprobe_dispatcher(struct u + if (WARN_ON_ONCE(!uprobe_cpu_buffer)) + return 0; + +- if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) ++ flags = trace_probe_load_flag(&tu->tp); ++ if (flags & TP_FLAG_TRACE) + uretprobe_trace_func(tu, func, regs, &ucb); + + #ifdef CONFIG_PERF_EVENTS +- if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) ++ if (flags & TP_FLAG_PROFILE) + uretprobe_perf_func(tu, func, regs, &ucb); + #endif + uprobe_buffer_put(ucb);