From: Greg Kroah-Hartman Date: Mon, 12 Aug 2024 11:38:07 +0000 (+0200) Subject: 6.10-stable patches X-Git-Tag: v6.1.105~73 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7bfc4532ea014588f561d0c666b786d2debc4be8;p=thirdparty%2Fkernel%2Fstable-queue.git 6.10-stable patches added patches: arm64-dts-ti-k3-am62-verdin-dahlia-keep-ctrl_sleep_moci-regulator-on.patch driver-core-fix-uevent_show-vs-driver-detach-race.patch ntp-safeguard-against-time_constant-overflow.patch timekeeping-fix-bogus-clock_was_set-invocation-in-do_adjtimex.patch tracefs-fix-inode-allocation.patch tracefs-use-generic-inode-rcu-for-synchronizing-freeing.patch --- diff --git a/queue-6.10/arm64-dts-ti-k3-am62-verdin-dahlia-keep-ctrl_sleep_moci-regulator-on.patch b/queue-6.10/arm64-dts-ti-k3-am62-verdin-dahlia-keep-ctrl_sleep_moci-regulator-on.patch new file mode 100644 index 00000000000..ab48071b134 --- /dev/null +++ b/queue-6.10/arm64-dts-ti-k3-am62-verdin-dahlia-keep-ctrl_sleep_moci-regulator-on.patch @@ -0,0 +1,94 @@ +From 9438f970296f9c3a6dd340ae0ad01d2f056c88e6 Mon Sep 17 00:00:00 2001 +From: Francesco Dolcini +Date: Wed, 31 Jul 2024 07:48:04 +0200 +Subject: arm64: dts: ti: k3-am62-verdin-dahlia: Keep CTRL_SLEEP_MOCI# regulator on + +From: Francesco Dolcini + +commit 9438f970296f9c3a6dd340ae0ad01d2f056c88e6 upstream. + +This reverts commit 3935fbc87ddebea5439f3ab6a78b1e83e976bf88. + +CTRL_SLEEP_MOCI# is a signal that is defined for all the SoM +implementing the Verdin family specification, this signal is supposed to +control the power enable in the carrier board when the system is in deep +sleep mode. However this is not possible with Texas Instruments AM62 +SoC, IOs output buffer is disabled in deep sleep and IOs are in +tri-state mode. + +Given that we cannot properly control this pin, force it to be always +high to minimize potential issues. 
+ +Fixes: 3935fbc87dde ("arm64: dts: ti: k3-am62-verdin-dahlia: support sleep-moci") +Cc: +Link: https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1361669/am625-gpio-output-state-in-deep-sleep/5244802 +Signed-off-by: Francesco Dolcini +Link: https://lore.kernel.org/r/20240731054804.6061-1-francesco@dolcini.it +Signed-off-by: Nishanth Menon +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi | 22 ---------------------- + arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi | 6 ------ + 2 files changed, 28 deletions(-) + +--- a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi ++++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi +@@ -43,15 +43,6 @@ + sound-dai = <&mcasp0>; + }; + }; +- +- reg_usb_hub: regulator-usb-hub { +- compatible = "regulator-fixed"; +- enable-active-high; +- /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */ +- gpio = <&main_gpio0 31 GPIO_ACTIVE_HIGH>; +- regulator-boot-on; +- regulator-name = "HUB_PWR_EN"; +- }; + }; + + /* Verdin ETHs */ +@@ -193,11 +184,6 @@ + status = "okay"; + }; + +-/* Do not force CTRL_SLEEP_MOCI# always enabled */ +-&reg_force_sleep_moci { +- status = "disabled"; +-}; +- + /* Verdin SD_1 */ + &sdhci1 { + status = "okay"; +@@ -218,15 +204,7 @@ + }; + + &usb1 { +- #address-cells = <1>; +- #size-cells = <0>; + status = "okay"; +- +- usb-hub@1 { +- compatible = "usb424,2744"; +- reg = <1>; +- vdd-supply = <&reg_usb_hub>; +- }; + }; + + /* Verdin CTRL_WAKE1_MICO# */ +--- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi ++++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi +@@ -138,12 +138,6 @@ + vin-supply = <&reg_1v8>; + }; + +- /* +- * By default we enable CTRL_SLEEP_MOCI#, this is required to have +- * peripherals on the carrier board powered. +- * If more granularity or power saving is required this can be disabled +- * in the carrier board device tree files. 
+- */ + reg_force_sleep_moci: regulator-force-sleep-moci { + compatible = "regulator-fixed"; + enable-active-high; diff --git a/queue-6.10/driver-core-fix-uevent_show-vs-driver-detach-race.patch b/queue-6.10/driver-core-fix-uevent_show-vs-driver-detach-race.patch new file mode 100644 index 00000000000..c5820fe2bad --- /dev/null +++ b/queue-6.10/driver-core-fix-uevent_show-vs-driver-detach-race.patch @@ -0,0 +1,157 @@ +From 15fffc6a5624b13b428bb1c6e9088e32a55eb82c Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Fri, 12 Jul 2024 12:42:09 -0700 +Subject: driver core: Fix uevent_show() vs driver detach race + +From: Dan Williams + +commit 15fffc6a5624b13b428bb1c6e9088e32a55eb82c upstream. + +uevent_show() wants to de-reference dev->driver->name. There is no clean +way for a device attribute to de-reference dev->driver unless that +attribute is defined via (struct device_driver).dev_groups. Instead, the +anti-pattern of taking the device_lock() in the attribute handler risks +deadlocks with code paths that remove device attributes while holding +the lock. + +This deadlock is typically invisible to lockdep given the device_lock() +is marked lockdep_set_novalidate_class(), but some subsystems allocate a +local lockdep key for @dev->mutex to reveal reports of the form: + + ====================================================== + WARNING: possible circular locking dependency detected + 6.10.0-rc7+ #275 Tainted: G OE N + ------------------------------------------------------ + modprobe/2374 is trying to acquire lock: + ffff8c2270070de0 (kn->active#6){++++}-{0:0}, at: __kernfs_remove+0xde/0x220 + + but task is already holding lock: + ffff8c22016e88f8 (&cxl_root_key){+.+.}-{3:3}, at: device_release_driver_internal+0x39/0x210 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #1 (&cxl_root_key){+.+.}-{3:3}: + __mutex_lock+0x99/0xc30 + uevent_show+0xac/0x130 + dev_attr_show+0x18/0x40 + sysfs_kf_seq_show+0xac/0xf0 + seq_read_iter+0x110/0x450 + vfs_read+0x25b/0x340 + ksys_read+0x67/0xf0 + do_syscall_64+0x75/0x190 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + + -> #0 (kn->active#6){++++}-{0:0}: + __lock_acquire+0x121a/0x1fa0 + lock_acquire+0xd6/0x2e0 + kernfs_drain+0x1e9/0x200 + __kernfs_remove+0xde/0x220 + kernfs_remove_by_name_ns+0x5e/0xa0 + device_del+0x168/0x410 + device_unregister+0x13/0x60 + devres_release_all+0xb8/0x110 + device_unbind_cleanup+0xe/0x70 + device_release_driver_internal+0x1c7/0x210 + driver_detach+0x47/0x90 + bus_remove_driver+0x6c/0xf0 + cxl_acpi_exit+0xc/0x11 [cxl_acpi] + __do_sys_delete_module.isra.0+0x181/0x260 + do_syscall_64+0x75/0x190 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +The observation though is that driver objects are typically much longer +lived than device objects. It is reasonable to perform lockless +de-reference of a @driver pointer even if it is racing detach from a +device. Given the infrequency of driver unregistration, use +synchronize_rcu() in module_remove_driver() to close any potential +races. It is potentially overkill to suffer synchronize_rcu() just to +handle the rare module removal racing uevent_show() event. + +Thanks to Tetsuo Handa for the debug analysis of the syzbot report [1]. + +Fixes: c0a40097f0bc ("drivers: core: synchronize really_probe() and dev_uevent()") +Reported-by: syzbot+4762dd74e32532cda5ff@syzkaller.appspotmail.com +Reported-by: Tetsuo Handa +Closes: http://lore.kernel.org/5aa5558f-90a4-4864-b1b1-5d6784c5607d@I-love.SAKURA.ne.jp [1] +Link: http://lore.kernel.org/669073b8ea479_5fffa294c1@dwillia2-xfh.jf.intel.com.notmuch +Cc: stable@vger.kernel.org +Cc: Ashish Sangwan +Cc: Namjae Jeon +Cc: Dirk Behme +Cc: Greg Kroah-Hartman +Cc: Rafael J. 
Wysocki +Signed-off-by: Dan Williams +Link: https://lore.kernel.org/r/172081332794.577428.9738802016494057132.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/core.c | 13 ++++++++----- + drivers/base/module.c | 4 ++++ + 2 files changed, 12 insertions(+), 5 deletions(-) + +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2640,6 +2641,7 @@ static const char *dev_uevent_name(const + static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) + { + const struct device *dev = kobj_to_dev(kobj); ++ struct device_driver *driver; + int retval = 0; + + /* add device node properties if present */ +@@ -2668,8 +2670,12 @@ static int dev_uevent(const struct kobje + if (dev->type && dev->type->name) + add_uevent_var(env, "DEVTYPE=%s", dev->type->name); + +- if (dev->driver) +- add_uevent_var(env, "DRIVER=%s", dev->driver->name); ++ /* Synchronize with module_remove_driver() */ ++ rcu_read_lock(); ++ driver = READ_ONCE(dev->driver); ++ if (driver) ++ add_uevent_var(env, "DRIVER=%s", driver->name); ++ rcu_read_unlock(); + + /* Add common DT information about the device */ + of_device_uevent(dev, env); +@@ -2739,11 +2745,8 @@ static ssize_t uevent_show(struct device + if (!env) + return -ENOMEM; + +- /* Synchronize with really_probe() */ +- device_lock(dev); + /* let the kset specific function add its keys */ + retval = kset->uevent_ops->uevent(&dev->kobj, env); +- device_unlock(dev); + if (retval) + goto out; + +--- a/drivers/base/module.c ++++ b/drivers/base/module.c +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include "base.h" + + static char *make_driver_name(struct device_driver *drv) +@@ -97,6 +98,9 @@ void module_remove_driver(struct device_ + if (!drv) + return; + ++ /* Synchronize with dev_uevent() */ ++ synchronize_rcu(); ++ + sysfs_remove_link(&drv->p->kobj, "module"); + + if (drv->owner) diff 
--git a/queue-6.10/ntp-safeguard-against-time_constant-overflow.patch b/queue-6.10/ntp-safeguard-against-time_constant-overflow.patch new file mode 100644 index 00000000000..548c04661fe --- /dev/null +++ b/queue-6.10/ntp-safeguard-against-time_constant-overflow.patch @@ -0,0 +1,64 @@ +From 06c03c8edce333b9ad9c6b207d93d3a5ae7c10c0 Mon Sep 17 00:00:00 2001 +From: Justin Stitt +Date: Fri, 17 May 2024 00:47:10 +0000 +Subject: ntp: Safeguard against time_constant overflow + +From: Justin Stitt + +commit 06c03c8edce333b9ad9c6b207d93d3a5ae7c10c0 upstream. + +Using syzkaller with the recently reintroduced signed integer overflow +sanitizer produces this UBSAN report: + +UBSAN: signed-integer-overflow in ../kernel/time/ntp.c:738:18 +9223372036854775806 + 4 cannot be represented in type 'long' +Call Trace: + handle_overflow+0x171/0x1b0 + __do_adjtimex+0x1236/0x1440 + do_adjtimex+0x2be/0x740 + +The user supplied time_constant value is incremented by four and then +clamped to the operating range. + +Before commit eea83d896e31 ("ntp: NTP4 user space bits update") the user +supplied value was sanity checked to be in the operating range. That change +removed the sanity check and relied on clamping after incrementing which +does not work correctly when the user supplied value is in the overflow +zone of the '+ 4' operation. + +The operation requires CAP_SYS_TIME and the side effect of the overflow is +NTP getting out of sync. + +Similar to the fixups for time_maxerror and time_esterror, clamp the user +space supplied value to the operating range. 
+ +[ tglx: Switch to clamping ] + +Fixes: eea83d896e31 ("ntp: NTP4 user space bits update") +Signed-off-by: Justin Stitt +Signed-off-by: Thomas Gleixner +Cc: Miroslav Lichvar +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20240517-b4-sio-ntp-c-v2-1-f3a80096f36f@google.com +Closes: https://github.com/KSPP/linux/issues/352 +Signed-off-by: Greg Kroah-Hartman +--- + kernel/time/ntp.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/kernel/time/ntp.c ++++ b/kernel/time/ntp.c +@@ -733,11 +733,10 @@ static inline void process_adjtimex_mode + time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT); + + if (txc->modes & ADJ_TIMECONST) { +- time_constant = txc->constant; ++ time_constant = clamp(txc->constant, 0, MAXTC); + if (!(time_status & STA_NANO)) + time_constant += 4; +- time_constant = min(time_constant, (long)MAXTC); +- time_constant = max(time_constant, 0l); ++ time_constant = clamp(time_constant, 0, MAXTC); + } + + if (txc->modes & ADJ_TAI && diff --git a/queue-6.10/series b/queue-6.10/series index beff9b623fe..9bf6775a371 100644 --- a/queue-6.10/series +++ b/queue-6.10/series @@ -200,3 +200,9 @@ media-v4l-fix-missing-tabular-column-hint-for-y14p-format.patch vhost-vdpa-switch-to-use-vmf_insert_pfn-in-the-fault-handler.patch spmi-pmic-arb-add-missing-newline-in-dev_err-format-.patch ntp-clamp-maxerror-and-esterror-to-operating-range.patch +driver-core-fix-uevent_show-vs-driver-detach-race.patch +arm64-dts-ti-k3-am62-verdin-dahlia-keep-ctrl_sleep_moci-regulator-on.patch +tracefs-fix-inode-allocation.patch +tracefs-use-generic-inode-rcu-for-synchronizing-freeing.patch +ntp-safeguard-against-time_constant-overflow.patch +timekeeping-fix-bogus-clock_was_set-invocation-in-do_adjtimex.patch diff --git a/queue-6.10/timekeeping-fix-bogus-clock_was_set-invocation-in-do_adjtimex.patch b/queue-6.10/timekeeping-fix-bogus-clock_was_set-invocation-in-do_adjtimex.patch new file mode 100644 index 00000000000..2d1f1e03e60 --- /dev/null +++ 
b/queue-6.10/timekeeping-fix-bogus-clock_was_set-invocation-in-do_adjtimex.patch @@ -0,0 +1,40 @@ +From 5916be8a53de6401871bdd953f6c60237b47d6d3 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 3 Aug 2024 17:07:51 +0200 +Subject: timekeeping: Fix bogus clock_was_set() invocation in do_adjtimex() + +From: Thomas Gleixner + +commit 5916be8a53de6401871bdd953f6c60237b47d6d3 upstream. + +The addition of the bases argument to clock_was_set() fixed up all call +sites correctly except for do_adjtimex(). This uses CLOCK_REALTIME +instead of CLOCK_SET_WALL as argument. CLOCK_REALTIME is 0. + +As a result the effect of that clock_was_set() notification is incomplete +and might result in timers expiring late because the hrtimer code does +not re-evaluate the affected clock bases. + +Use CLOCK_SET_WALL instead of CLOCK_REALTIME to tell the hrtimers code +which clock bases need to be re-evaluated. + +Fixes: 17a1b8826b45 ("hrtimer: Add bases argument to clock_was_set()") +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/877ccx7igo.ffs@tglx +Signed-off-by: Greg Kroah-Hartman +--- + kernel/time/timekeeping.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/time/timekeeping.c ++++ b/kernel/time/timekeeping.c +@@ -2479,7 +2479,7 @@ int do_adjtimex(struct __kernel_timex *t + clock_set |= timekeeping_advance(TK_ADV_FREQ); + + if (clock_set) +- clock_was_set(CLOCK_REALTIME); ++ clock_was_set(CLOCK_SET_WALL); + + ntp_notify_cmos_timer(); + diff --git a/queue-6.10/tracefs-fix-inode-allocation.patch b/queue-6.10/tracefs-fix-inode-allocation.patch new file mode 100644 index 00000000000..a53cf4395f9 --- /dev/null +++ b/queue-6.10/tracefs-fix-inode-allocation.patch @@ -0,0 +1,44 @@ +From 0df2ac59bebfac221463ef57ed3554899b41d75f Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 7 Aug 2024 13:51:38 +0200 +Subject: tracefs: Fix inode allocation + +From: Mathias Krause + +commit 
0df2ac59bebfac221463ef57ed3554899b41d75f upstream. + +The leading comment above alloc_inode_sb() is pretty explicit about it: + + /* + * This must be used for allocating filesystems specific inodes to set + * up the inode reclaim context correctly. + */ + +Switch tracefs over to alloc_inode_sb() to make sure inodes are properly +linked. + +Cc: Ajay Kaher +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Al Viro +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/20240807115143.45927-2-minipli@grsecurity.net +Fixes: ba37ff75e04b ("eventfs: Implement tracefs_inode_cache") +Signed-off-by: Mathias Krause +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -42,7 +42,7 @@ static struct inode *tracefs_alloc_inode + struct tracefs_inode *ti; + unsigned long flags; + +- ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); ++ ti = alloc_inode_sb(sb, tracefs_inode_cachep, GFP_KERNEL); + if (!ti) + return NULL; + diff --git a/queue-6.10/tracefs-use-generic-inode-rcu-for-synchronizing-freeing.patch b/queue-6.10/tracefs-use-generic-inode-rcu-for-synchronizing-freeing.patch new file mode 100644 index 00000000000..667a49db84a --- /dev/null +++ b/queue-6.10/tracefs-use-generic-inode-rcu-for-synchronizing-freeing.patch @@ -0,0 +1,259 @@ +From 0b6743bd60a56a701070b89fb80c327a44b7b3e2 Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Wed, 7 Aug 2024 18:54:02 -0400 +Subject: tracefs: Use generic inode RCU for synchronizing freeing + +From: Steven Rostedt + +commit 0b6743bd60a56a701070b89fb80c327a44b7b3e2 upstream. + +With structure layout randomization enabled for 'struct inode' we need to +avoid overlapping any of the RCU-used / initialized-only-once members, +e.g. i_lru or i_sb_list to not corrupt related list traversals when making +use of the rcu_head. 
+ +For an unlucky structure layout of 'struct inode' we may end up with the +following splat when running the ftrace selftests: + +[<...>] list_del corruption, ffff888103ee2cb0->next (tracefs_inode_cache+0x0/0x4e0 [slab object]) is NULL (prev is tracefs_inode_cache+0x78/0x4e0 [slab object]) +[<...>] ------------[ cut here ]------------ +[<...>] kernel BUG at lib/list_debug.c:54! +[<...>] invalid opcode: 0000 [#1] PREEMPT SMP KASAN +[<...>] CPU: 3 PID: 2550 Comm: mount Tainted: G N 6.8.12-grsec+ #122 ed2f536ca62f28b087b90e3cc906a8d25b3ddc65 +[<...>] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 +[<...>] RIP: 0010:[] __list_del_entry_valid_or_report+0x138/0x3e0 +[<...>] Code: 48 b8 99 fb 65 f2 ff ff ff ff e9 03 5c d9 fc cc 48 b8 99 fb 65 f2 ff ff ff ff e9 33 5a d9 fc cc 48 b8 99 fb 65 f2 ff ff ff ff <0f> 0b 4c 89 e9 48 89 ea 48 89 ee 48 c7 c7 60 8f dd 89 31 c0 e8 2f +[<...>] RSP: 0018:fffffe80416afaf0 EFLAGS: 00010283 +[<...>] RAX: 0000000000000098 RBX: ffff888103ee2cb0 RCX: 0000000000000000 +[<...>] RDX: ffffffff84655fe8 RSI: ffffffff89dd8b60 RDI: 0000000000000001 +[<...>] RBP: ffff888103ee2cb0 R08: 0000000000000001 R09: fffffbd0082d5f25 +[<...>] R10: fffffe80416af92f R11: 0000000000000001 R12: fdf99c16731d9b6d +[<...>] R13: 0000000000000000 R14: ffff88819ad4b8b8 R15: 0000000000000000 +[<...>] RBX: tracefs_inode_cache+0x0/0x4e0 [slab object] +[<...>] RDX: __list_del_entry_valid_or_report+0x108/0x3e0 +[<...>] RSI: __func__.47+0x4340/0x4400 +[<...>] RBP: tracefs_inode_cache+0x0/0x4e0 [slab object] +[<...>] RSP: process kstack fffffe80416afaf0+0x7af0/0x8000 [mount 2550 2550] +[<...>] R09: kasan shadow of process kstack fffffe80416af928+0x7928/0x8000 [mount 2550 2550] +[<...>] R10: process kstack fffffe80416af92f+0x792f/0x8000 [mount 2550 2550] +[<...>] R14: tracefs_inode_cache+0x78/0x4e0 [slab object] +[<...>] FS: 00006dcb380c1840(0000) GS:ffff8881e0600000(0000) knlGS:0000000000000000 +[<...>] CS: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 +[<...>] CR2: 000076ab72b30e84 CR3: 000000000b088004 CR4: 0000000000360ef0 shadow CR4: 0000000000360ef0 +[<...>] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[<...>] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[<...>] ASID: 0003 +[<...>] Stack: +[<...>] ffffffff818a2315 00000000f5c856ee ffffffff896f1840 ffff888103ee2cb0 +[<...>] ffff88812b6b9750 0000000079d714b6 fffffbfff1e9280b ffffffff8f49405f +[<...>] 0000000000000001 0000000000000000 ffff888104457280 ffffffff8248b392 +[<...>] Call Trace: +[<...>] +[<...>] [] ? lock_release+0x175/0x380 fffffe80416afaf0 +[<...>] [] list_lru_del+0x152/0x740 fffffe80416afb48 +[<...>] [] list_lru_del_obj+0x113/0x280 fffffe80416afb88 +[<...>] [] ? _atomic_dec_and_lock+0x119/0x200 fffffe80416afb90 +[<...>] [] iput_final+0x1c4/0x9a0 fffffe80416afbb8 +[<...>] [] dentry_unlink_inode+0x44b/0xaa0 fffffe80416afbf8 +[<...>] [] __dentry_kill+0x23c/0xf00 fffffe80416afc40 +[<...>] [] ? __this_cpu_preempt_check+0x1f/0xa0 fffffe80416afc48 +[<...>] [] ? shrink_dentry_list+0x1c5/0x760 fffffe80416afc70 +[<...>] [] ? shrink_dentry_list+0x51/0x760 fffffe80416afc78 +[<...>] [] shrink_dentry_list+0x288/0x760 fffffe80416afc80 +[<...>] [] shrink_dcache_sb+0x155/0x420 fffffe80416afcc8 +[<...>] [] ? debug_smp_processor_id+0x23/0xa0 fffffe80416afce0 +[<...>] [] ? do_one_tree+0x140/0x140 fffffe80416afcf8 +[<...>] [] ? do_remount+0x329/0xa00 fffffe80416afd18 +[<...>] [] ? security_sb_remount+0x81/0x1c0 fffffe80416afd38 +[<...>] [] reconfigure_super+0x856/0x14e0 fffffe80416afd70 +[<...>] [] ? ns_capable_common+0xe7/0x2a0 fffffe80416afd90 +[<...>] [] do_remount+0x416/0xa00 fffffe80416afdd0 +[<...>] [] path_mount+0x5c4/0x900 fffffe80416afe28 +[<...>] [] ? finish_automount+0x13a0/0x13a0 fffffe80416afe60 +[<...>] [] ? user_path_at_empty+0xb2/0x140 fffffe80416afe88 +[<...>] [] do_mount+0x115/0x1c0 fffffe80416afeb8 +[<...>] [] ? path_mount+0x900/0x900 fffffe80416afed8 +[<...>] [] ? 
__kasan_check_write+0x1c/0xa0 fffffe80416afee0 +[<...>] [] __do_sys_mount+0x12f/0x280 fffffe80416aff30 +[<...>] [] __x64_sys_mount+0xcd/0x2e0 fffffe80416aff70 +[<...>] [] ? syscall_trace_enter+0x218/0x380 fffffe80416aff88 +[<...>] [] x64_sys_call+0x5d5e/0x6720 fffffe80416affa8 +[<...>] [] do_syscall_64+0xcd/0x3c0 fffffe80416affb8 +[<...>] [] entry_SYSCALL_64_safe_stack+0x4c/0x87 fffffe80416affe8 +[<...>] +[<...>] +[<...>] RIP: 0033:[<00006dcb382ff66a>] vm_area_struct[mount 2550 2550 file 6dcb38225000-6dcb3837e000 22 55(read|exec|mayread|mayexec)]+0x0/0xb8 [userland map] +[<...>] Code: 48 8b 0d 29 18 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d f6 17 0d 00 f7 d8 64 89 01 48 +[<...>] RSP: 002b:0000763d68192558 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 +[<...>] RAX: ffffffffffffffda RBX: 00006dcb38433264 RCX: 00006dcb382ff66a +[<...>] RDX: 000017c3e0d11210 RSI: 000017c3e0d1a5a0 RDI: 000017c3e0d1ae70 +[<...>] RBP: 000017c3e0d10fb0 R08: 000017c3e0d11260 R09: 00006dcb383d1be0 +[<...>] R10: 000000000020002e R11: 0000000000000246 R12: 0000000000000000 +[<...>] R13: 000017c3e0d1ae70 R14: 000017c3e0d11210 R15: 000017c3e0d10fb0 +[<...>] RBX: vm_area_struct[mount 2550 2550 file 6dcb38433000-6dcb38434000 5b 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] RCX: vm_area_struct[mount 2550 2550 file 6dcb38225000-6dcb3837e000 22 55(read|exec|mayread|mayexec)]+0x0/0xb8 [userland map] +[<...>] RDX: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] RSI: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] RDI: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] 
RBP: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] RSP: vm_area_struct[mount 2550 2550 anon 763d68173000-763d68195000 7ffffffdd 100133(read|write|mayread|maywrite|growsdown|account)]+0x0/0xb8 [userland map] +[<...>] R08: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] R09: vm_area_struct[mount 2550 2550 file 6dcb383d1000-6dcb383d3000 1cd 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] R13: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] R14: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] R15: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] +[<...>] Modules linked in: +[<...>] ---[ end trace 0000000000000000 ]--- + +The list debug message as well as RBX's symbolic value point out that the +object in question was allocated from 'tracefs_inode_cache' and that the +list's '->next' member is at offset 0. Dumping the layout of the relevant +parts of 'struct tracefs_inode' gives the following: + + struct tracefs_inode { + union { + struct inode { + struct list_head { + struct list_head * next; /* 0 8 */ + struct list_head * prev; /* 8 8 */ + } i_lru; + [...] + } vfs_inode; + struct callback_head { + void (*func)(struct callback_head *); /* 0 8 */ + struct callback_head * next; /* 8 8 */ + } rcu; + }; + [...] + }; + +Above shows that 'vfs_inode.i_lru' overlaps with 'rcu' which will +destroy the 'i_lru' list as soon as the 'rcu' member gets used, e.g. in +call_rcu() or later when calling the RCU callback. 
This will disturb +concurrent list traversals as well as object reuse which assumes these +list heads will keep their integrity. + +For reproduction, the following diff manually overlays 'i_lru' with +'rcu' as, otherwise, one would require some good portion of luck for +gambling an unlucky RANDSTRUCT seed: + + --- a/include/linux/fs.h + +++ b/include/linux/fs.h + @@ -629,6 +629,7 @@ struct inode { + umode_t i_mode; + unsigned short i_opflags; + kuid_t i_uid; + + struct list_head i_lru; /* inode LRU list */ + kgid_t i_gid; + unsigned int i_flags; + + @@ -690,7 +691,6 @@ struct inode { + u16 i_wb_frn_avg_time; + u16 i_wb_frn_history; + #endif + - struct list_head i_lru; /* inode LRU list */ + struct list_head i_sb_list; + struct list_head i_wb_list; /* backing dev writeback list */ + union { + +The tracefs inode does not need to supply its own RCU delayed destruction +of its inode. The inode code itself offers both a "destroy_inode()" +callback that gets called when the last reference of the inode is +released, and the "free_inode()" which is called after an RCU +synchronization period from the "destroy_inode()". + +The tracefs code can unlink the inode from its list in the destroy_inode() +callback, and then simply free it from the free_inode() callback. This +should provide the same protection. 
+ +Link: https://lore.kernel.org/all/20240807115143.45927-3-minipli@grsecurity.net/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Ajay Kaher +Cc: Ilkka =?utf-8?b?TmF1bGFww6TDpA==?= +Link: https://lore.kernel.org/20240807185402.61410544@gandalf.local.home +Fixes: baa23a8d4360 ("tracefs: Reset permissions on remount if permissions are options") +Reported-by: Mathias Krause +Reported-by: Brad Spengler +Suggested-by: Al Viro +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/inode.c | 10 ++++------ + fs/tracefs/internal.h | 5 +---- + include/linux/fs.h | 2 +- + 3 files changed, 6 insertions(+), 11 deletions(-) + +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -53,15 +53,14 @@ static struct inode *tracefs_alloc_inode + return &ti->vfs_inode; + } + +-static void tracefs_free_inode_rcu(struct rcu_head *rcu) ++static void tracefs_free_inode(struct inode *inode) + { +- struct tracefs_inode *ti; ++ struct tracefs_inode *ti = get_tracefs(inode); + +- ti = container_of(rcu, struct tracefs_inode, rcu); + kmem_cache_free(tracefs_inode_cachep, ti); + } + +-static void tracefs_free_inode(struct inode *inode) ++static void tracefs_destroy_inode(struct inode *inode) + { + struct tracefs_inode *ti = get_tracefs(inode); + unsigned long flags; +@@ -69,8 +68,6 @@ static void tracefs_free_inode(struct in + spin_lock_irqsave(&tracefs_inode_lock, flags); + list_del_rcu(&ti->list); + spin_unlock_irqrestore(&tracefs_inode_lock, flags); +- +- call_rcu(&ti->rcu, tracefs_free_inode_rcu); + } + + static ssize_t default_read_file(struct file *file, char __user *buf, +@@ -445,6 +442,7 @@ static int tracefs_drop_inode(struct ino + static const struct super_operations tracefs_super_operations = { + .alloc_inode = tracefs_alloc_inode, + .free_inode = tracefs_free_inode, ++ .destroy_inode = tracefs_destroy_inode, + .drop_inode = tracefs_drop_inode, + .statfs = simple_statfs, + .show_options = tracefs_show_options, +--- 
a/fs/tracefs/internal.h ++++ b/fs/tracefs/internal.h +@@ -10,10 +10,7 @@ enum { + }; + + struct tracefs_inode { +- union { +- struct inode vfs_inode; +- struct rcu_head rcu; +- }; ++ struct inode vfs_inode; + /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ + struct list_head list; + unsigned long flags; +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -629,6 +629,7 @@ struct inode { + umode_t i_mode; + unsigned short i_opflags; + kuid_t i_uid; ++ struct list_head i_lru; /* inode LRU list */ + kgid_t i_gid; + unsigned int i_flags; + +@@ -690,7 +691,6 @@ struct inode { + u16 i_wb_frn_avg_time; + u16 i_wb_frn_history; + #endif +- struct list_head i_lru; /* inode LRU list */ + struct list_head i_sb_list; + struct list_head i_wb_list; /* backing dev writeback list */ + union {