From: Greg Kroah-Hartman Date: Mon, 12 Aug 2024 11:37:42 +0000 (+0200) Subject: 6.6-stable patches X-Git-Tag: v6.1.105~74 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=96fb61de6cd53af1023d9a1aa6dccd3dc978c163;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: driver-core-fix-uevent_show-vs-driver-detach-race.patch ntp-safeguard-against-time_constant-overflow.patch timekeeping-fix-bogus-clock_was_set-invocation-in-do_adjtimex.patch tracefs-fix-inode-allocation.patch tracefs-use-generic-inode-rcu-for-synchronizing-freeing.patch --- diff --git a/queue-6.6/driver-core-fix-uevent_show-vs-driver-detach-race.patch b/queue-6.6/driver-core-fix-uevent_show-vs-driver-detach-race.patch new file mode 100644 index 00000000000..674a0c64f2a --- /dev/null +++ b/queue-6.6/driver-core-fix-uevent_show-vs-driver-detach-race.patch @@ -0,0 +1,157 @@ +From 15fffc6a5624b13b428bb1c6e9088e32a55eb82c Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Fri, 12 Jul 2024 12:42:09 -0700 +Subject: driver core: Fix uevent_show() vs driver detach race + +From: Dan Williams + +commit 15fffc6a5624b13b428bb1c6e9088e32a55eb82c upstream. + +uevent_show() wants to de-reference dev->driver->name. There is no clean +way for a device attribute to de-reference dev->driver unless that +attribute is defined via (struct device_driver).dev_groups. Instead, the +anti-pattern of taking the device_lock() in the attribute handler risks +deadlocks with code paths that remove device attributes while holding +the lock. 
+ +This deadlock is typically invisible to lockdep given the device_lock() +is marked lockdep_set_novalidate_class(), but some subsystems allocate a +local lockdep key for @dev->mutex to reveal reports of the form: + + ====================================================== + WARNING: possible circular locking dependency detected + 6.10.0-rc7+ #275 Tainted: G OE N + ------------------------------------------------------ + modprobe/2374 is trying to acquire lock: + ffff8c2270070de0 (kn->active#6){++++}-{0:0}, at: __kernfs_remove+0xde/0x220 + + but task is already holding lock: + ffff8c22016e88f8 (&cxl_root_key){+.+.}-{3:3}, at: device_release_driver_internal+0x39/0x210 + + which lock already depends on the new lock. + + the existing dependency chain (in reverse order) is: + + -> #1 (&cxl_root_key){+.+.}-{3:3}: + __mutex_lock+0x99/0xc30 + uevent_show+0xac/0x130 + dev_attr_show+0x18/0x40 + sysfs_kf_seq_show+0xac/0xf0 + seq_read_iter+0x110/0x450 + vfs_read+0x25b/0x340 + ksys_read+0x67/0xf0 + do_syscall_64+0x75/0x190 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + + -> #0 (kn->active#6){++++}-{0:0}: + __lock_acquire+0x121a/0x1fa0 + lock_acquire+0xd6/0x2e0 + kernfs_drain+0x1e9/0x200 + __kernfs_remove+0xde/0x220 + kernfs_remove_by_name_ns+0x5e/0xa0 + device_del+0x168/0x410 + device_unregister+0x13/0x60 + devres_release_all+0xb8/0x110 + device_unbind_cleanup+0xe/0x70 + device_release_driver_internal+0x1c7/0x210 + driver_detach+0x47/0x90 + bus_remove_driver+0x6c/0xf0 + cxl_acpi_exit+0xc/0x11 [cxl_acpi] + __do_sys_delete_module.isra.0+0x181/0x260 + do_syscall_64+0x75/0x190 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +The observation though is that driver objects are typically much longer +lived than device objects. It is reasonable to perform lockless +de-reference of a @driver pointer even if it is racing detach from a +device. Given the infrequency of driver unregistration, use +synchronize_rcu() in module_remove_driver() to close any potential +races. 
It is potentially overkill to suffer synchronize_rcu() just to +handle the rare module removal racing uevent_show() event. + +Thanks to Tetsuo Handa for the debug analysis of the syzbot report [1]. + +Fixes: c0a40097f0bc ("drivers: core: synchronize really_probe() and dev_uevent()") +Reported-by: syzbot+4762dd74e32532cda5ff@syzkaller.appspotmail.com +Reported-by: Tetsuo Handa +Closes: http://lore.kernel.org/5aa5558f-90a4-4864-b1b1-5d6784c5607d@I-love.SAKURA.ne.jp [1] +Link: http://lore.kernel.org/669073b8ea479_5fffa294c1@dwillia2-xfh.jf.intel.com.notmuch +Cc: stable@vger.kernel.org +Cc: Ashish Sangwan +Cc: Namjae Jeon +Cc: Dirk Behme +Cc: Greg Kroah-Hartman +Cc: Rafael J. Wysocki +Signed-off-by: Dan Williams +Link: https://lore.kernel.org/r/172081332794.577428.9738802016494057132.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/core.c | 13 ++++++++----- + drivers/base/module.c | 4 ++++ + 2 files changed, 12 insertions(+), 5 deletions(-) + +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2565,6 +2566,7 @@ static const char *dev_uevent_name(const + static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) + { + const struct device *dev = kobj_to_dev(kobj); ++ struct device_driver *driver; + int retval = 0; + + /* add device node properties if present */ +@@ -2593,8 +2595,12 @@ static int dev_uevent(const struct kobje + if (dev->type && dev->type->name) + add_uevent_var(env, "DEVTYPE=%s", dev->type->name); + +- if (dev->driver) +- add_uevent_var(env, "DRIVER=%s", dev->driver->name); ++ /* Synchronize with module_remove_driver() */ ++ rcu_read_lock(); ++ driver = READ_ONCE(dev->driver); ++ if (driver) ++ add_uevent_var(env, "DRIVER=%s", driver->name); ++ rcu_read_unlock(); + + /* Add common DT information about the device */ + of_device_uevent(dev, env); +@@ -2664,11 +2670,8 @@ static ssize_t 
uevent_show(struct device + if (!env) + return -ENOMEM; + +- /* Synchronize with really_probe() */ +- device_lock(dev); + /* let the kset specific function add its keys */ + retval = kset->uevent_ops->uevent(&dev->kobj, env); +- device_unlock(dev); + if (retval) + goto out; + +--- a/drivers/base/module.c ++++ b/drivers/base/module.c +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include "base.h" + + static char *make_driver_name(struct device_driver *drv) +@@ -97,6 +98,9 @@ void module_remove_driver(struct device_ + if (!drv) + return; + ++ /* Synchronize with dev_uevent() */ ++ synchronize_rcu(); ++ + sysfs_remove_link(&drv->p->kobj, "module"); + + if (drv->owner) diff --git a/queue-6.6/ntp-safeguard-against-time_constant-overflow.patch b/queue-6.6/ntp-safeguard-against-time_constant-overflow.patch new file mode 100644 index 00000000000..548c04661fe --- /dev/null +++ b/queue-6.6/ntp-safeguard-against-time_constant-overflow.patch @@ -0,0 +1,64 @@ +From 06c03c8edce333b9ad9c6b207d93d3a5ae7c10c0 Mon Sep 17 00:00:00 2001 +From: Justin Stitt +Date: Fri, 17 May 2024 00:47:10 +0000 +Subject: ntp: Safeguard against time_constant overflow + +From: Justin Stitt + +commit 06c03c8edce333b9ad9c6b207d93d3a5ae7c10c0 upstream. + +Using syzkaller with the recently reintroduced signed integer overflow +sanitizer produces this UBSAN report: + +UBSAN: signed-integer-overflow in ../kernel/time/ntp.c:738:18 +9223372036854775806 + 4 cannot be represented in type 'long' +Call Trace: + handle_overflow+0x171/0x1b0 + __do_adjtimex+0x1236/0x1440 + do_adjtimex+0x2be/0x740 + +The user supplied time_constant value is incremented by four and then +clamped to the operating range. + +Before commit eea83d896e31 ("ntp: NTP4 user space bits update") the user +supplied value was sanity checked to be in the operating range. 
That change +removed the sanity check and relied on clamping after incrementing which +does not work correctly when the user supplied value is in the overflow +zone of the '+ 4' operation. + +The operation requires CAP_SYS_TIME and the side effect of the overflow is +NTP getting out of sync. + +Similar to the fixups for time_maxerror and time_esterror, clamp the user +space supplied value to the operating range. + +[ tglx: Switch to clamping ] + +Fixes: eea83d896e31 ("ntp: NTP4 user space bits update") +Signed-off-by: Justin Stitt +Signed-off-by: Thomas Gleixner +Cc: Miroslav Lichvar +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20240517-b4-sio-ntp-c-v2-1-f3a80096f36f@google.com +Closes: https://github.com/KSPP/linux/issues/352 +Signed-off-by: Greg Kroah-Hartman +--- + kernel/time/ntp.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/kernel/time/ntp.c ++++ b/kernel/time/ntp.c +@@ -733,11 +733,10 @@ static inline void process_adjtimex_mode + time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT); + + if (txc->modes & ADJ_TIMECONST) { +- time_constant = txc->constant; ++ time_constant = clamp(txc->constant, 0, MAXTC); + if (!(time_status & STA_NANO)) + time_constant += 4; +- time_constant = min(time_constant, (long)MAXTC); +- time_constant = max(time_constant, 0l); ++ time_constant = clamp(time_constant, 0, MAXTC); + } + + if (txc->modes & ADJ_TAI && diff --git a/queue-6.6/series b/queue-6.6/series index aa62dbfc270..7e42bd41aca 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -128,3 +128,8 @@ vhost-vdpa-switch-to-use-vmf_insert_pfn-in-the-fault-handler.patch ntp-clamp-maxerror-and-esterror-to-operating-range.patch clocksource-scale-the-watchdog-read-retries-automati.patch clocksource-fix-brown-bag-boolean-thinko-in-cs_watch.patch +driver-core-fix-uevent_show-vs-driver-detach-race.patch +tracefs-fix-inode-allocation.patch +tracefs-use-generic-inode-rcu-for-synchronizing-freeing.patch 
+ntp-safeguard-against-time_constant-overflow.patch +timekeeping-fix-bogus-clock_was_set-invocation-in-do_adjtimex.patch diff --git a/queue-6.6/timekeeping-fix-bogus-clock_was_set-invocation-in-do_adjtimex.patch b/queue-6.6/timekeeping-fix-bogus-clock_was_set-invocation-in-do_adjtimex.patch new file mode 100644 index 00000000000..dee468430d7 --- /dev/null +++ b/queue-6.6/timekeeping-fix-bogus-clock_was_set-invocation-in-do_adjtimex.patch @@ -0,0 +1,40 @@ +From 5916be8a53de6401871bdd953f6c60237b47d6d3 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 3 Aug 2024 17:07:51 +0200 +Subject: timekeeping: Fix bogus clock_was_set() invocation in do_adjtimex() + +From: Thomas Gleixner + +commit 5916be8a53de6401871bdd953f6c60237b47d6d3 upstream. + +The addition of the bases argument to clock_was_set() fixed up all call +sites correctly except for do_adjtimex(). This uses CLOCK_REALTIME +instead of CLOCK_SET_WALL as argument. CLOCK_REALTIME is 0. + +As a result the effect of that clock_was_set() notification is incomplete +and might result in timers expiring late because the hrtimer code does +not re-evaluate the affected clock bases. + +Use CLOCK_SET_WALL instead of CLOCK_REALTIME to tell the hrtimers code +which clock bases need to be re-evaluated. 
+ +Fixes: 17a1b8826b45 ("hrtimer: Add bases argument to clock_was_set()") +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/877ccx7igo.ffs@tglx +Signed-off-by: Greg Kroah-Hartman +--- + kernel/time/timekeeping.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/time/timekeeping.c ++++ b/kernel/time/timekeeping.c +@@ -2476,7 +2476,7 @@ int do_adjtimex(struct __kernel_timex *t + clock_set |= timekeeping_advance(TK_ADV_FREQ); + + if (clock_set) +- clock_was_set(CLOCK_REALTIME); ++ clock_was_set(CLOCK_SET_WALL); + + ntp_notify_cmos_timer(); + diff --git a/queue-6.6/tracefs-fix-inode-allocation.patch b/queue-6.6/tracefs-fix-inode-allocation.patch new file mode 100644 index 00000000000..50f42af5e2f --- /dev/null +++ b/queue-6.6/tracefs-fix-inode-allocation.patch @@ -0,0 +1,49 @@ +From 0df2ac59bebfac221463ef57ed3554899b41d75f Mon Sep 17 00:00:00 2001 +From: Mathias Krause +Date: Wed, 7 Aug 2024 13:51:38 +0200 +Subject: tracefs: Fix inode allocation + +From: Mathias Krause + +commit 0df2ac59bebfac221463ef57ed3554899b41d75f upstream. + +The leading comment above alloc_inode_sb() is pretty explicit about it: + + /* + * This must be used for allocating filesystems specific inodes to set + * up the inode reclaim context correctly. + */ + +Switch tracefs over to alloc_inode_sb() to make sure inodes are properly +linked. 
+ +Cc: Ajay Kaher +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Al Viro +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/20240807115143.45927-2-minipli@grsecurity.net +Fixes: ba37ff75e04b ("eventfs: Implement tracefs_inode_cache") +Signed-off-by: Mathias Krause +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c +index 1028ab6d9a74..21a7e51fc3c1 100644 +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -42,7 +42,7 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) + struct tracefs_inode *ti; + unsigned long flags; + +- ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); ++ ti = alloc_inode_sb(sb, tracefs_inode_cachep, GFP_KERNEL); + if (!ti) + return NULL; + +-- +2.46.0 + diff --git a/queue-6.6/tracefs-use-generic-inode-rcu-for-synchronizing-freeing.patch b/queue-6.6/tracefs-use-generic-inode-rcu-for-synchronizing-freeing.patch new file mode 100644 index 00000000000..cd1df5e8fc2 --- /dev/null +++ b/queue-6.6/tracefs-use-generic-inode-rcu-for-synchronizing-freeing.patch @@ -0,0 +1,259 @@ +From 0b6743bd60a56a701070b89fb80c327a44b7b3e2 Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Wed, 7 Aug 2024 18:54:02 -0400 +Subject: tracefs: Use generic inode RCU for synchronizing freeing + +From: Steven Rostedt + +commit 0b6743bd60a56a701070b89fb80c327a44b7b3e2 upstream. + +With structure layout randomization enabled for 'struct inode' we need to +avoid overlapping any of the RCU-used / initialized-only-once members, +e.g. i_lru or i_sb_list to not corrupt related list traversals when making +use of the rcu_head. 
+ +For an unlucky structure layout of 'struct inode' we may end up with the +following splat when running the ftrace selftests: + +[<...>] list_del corruption, ffff888103ee2cb0->next (tracefs_inode_cache+0x0/0x4e0 [slab object]) is NULL (prev is tracefs_inode_cache+0x78/0x4e0 [slab object]) +[<...>] ------------[ cut here ]------------ +[<...>] kernel BUG at lib/list_debug.c:54! +[<...>] invalid opcode: 0000 [#1] PREEMPT SMP KASAN +[<...>] CPU: 3 PID: 2550 Comm: mount Tainted: G N 6.8.12-grsec+ #122 ed2f536ca62f28b087b90e3cc906a8d25b3ddc65 +[<...>] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 +[<...>] RIP: 0010:[] __list_del_entry_valid_or_report+0x138/0x3e0 +[<...>] Code: 48 b8 99 fb 65 f2 ff ff ff ff e9 03 5c d9 fc cc 48 b8 99 fb 65 f2 ff ff ff ff e9 33 5a d9 fc cc 48 b8 99 fb 65 f2 ff ff ff ff <0f> 0b 4c 89 e9 48 89 ea 48 89 ee 48 c7 c7 60 8f dd 89 31 c0 e8 2f +[<...>] RSP: 0018:fffffe80416afaf0 EFLAGS: 00010283 +[<...>] RAX: 0000000000000098 RBX: ffff888103ee2cb0 RCX: 0000000000000000 +[<...>] RDX: ffffffff84655fe8 RSI: ffffffff89dd8b60 RDI: 0000000000000001 +[<...>] RBP: ffff888103ee2cb0 R08: 0000000000000001 R09: fffffbd0082d5f25 +[<...>] R10: fffffe80416af92f R11: 0000000000000001 R12: fdf99c16731d9b6d +[<...>] R13: 0000000000000000 R14: ffff88819ad4b8b8 R15: 0000000000000000 +[<...>] RBX: tracefs_inode_cache+0x0/0x4e0 [slab object] +[<...>] RDX: __list_del_entry_valid_or_report+0x108/0x3e0 +[<...>] RSI: __func__.47+0x4340/0x4400 +[<...>] RBP: tracefs_inode_cache+0x0/0x4e0 [slab object] +[<...>] RSP: process kstack fffffe80416afaf0+0x7af0/0x8000 [mount 2550 2550] +[<...>] R09: kasan shadow of process kstack fffffe80416af928+0x7928/0x8000 [mount 2550 2550] +[<...>] R10: process kstack fffffe80416af92f+0x792f/0x8000 [mount 2550 2550] +[<...>] R14: tracefs_inode_cache+0x78/0x4e0 [slab object] +[<...>] FS: 00006dcb380c1840(0000) GS:ffff8881e0600000(0000) knlGS:0000000000000000 +[<...>] CS: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 +[<...>] CR2: 000076ab72b30e84 CR3: 000000000b088004 CR4: 0000000000360ef0 shadow CR4: 0000000000360ef0 +[<...>] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[<...>] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[<...>] ASID: 0003 +[<...>] Stack: +[<...>] ffffffff818a2315 00000000f5c856ee ffffffff896f1840 ffff888103ee2cb0 +[<...>] ffff88812b6b9750 0000000079d714b6 fffffbfff1e9280b ffffffff8f49405f +[<...>] 0000000000000001 0000000000000000 ffff888104457280 ffffffff8248b392 +[<...>] Call Trace: +[<...>] +[<...>] [] ? lock_release+0x175/0x380 fffffe80416afaf0 +[<...>] [] list_lru_del+0x152/0x740 fffffe80416afb48 +[<...>] [] list_lru_del_obj+0x113/0x280 fffffe80416afb88 +[<...>] [] ? _atomic_dec_and_lock+0x119/0x200 fffffe80416afb90 +[<...>] [] iput_final+0x1c4/0x9a0 fffffe80416afbb8 +[<...>] [] dentry_unlink_inode+0x44b/0xaa0 fffffe80416afbf8 +[<...>] [] __dentry_kill+0x23c/0xf00 fffffe80416afc40 +[<...>] [] ? __this_cpu_preempt_check+0x1f/0xa0 fffffe80416afc48 +[<...>] [] ? shrink_dentry_list+0x1c5/0x760 fffffe80416afc70 +[<...>] [] ? shrink_dentry_list+0x51/0x760 fffffe80416afc78 +[<...>] [] shrink_dentry_list+0x288/0x760 fffffe80416afc80 +[<...>] [] shrink_dcache_sb+0x155/0x420 fffffe80416afcc8 +[<...>] [] ? debug_smp_processor_id+0x23/0xa0 fffffe80416afce0 +[<...>] [] ? do_one_tree+0x140/0x140 fffffe80416afcf8 +[<...>] [] ? do_remount+0x329/0xa00 fffffe80416afd18 +[<...>] [] ? security_sb_remount+0x81/0x1c0 fffffe80416afd38 +[<...>] [] reconfigure_super+0x856/0x14e0 fffffe80416afd70 +[<...>] [] ? ns_capable_common+0xe7/0x2a0 fffffe80416afd90 +[<...>] [] do_remount+0x416/0xa00 fffffe80416afdd0 +[<...>] [] path_mount+0x5c4/0x900 fffffe80416afe28 +[<...>] [] ? finish_automount+0x13a0/0x13a0 fffffe80416afe60 +[<...>] [] ? user_path_at_empty+0xb2/0x140 fffffe80416afe88 +[<...>] [] do_mount+0x115/0x1c0 fffffe80416afeb8 +[<...>] [] ? path_mount+0x900/0x900 fffffe80416afed8 +[<...>] [] ? 
__kasan_check_write+0x1c/0xa0 fffffe80416afee0 +[<...>] [] __do_sys_mount+0x12f/0x280 fffffe80416aff30 +[<...>] [] __x64_sys_mount+0xcd/0x2e0 fffffe80416aff70 +[<...>] [] ? syscall_trace_enter+0x218/0x380 fffffe80416aff88 +[<...>] [] x64_sys_call+0x5d5e/0x6720 fffffe80416affa8 +[<...>] [] do_syscall_64+0xcd/0x3c0 fffffe80416affb8 +[<...>] [] entry_SYSCALL_64_safe_stack+0x4c/0x87 fffffe80416affe8 +[<...>] +[<...>] +[<...>] RIP: 0033:[<00006dcb382ff66a>] vm_area_struct[mount 2550 2550 file 6dcb38225000-6dcb3837e000 22 55(read|exec|mayread|mayexec)]+0x0/0xb8 [userland map] +[<...>] Code: 48 8b 0d 29 18 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d f6 17 0d 00 f7 d8 64 89 01 48 +[<...>] RSP: 002b:0000763d68192558 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 +[<...>] RAX: ffffffffffffffda RBX: 00006dcb38433264 RCX: 00006dcb382ff66a +[<...>] RDX: 000017c3e0d11210 RSI: 000017c3e0d1a5a0 RDI: 000017c3e0d1ae70 +[<...>] RBP: 000017c3e0d10fb0 R08: 000017c3e0d11260 R09: 00006dcb383d1be0 +[<...>] R10: 000000000020002e R11: 0000000000000246 R12: 0000000000000000 +[<...>] R13: 000017c3e0d1ae70 R14: 000017c3e0d11210 R15: 000017c3e0d10fb0 +[<...>] RBX: vm_area_struct[mount 2550 2550 file 6dcb38433000-6dcb38434000 5b 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] RCX: vm_area_struct[mount 2550 2550 file 6dcb38225000-6dcb3837e000 22 55(read|exec|mayread|mayexec)]+0x0/0xb8 [userland map] +[<...>] RDX: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] RSI: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] RDI: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] 
RBP: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] RSP: vm_area_struct[mount 2550 2550 anon 763d68173000-763d68195000 7ffffffdd 100133(read|write|mayread|maywrite|growsdown|account)]+0x0/0xb8 [userland map] +[<...>] R08: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] R09: vm_area_struct[mount 2550 2550 file 6dcb383d1000-6dcb383d3000 1cd 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] R13: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] R14: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] R15: vm_area_struct[mount 2550 2550 anon 17c3e0d0f000-17c3e0d31000 17c3e0d0f 100033(read|write|mayread|maywrite|account)]+0x0/0xb8 [userland map] +[<...>] +[<...>] Modules linked in: +[<...>] ---[ end trace 0000000000000000 ]--- + +The list debug message as well as RBX's symbolic value point out that the +object in question was allocated from 'tracefs_inode_cache' and that the +list's '->next' member is at offset 0. Dumping the layout of the relevant +parts of 'struct tracefs_inode' gives the following: + + struct tracefs_inode { + union { + struct inode { + struct list_head { + struct list_head * next; /* 0 8 */ + struct list_head * prev; /* 8 8 */ + } i_lru; + [...] + } vfs_inode; + struct callback_head { + void (*func)(struct callback_head *); /* 0 8 */ + struct callback_head * next; /* 8 8 */ + } rcu; + }; + [...] + }; + +Above shows that 'vfs_inode.i_lru' overlaps with 'rcu' which will +destroy the 'i_lru' list as soon as the 'rcu' member gets used, e.g. in +call_rcu() or later when calling the RCU callback. 
This will disturb +concurrent list traversals as well as object reuse which assumes these +list heads will keep their integrity. + +For reproduction, the following diff manually overlays 'i_lru' with +'rcu' as, otherwise, one would require some good portion of luck for +gambling an unlucky RANDSTRUCT seed: + + --- a/include/linux/fs.h + +++ b/include/linux/fs.h + @@ -629,6 +629,7 @@ struct inode { + umode_t i_mode; + unsigned short i_opflags; + kuid_t i_uid; + + struct list_head i_lru; /* inode LRU list */ + kgid_t i_gid; + unsigned int i_flags; + + @@ -690,7 +691,6 @@ struct inode { + u16 i_wb_frn_avg_time; + u16 i_wb_frn_history; + #endif + - struct list_head i_lru; /* inode LRU list */ + struct list_head i_sb_list; + struct list_head i_wb_list; /* backing dev writeback list */ + union { + +The tracefs inode does not need to supply its own RCU delayed destruction +of its inode. The inode code itself offers both a "destroy_inode()" +callback that gets called when the last reference of the inode is +released, and the "free_inode()" which is called after an RCU +synchronization period from the "destroy_inode()". + +The tracefs code can unlink the inode from its list in the destroy_inode() +callback, and then simply free it from the free_inode() callback. This +should provide the same protection. 
+ +Link: https://lore.kernel.org/all/20240807115143.45927-3-minipli@grsecurity.net/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Ajay Kaher +Cc: Ilkka =?utf-8?b?TmF1bGFww6TDpA==?= +Link: https://lore.kernel.org/20240807185402.61410544@gandalf.local.home +Fixes: baa23a8d4360 ("tracefs: Reset permissions on remount if permissions are options") +Reported-by: Mathias Krause +Reported-by: Brad Spengler +Suggested-by: Al Viro +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/inode.c | 10 ++++------ + fs/tracefs/internal.h | 5 +---- + include/linux/fs.h | 2 +- + 3 files changed, 6 insertions(+), 11 deletions(-) + +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -53,15 +53,14 @@ static struct inode *tracefs_alloc_inode + return &ti->vfs_inode; + } + +-static void tracefs_free_inode_rcu(struct rcu_head *rcu) ++static void tracefs_free_inode(struct inode *inode) + { +- struct tracefs_inode *ti; ++ struct tracefs_inode *ti = get_tracefs(inode); + +- ti = container_of(rcu, struct tracefs_inode, rcu); + kmem_cache_free(tracefs_inode_cachep, ti); + } + +-static void tracefs_free_inode(struct inode *inode) ++static void tracefs_destroy_inode(struct inode *inode) + { + struct tracefs_inode *ti = get_tracefs(inode); + unsigned long flags; +@@ -69,8 +68,6 @@ static void tracefs_free_inode(struct in + spin_lock_irqsave(&tracefs_inode_lock, flags); + list_del_rcu(&ti->list); + spin_unlock_irqrestore(&tracefs_inode_lock, flags); +- +- call_rcu(&ti->rcu, tracefs_free_inode_rcu); + } + + static ssize_t default_read_file(struct file *file, char __user *buf, +@@ -458,6 +455,7 @@ static int tracefs_drop_inode(struct ino + static const struct super_operations tracefs_super_operations = { + .alloc_inode = tracefs_alloc_inode, + .free_inode = tracefs_free_inode, ++ .destroy_inode = tracefs_destroy_inode, + .drop_inode = tracefs_drop_inode, + .statfs = simple_statfs, + .remount_fs = tracefs_remount, +--- 
a/fs/tracefs/internal.h ++++ b/fs/tracefs/internal.h +@@ -10,10 +10,7 @@ enum { + }; + + struct tracefs_inode { +- union { +- struct inode vfs_inode; +- struct rcu_head rcu; +- }; ++ struct inode vfs_inode; + /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ + struct list_head list; + unsigned long flags; +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -642,6 +642,7 @@ struct inode { + umode_t i_mode; + unsigned short i_opflags; + kuid_t i_uid; ++ struct list_head i_lru; /* inode LRU list */ + kgid_t i_gid; + unsigned int i_flags; + +@@ -703,7 +704,6 @@ struct inode { + u16 i_wb_frn_avg_time; + u16 i_wb_frn_history; + #endif +- struct list_head i_lru; /* inode LRU list */ + struct list_head i_sb_list; + struct list_head i_wb_list; /* backing dev writeback list */ + union {