From: Greg Kroah-Hartman Date: Mon, 27 Oct 2025 08:04:27 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v5.4.301~21 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=76165031daab183ee9e2da26d61a22cf4d5a2a57;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch fs-notify-call-exportfs_encode_fid-with-s_umount.patch fuse-allocate-ff-release_args-only-if-release-is-needed.patch fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch xfs-always-warn-about-deprecated-mount-options.patch --- diff --git a/queue-6.6/devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch b/queue-6.6/devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch new file mode 100644 index 0000000000..070e8f2b42 --- /dev/null +++ b/queue-6.6/devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch @@ -0,0 +1,389 @@ +From sashal@kernel.org Mon Oct 27 00:49:57 2025 +From: Sasha Levin +Date: Sun, 26 Oct 2025 19:49:50 -0400 +Subject: devcoredump: Fix circular locking dependency with devcd->mutex. +To: stable@vger.kernel.org +Cc: Maarten Lankhorst , Mukesh Ojha , Greg Kroah-Hartman , Johannes Berg , "Rafael J. Wysocki" , Danilo Krummrich , linux-kernel@vger.kernel.org, Matthew Brost , Mukesh Ojha , Sasha Levin +Message-ID: <20251026234950.288779-1-sashal@kernel.org> + +From: Maarten Lankhorst + +[ Upstream commit a91c8096590bd7801a26454789f2992094fe36da ] + +The original code causes a circular locking dependency found by lockdep. + +====================================================== +WARNING: possible circular locking dependency detected +6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 Tainted: G S U +------------------------------------------------------ +xe_fault_inject/5091 is trying to acquire lock: +ffff888156815688 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}, at: __flush_work+0x25d/0x660 + +but task is already holding lock: + +ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0 +which lock already depends on the new lock. 
+the existing dependency chain (in reverse order) is: +-> #2 (&devcd->mutex){+.+.}-{3:3}: + mutex_lock_nested+0x4e/0xc0 + devcd_data_write+0x27/0x90 + sysfs_kf_bin_write+0x80/0xf0 + kernfs_fop_write_iter+0x169/0x220 + vfs_write+0x293/0x560 + ksys_write+0x72/0xf0 + __x64_sys_write+0x19/0x30 + x64_sys_call+0x2bf/0x2660 + do_syscall_64+0x93/0xb60 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +-> #1 (kn->active#236){++++}-{0:0}: + kernfs_drain+0x1e2/0x200 + __kernfs_remove+0xae/0x400 + kernfs_remove_by_name_ns+0x5d/0xc0 + remove_files+0x54/0x70 + sysfs_remove_group+0x3d/0xa0 + sysfs_remove_groups+0x2e/0x60 + device_remove_attrs+0xc7/0x100 + device_del+0x15d/0x3b0 + devcd_del+0x19/0x30 + process_one_work+0x22b/0x6f0 + worker_thread+0x1e8/0x3d0 + kthread+0x11c/0x250 + ret_from_fork+0x26c/0x2e0 + ret_from_fork_asm+0x1a/0x30 +-> #0 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}: + __lock_acquire+0x1661/0x2860 + lock_acquire+0xc4/0x2f0 + __flush_work+0x27a/0x660 + flush_delayed_work+0x5d/0xa0 + dev_coredump_put+0x63/0xa0 + xe_driver_devcoredump_fini+0x12/0x20 [xe] + devm_action_release+0x12/0x30 + release_nodes+0x3a/0x120 + devres_release_all+0x8a/0xd0 + device_unbind_cleanup+0x12/0x80 + device_release_driver_internal+0x23a/0x280 + device_driver_detach+0x14/0x20 + unbind_store+0xaf/0xc0 + drv_attr_store+0x21/0x50 + sysfs_kf_write+0x4a/0x80 + kernfs_fop_write_iter+0x169/0x220 + vfs_write+0x293/0x560 + ksys_write+0x72/0xf0 + __x64_sys_write+0x19/0x30 + x64_sys_call+0x2bf/0x2660 + do_syscall_64+0x93/0xb60 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +other info that might help us debug this: +Chain exists of: (work_completion)(&(&devcd->del_wk)->work) --> kn->active#236 --> &devcd->mutex + Possible unsafe locking scenario: + CPU0 CPU1 + ---- ---- + lock(&devcd->mutex); + lock(kn->active#236); + lock(&devcd->mutex); + lock((work_completion)(&(&devcd->del_wk)->work)); + *** DEADLOCK *** +5 locks held by xe_fault_inject/5091: + #0: ffff8881129f9488 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x72/0xf0 + #1: ffff88810c755078 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x123/0x220 + #2: ffff8881054811a0 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x55/0x280 + #3: ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0 + #4: ffffffff8359e020 (rcu_read_lock){....}-{1:2}, at: __flush_work+0x72/0x660 +stack backtrace: +CPU: 14 UID: 0 PID: 5091 Comm: xe_fault_inject Tainted: G S U 6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 PREEMPT_{RT,(lazy)} +Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER +Hardware name: Micro-Star International Co., Ltd. MS-7D25/PRO Z690-A DDR4(MS-7D25), BIOS 1.10 12/13/2021 +Call Trace: + + dump_stack_lvl+0x91/0xf0 + dump_stack+0x10/0x20 + print_circular_bug+0x285/0x360 + check_noncircular+0x135/0x150 + ? register_lock_class+0x48/0x4a0 + __lock_acquire+0x1661/0x2860 + lock_acquire+0xc4/0x2f0 + ? __flush_work+0x25d/0x660 + ? mark_held_locks+0x46/0x90 + ? __flush_work+0x25d/0x660 + __flush_work+0x27a/0x660 + ? __flush_work+0x25d/0x660 + ? trace_hardirqs_on+0x1e/0xd0 + ? __pfx_wq_barrier_func+0x10/0x10 + flush_delayed_work+0x5d/0xa0 + dev_coredump_put+0x63/0xa0 + xe_driver_devcoredump_fini+0x12/0x20 [xe] + devm_action_release+0x12/0x30 + release_nodes+0x3a/0x120 + devres_release_all+0x8a/0xd0 + device_unbind_cleanup+0x12/0x80 + device_release_driver_internal+0x23a/0x280 + ? 
bus_find_device+0xa8/0xe0 + device_driver_detach+0x14/0x20 + unbind_store+0xaf/0xc0 + drv_attr_store+0x21/0x50 + sysfs_kf_write+0x4a/0x80 + kernfs_fop_write_iter+0x169/0x220 + vfs_write+0x293/0x560 + ksys_write+0x72/0xf0 + __x64_sys_write+0x19/0x30 + x64_sys_call+0x2bf/0x2660 + do_syscall_64+0x93/0xb60 + ? __f_unlock_pos+0x15/0x20 + ? __x64_sys_getdents64+0x9b/0x130 + ? __pfx_filldir64+0x10/0x10 + ? do_syscall_64+0x1a2/0xb60 + ? clear_bhb_loop+0x30/0x80 + ? clear_bhb_loop+0x30/0x80 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +RIP: 0033:0x76e292edd574 +Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 +RSP: 002b:00007fffe247a828 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000076e292edd574 +RDX: 000000000000000c RSI: 00006267f6306063 RDI: 000000000000000b +RBP: 000000000000000c R08: 000076e292fc4b20 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000202 R12: 00006267f6306063 +R13: 000000000000000b R14: 00006267e6859c00 R15: 000076e29322a000 + +xe 0000:03:00.0: [drm] Xe device coredump has been deleted. + +Fixes: 01daccf74832 ("devcoredump : Serialize devcd_del work") +Cc: Mukesh Ojha +Cc: Greg Kroah-Hartman +Cc: Johannes Berg +Cc: Rafael J. Wysocki +Cc: Danilo Krummrich +Cc: linux-kernel@vger.kernel.org +Cc: stable@vger.kernel.org # v6.1+ +Signed-off-by: Maarten Lankhorst +Cc: Matthew Brost +Acked-by: Mukesh Ojha +Link: https://lore.kernel.org/r/20250723142416.1020423-1-dev@lankhorst.se +Signed-off-by: Greg Kroah-Hartman +[ replaced disable_delayed_work_sync() with cancel_delayed_work_sync() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/devcoredump.c | 138 +++++++++++++++++++++++++++------------------ + 1 file changed, 84 insertions(+), 54 deletions(-) + +--- a/drivers/base/devcoredump.c ++++ b/drivers/base/devcoredump.c +@@ -26,50 +26,46 @@ struct devcd_entry { + void *data; + size_t datalen; + /* +- * Here, mutex is required to serialize the calls to del_wk work between +- * user/kernel space which happens when devcd is added with device_add() +- * and that sends uevent to user space. User space reads the uevents, +- * and calls to devcd_data_write() which try to modify the work which is +- * not even initialized/queued from devcoredump. ++ * There are 2 races for which mutex is required. + * ++ * The first race is between device creation and userspace writing to ++ * schedule immediately destruction. + * ++ * This race is handled by arming the timer before device creation, but ++ * when device creation fails the timer still exists. + * +- * cpu0(X) cpu1(Y) ++ * To solve this, hold the mutex during device_add(), and set ++ * init_completed on success before releasing the mutex. + * +- * dev_coredump() uevent sent to user space +- * device_add() ======================> user space process Y reads the +- * uevents writes to devcd fd +- * which results into writes to ++ * That way the timer will never fire until device_add() is called, ++ * it will do nothing if init_completed is not set. The timer is also ++ * cancelled in that case. 
+ * +- * devcd_data_write() +- * mod_delayed_work() +- * try_to_grab_pending() +- * del_timer() +- * debug_assert_init() +- * INIT_DELAYED_WORK() +- * schedule_delayed_work() +- * +- * +- * Also, mutex alone would not be enough to avoid scheduling of +- * del_wk work after it get flush from a call to devcd_free() +- * mentioned as below. +- * +- * disabled_store() +- * devcd_free() +- * mutex_lock() devcd_data_write() +- * flush_delayed_work() +- * mutex_unlock() +- * mutex_lock() +- * mod_delayed_work() +- * mutex_unlock() +- * So, delete_work flag is required. ++ * The second race involves multiple parallel invocations of devcd_free(); ++ * add a deleted flag so only one can call the destructor. + */ + struct mutex mutex; +- bool delete_work; ++ bool init_completed, deleted; + struct module *owner; + ssize_t (*read)(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen); + void (*free)(void *data); ++ /* ++ * If nothing interferes and device_add() returns success, ++ * del_wk will destroy the device after the timer fires. ++ * ++ * Multiple userspace processes can interfere with the working of the timer: ++ * - Writing to the coredump will reschedule the timer to run immediately, ++ * if still armed. ++ * ++ * This is handled by using "if (cancel_delayed_work()) { ++ * schedule_delayed_work() }", to prevent re-arming after having ++ * been previously fired. ++ * - Writing to /sys/class/devcoredump/disabled will destroy the ++ * coredump synchronously. ++ * This is handled by using cancel_delayed_work_sync(), and then ++ * checking if the deleted flag is set with &devcd->mutex held. ++ */ + struct delayed_work del_wk; + struct device *failing_dev; + }; +@@ -98,14 +94,27 @@ static void devcd_dev_release(struct dev + kfree(devcd); + } + ++static void __devcd_del(struct devcd_entry *devcd) ++{ ++ devcd->deleted = true; ++ device_del(&devcd->devcd_dev); ++ put_device(&devcd->devcd_dev); ++} ++ + static void devcd_del(struct work_struct *wk) + { + struct devcd_entry *devcd; ++ bool init_completed; + + devcd = container_of(wk, struct devcd_entry, del_wk.work); + +- device_del(&devcd->devcd_dev); +- put_device(&devcd->devcd_dev); ++ /* devcd->mutex serializes against dev_coredumpm_timeout */ ++ mutex_lock(&devcd->mutex); ++ init_completed = devcd->init_completed; ++ mutex_unlock(&devcd->mutex); ++ ++ if (init_completed) ++ __devcd_del(devcd); + } + + static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, +@@ -125,12 +134,12 @@ static ssize_t devcd_data_write(struct f + struct device *dev = kobj_to_dev(kobj); + struct devcd_entry *devcd = dev_to_devcd(dev); + +- mutex_lock(&devcd->mutex); +- if (!devcd->delete_work) { +- devcd->delete_work = true; +- mod_delayed_work(system_wq, &devcd->del_wk, 0); +- } +- mutex_unlock(&devcd->mutex); ++ /* ++ * Although it's tempting to use mod_delayed_work() here, ++ * that will cause a reschedule if the timer already fired. ++ */ ++ if (cancel_delayed_work(&devcd->del_wk)) ++ schedule_delayed_work(&devcd->del_wk, 0); + + return count; + } +@@ -158,11 +167,21 @@ static int devcd_free(struct device *dev + { + struct devcd_entry *devcd = dev_to_devcd(dev); + ++ /* ++ * To prevent a race with devcd_data_write(), cancel work and ++ * complete manually instead. ++ * ++ * We cannot rely on the return value of ++ * cancel_delayed_work_sync() here, because it might be in the ++ * middle of a cancel_delayed_work + schedule_delayed_work pair. ++ * ++ * devcd->mutex here guards against multiple parallel invocations ++ * of devcd_free().
++ */ ++ cancel_delayed_work_sync(&devcd->del_wk); + mutex_lock(&devcd->mutex); +- if (!devcd->delete_work) +- devcd->delete_work = true; +- +- flush_delayed_work(&devcd->del_wk); ++ if (!devcd->deleted) ++ __devcd_del(devcd); + mutex_unlock(&devcd->mutex); + return 0; + } +@@ -186,12 +205,10 @@ static ssize_t disabled_show(const struc + * put_device() <- last reference + * error = fn(dev, data) devcd_dev_release() + * devcd_free(dev, data) kfree(devcd) +- * mutex_lock(&devcd->mutex); + * + * +- * In the above diagram, It looks like disabled_store() would be racing with parallely +- * running devcd_del() and result in memory abort while acquiring devcd->mutex which +- * is called after kfree of devcd memory after dropping its last reference with ++ * In the above diagram, it looks like disabled_store() would be racing with parallelly ++ * running devcd_del() and result in memory abort after dropping its last reference with + * put_device(). However, this will not happens as fn(dev, data) runs + * with its own reference to device via klist_node so it is not its last reference. + * so, above situation would not occur. +@@ -352,7 +369,7 @@ void dev_coredumpm(struct device *dev, s + devcd->read = read; + devcd->free = free; + devcd->failing_dev = get_device(dev); +- devcd->delete_work = false; ++ devcd->deleted = false; + + mutex_init(&devcd->mutex); + device_initialize(&devcd->devcd_dev); +@@ -361,8 +378,14 @@ void dev_coredumpm(struct device *dev, s + atomic_inc_return(&devcd_count)); + devcd->devcd_dev.class = &devcd_class; + +- mutex_lock(&devcd->mutex); + dev_set_uevent_suppress(&devcd->devcd_dev, true); ++ ++ /* devcd->mutex prevents devcd_del() completing until init finishes */ ++ mutex_lock(&devcd->mutex); ++ devcd->init_completed = false; ++ INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); ++ schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); ++ + if (device_add(&devcd->devcd_dev)) + goto put_device; + +@@ -379,13 +402,20 @@ void dev_coredumpm(struct device *dev, s + + dev_set_uevent_suppress(&devcd->devcd_dev, false); + kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); +- INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); +- schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); ++ ++ /* ++ * Safe to run devcd_del() now that we are done with devcd_dev. ++ * Alternatively we could have taken a ref on devcd_dev before ++ * dropping the lock. 
++ */ ++ devcd->init_completed = true; + mutex_unlock(&devcd->mutex); + return; + put_device: +- put_device(&devcd->devcd_dev); + mutex_unlock(&devcd->mutex); ++ cancel_delayed_work_sync(&devcd->del_wk); ++ put_device(&devcd->devcd_dev); ++ + put_module: + module_put(owner); + free: diff --git a/queue-6.6/fs-notify-call-exportfs_encode_fid-with-s_umount.patch b/queue-6.6/fs-notify-call-exportfs_encode_fid-with-s_umount.patch new file mode 100644 index 0000000000..cbfc89396b --- /dev/null +++ b/queue-6.6/fs-notify-call-exportfs_encode_fid-with-s_umount.patch @@ -0,0 +1,111 @@ +From stable+bounces-189868-greg=kroah.com@vger.kernel.org Sun Oct 26 17:05:06 2025 +From: Sasha Levin +Date: Sun, 26 Oct 2025 12:04:56 -0400 +Subject: fs/notify: call exportfs_encode_fid with s_umount +To: stable@vger.kernel.org +Cc: Jakub Acs , Jan Kara , Amir Goldstein , Miklos Szeredi , Christian Brauner , linux-unionfs@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, Sasha Levin +Message-ID: <20251026160456.99836-1-sashal@kernel.org> + +From: Jakub Acs + +[ Upstream commit a7c4bb43bfdc2b9f06ee9d036028ed13a83df42a ] + +Calling inotify_show_fdinfo() on an fd watching an overlayfs inode, while +the overlayfs is being unmounted, can lead to dereferencing a NULL ptr. + +This issue was found by syzkaller. + +Race Condition Diagram: + +Thread 1 Thread 2 +-------- -------- + +generic_shutdown_super() + shrink_dcache_for_umount + sb->s_root = NULL + + | + | vfs_read() + | inotify_fdinfo() + | * inode get from mark * + | show_mark_fhandle(m, inode) + | exportfs_encode_fid(inode, ..) + | ovl_encode_fh(inode, ..) + | ovl_check_encode_origin(inode) + | * deref i_sb->s_root * + | + | + v + fsnotify_sb_delete(sb) + +Which then leads to: + +[ 32.133461] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI +[ 32.134438] KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] +[ 32.135032] CPU: 1 UID: 0 PID: 4468 Comm: systemd-coredum Not tainted 6.17.0-rc6 #22 PREEMPT(none) + + + +[ 32.143353] Call Trace: +[ 32.143732] ovl_encode_fh+0xd5/0x170 +[ 32.144031] exportfs_encode_inode_fh+0x12f/0x300 +[ 32.144425] show_mark_fhandle+0xbe/0x1f0 +[ 32.145805] inotify_fdinfo+0x226/0x2d0 +[ 32.146442] inotify_show_fdinfo+0x1c5/0x350 +[ 32.147168] seq_show+0x530/0x6f0 +[ 32.147449] seq_read_iter+0x503/0x12a0 +[ 32.148419] seq_read+0x31f/0x410 +[ 32.150714] vfs_read+0x1f0/0x9e0 +[ 32.152297] ksys_read+0x125/0x240 + +IOW ovl_check_encode_origin derefs inode->i_sb->s_root after it was set +to NULL in the unmount path. + +Fix it by protecting the call to exportfs_encode_fid() from +show_mark_fhandle() with the s_umount lock. + +This form of fix was suggested by Amir in [1].
+ +[1]: https://lore.kernel.org/all/CAOQ4uxhbDwhb+2Brs1UdkoF0a3NSdBAOQPNfEHjahrgoKJpLEw@mail.gmail.com/ + +Fixes: c45beebfde34 ("ovl: support encoding fid from inode with no alias") +Signed-off-by: Jakub Acs +Cc: Jan Kara +Cc: Amir Goldstein +Cc: Miklos Szeredi +Cc: Christian Brauner +Cc: linux-unionfs@vger.kernel.org +Cc: linux-fsdevel@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Jan Kara +[ Adjust context ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/notify/fdinfo.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/notify/fdinfo.c ++++ b/fs/notify/fdinfo.c +@@ -17,6 +17,7 @@ + #include "fanotify/fanotify.h" + #include "fdinfo.h" + #include "fsnotify.h" ++#include "../internal.h" + + #if defined(CONFIG_PROC_FS) + +@@ -50,7 +51,12 @@ static void show_mark_fhandle(struct seq + f.handle.handle_bytes = sizeof(f.pad); + size = f.handle.handle_bytes >> 2; + ++ if (!super_trylock_shared(inode->i_sb)) ++ return; ++ + ret = exportfs_encode_fid(inode, (struct fid *)f.handle.f_handle, &size); ++ up_read(&inode->i_sb->s_umount); ++ + if ((ret == FILEID_INVALID) || (ret < 0)) + return; + diff --git a/queue-6.6/fuse-allocate-ff-release_args-only-if-release-is-needed.patch b/queue-6.6/fuse-allocate-ff-release_args-only-if-release-is-needed.patch new file mode 100644 index 0000000000..adfa40d3e4 --- /dev/null +++ b/queue-6.6/fuse-allocate-ff-release_args-only-if-release-is-needed.patch @@ -0,0 +1,241 @@ +From stable+bounces-188825-greg=kroah.com@vger.kernel.org Tue Oct 21 22:16:29 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 16:16:18 -0400 +Subject: fuse: allocate ff->release_args only if release is needed +To: stable@vger.kernel.org +Cc: Amir Goldstein , Miklos Szeredi , Sasha Levin +Message-ID: <20251021201619.2922630-1-sashal@kernel.org> + +From: Amir Goldstein + +[ Upstream commit e26ee4efbc79610b20e7abe9d96c87f33dacc1ff ] + +This removes the need to pass the isdir argument to fuse_file_put().
+ +Signed-off-by: Amir Goldstein +Signed-off-by: Miklos Szeredi +Stable-dep-of: 26e5c67deb2e ("fuse: fix livelock in synchronous file put from fuseblk workers") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/dir.c | 2 - + fs/fuse/file.c | 69 +++++++++++++++++++++++++++++++------------------------ + fs/fuse/fuse_i.h | 2 - + 3 files changed, 41 insertions(+), 32 deletions(-) + +--- a/fs/fuse/dir.c ++++ b/fs/fuse/dir.c +@@ -634,7 +634,7 @@ static int fuse_create_open(struct inode + goto out_err; + + err = -ENOMEM; +- ff = fuse_file_alloc(fm); ++ ff = fuse_file_alloc(fm, true); + if (!ff) + goto out_put_forget_req; + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -55,7 +55,7 @@ struct fuse_release_args { + struct inode *inode; + }; + +-struct fuse_file *fuse_file_alloc(struct fuse_mount *fm) ++struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release) + { + struct fuse_file *ff; + +@@ -64,11 +64,13 @@ struct fuse_file *fuse_file_alloc(struct + return NULL; + + ff->fm = fm; +- ff->release_args = kzalloc(sizeof(*ff->release_args), +- GFP_KERNEL_ACCOUNT); +- if (!ff->release_args) { +- kfree(ff); +- return NULL; ++ if (release) { ++ ff->release_args = kzalloc(sizeof(*ff->release_args), ++ GFP_KERNEL_ACCOUNT); ++ if (!ff->release_args) { ++ kfree(ff); ++ return NULL; ++ } + } + + INIT_LIST_HEAD(&ff->write_entry); +@@ -104,14 +106,14 @@ static void fuse_release_end(struct fuse + kfree(ra); + } + +-static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) ++static void fuse_file_put(struct fuse_file *ff, bool sync) + { + if (refcount_dec_and_test(&ff->count)) { +- struct fuse_args *args = &ff->release_args->args; ++ struct fuse_release_args *ra = ff->release_args; ++ struct fuse_args *args = (ra ? &ra->args : NULL); + +- if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) { +- /* Do nothing when client does not implement 'open' */ +- fuse_release_end(ff->fm, args, 0); ++ if (!args) { ++ /* Do nothing when server does not implement 'open' */ + } else if (sync) { + fuse_simple_request(ff->fm, args); + fuse_release_end(ff->fm, args, 0); +@@ -131,15 +133,16 @@ struct fuse_file *fuse_file_open(struct + struct fuse_conn *fc = fm->fc; + struct fuse_file *ff; + int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; ++ bool open = isdir ? !fc->no_opendir : !fc->no_open; + +- ff = fuse_file_alloc(fm); ++ ff = fuse_file_alloc(fm, open); + if (!ff) + return ERR_PTR(-ENOMEM); + + ff->fh = 0; + /* Default for no-open */ + ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0); +- if (isdir ? 
!fc->no_opendir : !fc->no_open) { ++ if (open) { + struct fuse_open_out outarg; + int err; + +@@ -147,11 +150,13 @@ struct fuse_file *fuse_file_open(struct + if (!err) { + ff->fh = outarg.fh; + ff->open_flags = outarg.open_flags; +- + } else if (err != -ENOSYS) { + fuse_file_free(ff); + return ERR_PTR(err); + } else { ++ /* No release needed */ ++ kfree(ff->release_args); ++ ff->release_args = NULL; + if (isdir) + fc->no_opendir = 1; + else +@@ -273,7 +278,7 @@ out_inode_unlock: + } + + static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, +- unsigned int flags, int opcode) ++ unsigned int flags, int opcode, bool sync) + { + struct fuse_conn *fc = ff->fm->fc; + struct fuse_release_args *ra = ff->release_args; +@@ -291,6 +296,9 @@ static void fuse_prepare_release(struct + + wake_up_interruptible_all(&ff->poll_wait); + ++ if (!ra) ++ return; ++ + ra->inarg.fh = ff->fh; + ra->inarg.flags = flags; + ra->args.in_numargs = 1; +@@ -300,6 +308,13 @@ static void fuse_prepare_release(struct + ra->args.nodeid = ff->nodeid; + ra->args.force = true; + ra->args.nocreds = true; ++ ++ /* ++ * Hold inode until release is finished. ++ * From fuse_sync_release() the refcount is 1 and everything's ++ * synchronous, so we are fine with not doing igrab() here. ++ */ ++ ra->inode = sync ? NULL : igrab(&fi->inode); + } + + void fuse_file_release(struct inode *inode, struct fuse_file *ff, +@@ -309,14 +324,12 @@ void fuse_file_release(struct inode *ino + struct fuse_release_args *ra = ff->release_args; + int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; + +- fuse_prepare_release(fi, ff, open_flags, opcode); ++ fuse_prepare_release(fi, ff, open_flags, opcode, false); + +- if (ff->flock) { ++ if (ra && ff->flock) { + ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; + ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, id); + } +- /* Hold inode until release is finished */ +- ra->inode = igrab(inode); + + /* + * Normally this will send the RELEASE request, however if +@@ -327,7 +340,7 @@ void fuse_file_release(struct inode *ino + * synchronous RELEASE is allowed (and desirable) in this case + * because the server can be trusted not to screw up. 
+ */ +- fuse_file_put(ff, ff->fm->fc->destroy, isdir); ++ fuse_file_put(ff, ff->fm->fc->destroy); + } + + void fuse_release_common(struct file *file, bool isdir) +@@ -362,12 +375,8 @@ void fuse_sync_release(struct fuse_inode + unsigned int flags) + { + WARN_ON(refcount_read(&ff->count) > 1); +- fuse_prepare_release(fi, ff, flags, FUSE_RELEASE); +- /* +- * iput(NULL) is a no-op and since the refcount is 1 and everything's +- * synchronous, we are fine with not doing igrab() here" +- */ +- fuse_file_put(ff, true, false); ++ fuse_prepare_release(fi, ff, flags, FUSE_RELEASE, true); ++ fuse_file_put(ff, true); + } + EXPORT_SYMBOL_GPL(fuse_sync_release); + +@@ -924,7 +933,7 @@ static void fuse_readpages_end(struct fu + put_page(page); + } + if (ia->ff) +- fuse_file_put(ia->ff, false, false); ++ fuse_file_put(ia->ff, false); + + fuse_io_free(ia); + } +@@ -1666,7 +1675,7 @@ static void fuse_writepage_free(struct f + __free_page(ap->pages[i]); + + if (wpa->ia.ff) +- fuse_file_put(wpa->ia.ff, false, false); ++ fuse_file_put(wpa->ia.ff, false); + + kfree(ap->pages); + kfree(wpa); +@@ -1914,7 +1923,7 @@ int fuse_write_inode(struct inode *inode + ff = __fuse_write_file_get(fi); + err = fuse_flush_times(inode, ff); + if (ff) +- fuse_file_put(ff, false, false); ++ fuse_file_put(ff, false); + + return err; + } +@@ -2312,7 +2321,7 @@ static int fuse_writepages(struct addres + fuse_writepages_send(&data); + } + if (data.ff) +- fuse_file_put(data.ff, false, false); ++ fuse_file_put(data.ff, false); + + kfree(data.orig_pages); + out: +--- a/fs/fuse/fuse_i.h ++++ b/fs/fuse/fuse_i.h +@@ -1036,7 +1036,7 @@ void fuse_read_args_fill(struct fuse_io_ + */ + int fuse_open_common(struct inode *inode, struct file *file, bool isdir); + +-struct fuse_file *fuse_file_alloc(struct fuse_mount *fm); ++struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release); + void fuse_file_free(struct fuse_file *ff); + void fuse_finish_open(struct inode *inode, struct file *file); + diff --git a/queue-6.6/fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch b/queue-6.6/fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch new file mode 100644 index 0000000000..e75cda4bb2 --- /dev/null +++ b/queue-6.6/fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch @@ -0,0 +1,94 @@ +From stable+bounces-188826-greg=kroah.com@vger.kernel.org Tue Oct 21 22:16:59 2025 +From: Sasha Levin +Date: Tue, 21 Oct 2025 16:16:19 -0400 +Subject: fuse: fix livelock in synchronous file put from fuseblk workers +To: stable@vger.kernel.org +Cc: "Darrick J. Wong" , Miklos Szeredi , Sasha Levin +Message-ID: <20251021201619.2922630-2-sashal@kernel.org> + +From: "Darrick J. Wong" + +[ Upstream commit 26e5c67deb2e1f42a951f022fdf5b9f7eb747b01 ] + +I observed a hang when running generic/323 against a fuseblk server. +This test opens a file, initiates a lot of AIO writes to that file +descriptor, and closes the file descriptor before the writes complete. 
+Unsurprisingly, the AIO exerciser threads are mostly stuck waiting for +responses from the fuseblk server: + +# cat /proc/372265/task/372313/stack +[<0>] request_wait_answer+0x1fe/0x2a0 [fuse] +[<0>] __fuse_simple_request+0xd3/0x2b0 [fuse] +[<0>] fuse_do_getattr+0xfc/0x1f0 [fuse] +[<0>] fuse_file_read_iter+0xbe/0x1c0 [fuse] +[<0>] aio_read+0x130/0x1e0 +[<0>] io_submit_one+0x542/0x860 +[<0>] __x64_sys_io_submit+0x98/0x1a0 +[<0>] do_syscall_64+0x37/0xf0 +[<0>] entry_SYSCALL_64_after_hwframe+0x4b/0x53 + +But the /weird/ part is that the fuseblk server threads are waiting for +responses from themselves: + +# cat /proc/372210/task/372232/stack +[<0>] request_wait_answer+0x1fe/0x2a0 [fuse] +[<0>] __fuse_simple_request+0xd3/0x2b0 [fuse] +[<0>] fuse_file_put+0x9a/0xd0 [fuse] +[<0>] fuse_release+0x36/0x50 [fuse] +[<0>] __fput+0xec/0x2b0 +[<0>] task_work_run+0x55/0x90 +[<0>] syscall_exit_to_user_mode+0xe9/0x100 +[<0>] do_syscall_64+0x43/0xf0 +[<0>] entry_SYSCALL_64_after_hwframe+0x4b/0x53 + +The fuseblk server is fuse2fs so there's nothing all that exciting in +the server itself. So why is the fuse server calling fuse_file_put? +The commit message for the fstest sheds some light on that: + +"By closing the file descriptor before calling io_destroy, you pretty +much guarantee that the last put on the ioctx will be done in interrupt +context (during I/O completion)." + +Aha. AIO fgets a new struct file from the fd when it queues the ioctx. +The completion of the FUSE_WRITE command from userspace causes the fuse +server to call the AIO completion function. The completion puts the +struct file, queuing a delayed fput to the fuse server task. When the +fuse server task returns to userspace, it has to run the delayed fput, +which in the case of a fuseblk server, it does synchronously. + +Sending the FUSE_RELEASE command synchronously from fuse server threads +is a bad idea because a client program can initiate enough simultaneous +AIOs such that all the fuse server threads end up in delayed_fput, and +now there aren't any threads left to handle the queued fuse commands. + +Fix this by only using asynchronous fputs when closing files, and leave +a comment explaining why. + +Cc: stable@vger.kernel.org # v2.6.38 +Fixes: 5a18ec176c934c ("fuse: fix hang of single threaded fuseblk filesystem") +Signed-off-by: Darrick J. Wong +Signed-off-by: Miklos Szeredi +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/file.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -339,8 +339,14 @@ void fuse_file_release(struct inode *ino + * Make the release synchronous if this is a fuseblk mount, + * synchronous RELEASE is allowed (and desirable) in this case + * because the server can be trusted not to screw up. ++ * ++ * Always use the asynchronous file put because the current thread ++ * might be the fuse server. This can happen if a process starts some ++ * aio and closes the fd before the aio completes. Since aio takes its ++ * own ref to the file, the IO completion has to drop the ref, which is ++ * how the fuse server can end up closing its clients' files.
+ */ +- fuse_file_put(ff, ff->fm->fc->destroy); ++ fuse_file_put(ff, false); + } + + void fuse_release_common(struct file *file, bool isdir) diff --git a/queue-6.6/s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch b/queue-6.6/s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch new file mode 100644 index 0000000000..4fb36fed93 --- /dev/null +++ b/queue-6.6/s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch @@ -0,0 +1,95 @@ +From stable+bounces-189106-greg=kroah.com@vger.kernel.org Thu Oct 23 13:49:33 2025 +From: Vineeth Vijayan +Date: Thu, 23 Oct 2025 13:49:13 +0200 +Subject: s390/cio: Update purge function to unregister the unused subchannels +To: stable@vger.kernel.org +Cc: hca@linux.ibm.com, oberpar@linux.ibm.com +Message-ID: <20251023114913.2143450-1-vneethv@linux.ibm.com> + +From: Vineeth Vijayan + +commit 9daa5a8795865f9a3c93d8d1066785b07ded6073 upstream. + +Starting with 'commit 2297791c92d0 ("s390/cio: dont unregister +subchannel from child-drivers")', cio no longer unregisters +subchannels when the attached device is invalid or unavailable. + +As an unintended side-effect, the cio_ignore purge function no longer +removes subchannels for devices on the cio_ignore list if no CCW device +is attached. This situation occurs when a CCW device is non-operational +or unavailable. + +To ensure the same outcome of the purge function as when the +current cio_ignore list had been active during boot, update the purge +function to remove I/O subchannels without working CCW devices if the +associated device number is found on the cio_ignore list. + +Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") +Suggested-by: Peter Oberparleiter +Reviewed-by: Peter Oberparleiter +Signed-off-by: Vineeth Vijayan +Signed-off-by: Heiko Carstens +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/cio/device.c | 39 +++++++++++++++++++++-------------- + 1 file changed, 25 insertions(+), 14 deletions(-) + +--- a/drivers/s390/cio/device.c ++++ b/drivers/s390/cio/device.c +@@ -1318,23 +1318,34 @@ void ccw_device_schedule_recovery(void) + spin_unlock_irqrestore(&recovery_lock, flags); + } + +-static int purge_fn(struct device *dev, void *data) ++static int purge_fn(struct subchannel *sch, void *data) + { +- struct ccw_device *cdev = to_ccwdev(dev); +- struct ccw_dev_id *id = &cdev->private->dev_id; +- struct subchannel *sch = to_subchannel(cdev->dev.parent); +- +- spin_lock_irq(cdev->ccwlock); +- if (is_blacklisted(id->ssid, id->devno) && +- (cdev->private->state == DEV_STATE_OFFLINE) && +- (atomic_cmpxchg(&cdev->private->onoff, 0, 1) == 0)) { +- CIO_MSG_EVENT(3, "ccw: purging 0.%x.%04x\n", id->ssid, +- id->devno); ++ struct ccw_device *cdev; ++ ++ spin_lock_irq(sch->lock); ++ if (sch->st != SUBCHANNEL_TYPE_IO || !sch->schib.pmcw.dnv) ++ goto unlock; ++ ++ if (!is_blacklisted(sch->schid.ssid, sch->schib.pmcw.dev)) ++ goto unlock; ++ ++ cdev = sch_get_cdev(sch); ++ if (cdev) { ++ if (cdev->private->state != DEV_STATE_OFFLINE) ++ goto unlock; ++ ++ if (atomic_cmpxchg(&cdev->private->onoff, 0, 1) != 0) ++ goto unlock; + ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); +- css_sched_sch_todo(sch, SCH_TODO_UNREG); + atomic_set(&cdev->private->onoff, 0); + } +- spin_unlock_irq(cdev->ccwlock); ++ ++ css_sched_sch_todo(sch, SCH_TODO_UNREG); ++ CIO_MSG_EVENT(3, "ccw: purging 0.%x.%04x%s\n", sch->schid.ssid, ++ sch->schib.pmcw.dev, cdev ?
"" : " (no cdev)"); ++ ++unlock: ++ spin_unlock_irq(sch->lock); + /* Abort loop in case of pending signal. */ + if (signal_pending(current)) + return -EINTR; +@@ -1350,7 +1361,7 @@ static int purge_fn(struct device *dev, + int ccw_purge_blacklisted(void) + { + CIO_MSG_EVENT(2, "ccw: purging blacklisted devices\n"); +- bus_for_each_dev(&ccw_bus_type, NULL, NULL, purge_fn); ++ for_each_subchannel_staged(purge_fn, NULL, NULL); + return 0; + } + diff --git a/queue-6.6/series b/queue-6.6/series index 6aa35fdf3b..d293453b1b 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -74,3 +74,10 @@ dt-bindings-usb-dwc3-imx8mp-dma-range-is-required-only-for-imx8mp.patch serial-8250_dw-handle-reset-control-deassert-error.patch serial-8250_exar-add-support-for-advantech-2-port-card-with-device-id-0x0018.patch serial-8250_mtk-enable-baud-clock-and-manage-in-runtime-pm.patch +devcoredump-fix-circular-locking-dependency-with-devcd-mutex.patch +xfs-always-warn-about-deprecated-mount-options.patch +fs-notify-call-exportfs_encode_fid-with-s_umount.patch +x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch +s390-cio-update-purge-function-to-unregister-the-unused-subchannels.patch +fuse-allocate-ff-release_args-only-if-release-is-needed.patch +fuse-fix-livelock-in-synchronous-file-put-from-fuseblk-workers.patch diff --git a/queue-6.6/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch b/queue-6.6/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch new file mode 100644 index 0000000000..d8c2c7b97f --- /dev/null +++ b/queue-6.6/x86-resctrl-fix-miscount-of-bandwidth-event-when-reactivating-previously-unavailable-rmid.patch @@ -0,0 +1,134 @@ +From stable+bounces-189148-greg=kroah.com@vger.kernel.org Thu Oct 23 18:15:45 2025 +From: Babu Moger +Date: Thu, 23 Oct 2025 11:12:40 -0500 +Subject: x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID +To: +Message-ID: <20251023161240.75240-1-babu.moger@amd.com> + +From: Babu Moger + +Users can create as many monitoring groups as the number of RMIDs supported +by the hardware. However, on AMD systems, only a limited number of RMIDs +are guaranteed to be actively tracked by the hardware. RMIDs that exceed +this limit are placed in an "Unavailable" state. + +When a bandwidth counter is read for such an RMID, the hardware sets +MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked +again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable +remains set on first read after tracking re-starts and is clear on all +subsequent reads as long as the RMID is tracked. + +resctrl miscounts the bandwidth events after an RMID transitions from the +"Unavailable" state back to being tracked. This happens because when the +hardware starts counting again after resetting the counter to zero, resctrl +in turn compares the new count against the counter value stored from the +previous time the RMID was tracked. + +This results in resctrl computing an event value that is either undercounting +(when new counter is more than stored counter) or a mistaken overflow (when +new counter is less than stored counter). + +Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to +zero whenever the RMID is in the "Unavailable" state to ensure accurate +counting after the RMID resets to zero when it starts to be tracked again. 
+ +Example scenario that results in mistaken overflow +================================================== +1. The resctrl filesystem is mounted, and a task is assigned to a + monitoring group. + + $mount -t resctrl resctrl /sys/fs/resctrl + $mkdir /sys/fs/resctrl/mon_groups/test1/ + $echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 21323 <- Total bytes on domain 0 + "Unavailable" <- Total bytes on domain 1 + + Task is running on domain 0. Counter on domain 1 is "Unavailable". + +2. The task runs on domain 0 for a while and then moves to domain 1. The + counter starts incrementing on domain 1. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 7345357 <- Total bytes on domain 0 + 4545 <- Total bytes on domain 1 + +3. At some point, the RMID in domain 0 transitions to the "Unavailable" + state because the task is no longer executing in that domain. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + "Unavailable" <- Total bytes on domain 0 + 434341 <- Total bytes on domain 1 + +4. Since the task continues to migrate between domains, it may eventually + return to domain 0. + + $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes + 17592178699059 <- Overflow on domain 0 + 3232332 <- Total bytes on domain 1 + +In this case, the RMID on domain 0 transitions from "Unavailable" state to +active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when +the counter is read and begins tracking the RMID counting from 0. + +Subsequent reads succeed but return a value smaller than the previously +saved MSR value (7345357). Consequently, the resctrl's overflow logic is +triggered, it compares the previous value (7345357) with the new, smaller +value and incorrectly interprets this as a counter overflow, adding a large +delta. + +In reality, this is a false positive: the counter did not overflow but was +simply reset when the RMID transitioned from "Unavailable" back to active +state. + +Here is the text from APM [1] available from [2]. + +"In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the +first QM_CTR read when it begins tracking an RMID that it was not +previously tracking. The U bit will be zero for all subsequent reads from +that RMID while it is still tracked by the hardware. Therefore, a QM_CTR +read with the U bit set when that RMID is in use by a processor can be +considered 0 when calculating the difference with a subsequent read." + +[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming + Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory + Bandwidth (MBM). + + [ bp: Split commit message into smaller paragraph chunks for better + consumption. 
] + +Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature") +Signed-off-by: Babu Moger +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Reinette Chatre +Tested-by: Reinette Chatre +Cc: stable@vger.kernel.org # needs adjustments for <= v6.17 +Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] +(cherry picked from commit 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92) +[babu.moger@amd.com: Fix conflict for v6.6 stable] +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/resctrl/monitor.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/cpu/resctrl/monitor.c ++++ b/arch/x86/kernel/cpu/resctrl/monitor.c +@@ -241,11 +241,15 @@ int resctrl_arch_rmid_read(struct rdt_re + if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) + return -EINVAL; + ++ am = get_arch_mbm_state(hw_dom, rmid, eventid); ++ + ret = __rmid_read(rmid, eventid, &msr_val); +- if (ret) ++ if (ret) { ++ if (am && ret == -EINVAL) ++ am->prev_msr = 0; + return ret; ++ } + +- am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) { + am->chunks += mbm_overflow_count(am->prev_msr, msr_val, + hw_res->mbm_width); diff --git a/queue-6.6/xfs-always-warn-about-deprecated-mount-options.patch b/queue-6.6/xfs-always-warn-about-deprecated-mount-options.patch new file mode 100644 index 0000000000..f8f57484da --- /dev/null +++ b/queue-6.6/xfs-always-warn-about-deprecated-mount-options.patch @@ -0,0 +1,93 @@ +From stable+bounces-189890-greg=kroah.com@vger.kernel.org Sun Oct 26 23:50:17 2025 +From: Sasha Levin +Date: Sun, 26 Oct 2025 18:50:08 -0400 +Subject: xfs: always warn about deprecated mount options +To: stable@vger.kernel.org +Cc: "Darrick J. Wong" , Christoph Hellwig , Carlos Maiolino , Carlos Maiolino , Sasha Levin +Message-ID: <20251026225008.272115-1-sashal@kernel.org> + +From: "Darrick J. Wong" + +[ Upstream commit 630785bfbe12c3ee3ebccd8b530a98d632b7e39d ] + +The deprecation of the 'attr2' mount option in 6.18 wasn't entirely +successful because nobody noticed that the kernel never printed a +warning about attr2 being set in fstab if the only xfs filesystem is the +root fs; the initramfs mounts the root fs with no mount options; and the +init scripts only conveyed the fstab options by remounting the root fs. + +Fix this by making it complain all the time. + +Cc: stable@vger.kernel.org # v5.13 +Fixes: 92cf7d36384b99 ("xfs: Skip repetitive warnings about mount options") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Reviewed-by: Carlos Maiolino +Signed-off-by: Carlos Maiolino +[ Update existing xfs_fs_warn_deprecated() callers ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_super.c | 33 +++++++++++++++++++++------------ + 1 file changed, 21 insertions(+), 12 deletions(-) + +--- a/fs/xfs/xfs_super.c ++++ b/fs/xfs/xfs_super.c +@@ -1230,16 +1230,25 @@ suffix_kstrtoint( + static inline void + xfs_fs_warn_deprecated( + struct fs_context *fc, +- struct fs_parameter *param, +- uint64_t flag, +- bool value) ++ struct fs_parameter *param) + { +- /* Don't print the warning if reconfiguring and current mount point +- * already had the flag set ++ /* ++ * Always warn about someone passing in a deprecated mount option. ++ * Previously we wouldn't print the warning if we were reconfiguring ++ * and current mount point already had the flag set, but that was not ++ * the right thing to do. 
++ * ++ * Many distributions mount the root filesystem with no options in the ++ * initramfs and rely on mount -a to remount the root fs with the ++ * options in fstab. However, the old behavior meant that there would ++ * never be a warning about deprecated mount options for the root fs in ++ * /etc/fstab. On a single-fs system, that means no warning at all. ++ * ++ * Compounding this problem are distribution scripts that copy ++ * /proc/mounts to fstab, which means that we can't remove mount ++ * options unless we're 100% sure they have only ever been advertised ++ * in /proc/mounts in response to explicitly provided mount options. + */ +- if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && +- !!(XFS_M(fc->root->d_sb)->m_features & flag) == value) +- return; + xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); + } + +@@ -1378,19 +1387,19 @@ xfs_fs_parse_param( + #endif + /* Following mount options will be removed in September 2025 */ + case Opt_ikeep: +- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true); ++ xfs_fs_warn_deprecated(fc, param); + parsing_mp->m_features |= XFS_FEAT_IKEEP; + return 0; + case Opt_noikeep: +- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false); ++ xfs_fs_warn_deprecated(fc, param); + parsing_mp->m_features &= ~XFS_FEAT_IKEEP; + return 0; + case Opt_attr2: +- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true); ++ xfs_fs_warn_deprecated(fc, param); + parsing_mp->m_features |= XFS_FEAT_ATTR2; + return 0; + case Opt_noattr2: +- xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true); ++ xfs_fs_warn_deprecated(fc, param); + parsing_mp->m_features |= XFS_FEAT_NOATTR2; + return 0; + default: