From: Greg Kroah-Hartman Date: Mon, 12 May 2025 10:35:24 +0000 (+0200) Subject: 6.14-stable patches X-Git-Tag: v5.15.183~39 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2e563631f104181a9658937400cb4d491ed69a94;p=thirdparty%2Fkernel%2Fstable-queue.git 6.14-stable patches added patches: accel-ivpu-increase-state-dump-msg-timeout.patch arm64-cpufeature-move-arm64_use_ng_mappings-to-the-.data-section-to-prevent-wrong-idmap-generation.patch clocksource-i8253-use-raw_spinlock_irqsave-in-clockevent_i8253_disable.patch kvm-arm64-fix-uninitialized-memcache-pointer-in-user_mem_abort.patch memblock-accept-allocated-memory-before-use-in-memblock_double_array.patch module-ensure-that-kobject_put-is-safe-for-module-type-kobjects.patch ocfs2-fix-panic-in-failed-foilio-allocation.patch ocfs2-fix-the-issue-with-discontiguous-allocation-in-the-global_bitmap.patch ocfs2-implement-handshaking-with-ocfs2-recovery-thread.patch ocfs2-stop-quota-recovery-before-disabling-quotas.patch ocfs2-switch-osb-disable_recovery-to-enum.patch smb-client-avoid-race-in-open_cached_dir-with-lease-breaks.patch usb-cdnsp-fix-issue-with-resuming-from-l1.patch usb-cdnsp-fix-l1-resume-issue-for-rtl_revision_new_lpm-version.patch usb-dwc3-gadget-make-gadget_wakeup-asynchronous.patch usb-gadget-f_ecm-add-get_status-callback.patch usb-gadget-tegra-xudc-ack-st_rc-after-clearing-ctrl_run.patch usb-gadget-use-get_status-callback-to-set-remote-wakeup-capability.patch usb-host-tegra-prevent-host-controller-crash-when-otg-port-is-used.patch x86-microcode-consolidate-the-loader-enablement-checking.patch xen-swiotlb-use-swiotlb-bouncing-if-kmalloc-allocation-demands-it.patch xenbus-use-kref-to-track-req-lifetime.patch --- diff --git a/queue-6.14/accel-ivpu-increase-state-dump-msg-timeout.patch b/queue-6.14/accel-ivpu-increase-state-dump-msg-timeout.patch new file mode 100644 index 0000000000..91fe7e7688 --- /dev/null +++ b/queue-6.14/accel-ivpu-increase-state-dump-msg-timeout.patch @@ -0,0 +1,34 @@ +From 
c4eb2f88d2796ab90c5430e11c48709716181364 Mon Sep 17 00:00:00 2001 +From: Jacek Lawrynowicz +Date: Fri, 25 Apr 2025 11:28:22 +0200 +Subject: accel/ivpu: Increase state dump msg timeout + +From: Jacek Lawrynowicz + +commit c4eb2f88d2796ab90c5430e11c48709716181364 upstream. + +Increase JMS message state dump command timeout to 100 ms. On some +platforms, the FW may take a bit longer than 50 ms to dump its state +to the log buffer and we don't want to miss any debug info during TDR. + +Fixes: 5e162f872d7a ("accel/ivpu: Add FW state dump on TDR") +Cc: stable@vger.kernel.org # v6.13+ +Reviewed-by: Jeff Hugo +Signed-off-by: Jacek Lawrynowicz +Link: https://lore.kernel.org/r/20250425092822.2194465-1-jacek.lawrynowicz@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/accel/ivpu/ivpu_hw.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/accel/ivpu/ivpu_hw.c ++++ b/drivers/accel/ivpu/ivpu_hw.c +@@ -106,7 +106,7 @@ static void timeouts_init(struct ivpu_de + else + vdev->timeout.autosuspend = 100; + vdev->timeout.d0i3_entry_msg = 5; +- vdev->timeout.state_dump_msg = 10; ++ vdev->timeout.state_dump_msg = 100; + } + } + diff --git a/queue-6.14/arm64-cpufeature-move-arm64_use_ng_mappings-to-the-.data-section-to-prevent-wrong-idmap-generation.patch b/queue-6.14/arm64-cpufeature-move-arm64_use_ng_mappings-to-the-.data-section-to-prevent-wrong-idmap-generation.patch new file mode 100644 index 0000000000..752784224c --- /dev/null +++ b/queue-6.14/arm64-cpufeature-move-arm64_use_ng_mappings-to-the-.data-section-to-prevent-wrong-idmap-generation.patch @@ -0,0 +1,77 @@ +From 363cd2b81cfdf706bbfc9ec78db000c9b1ecc552 Mon Sep 17 00:00:00 2001 +From: Yeoreum Yun +Date: Fri, 2 May 2025 19:04:12 +0100 +Subject: arm64: cpufeature: Move arm64_use_ng_mappings to the .data section to prevent wrong idmap generation + +From: Yeoreum Yun + +commit 363cd2b81cfdf706bbfc9ec78db000c9b1ecc552 upstream. 
+ +The PTE_MAYBE_NG macro sets the nG page table bit according to the value +of "arm64_use_ng_mappings". This variable is currently placed in the +.bss section. create_init_idmap() is called before the .bss section +initialisation which is done in early_map_kernel(). Therefore, +data/text_prot in create_init_idmap() could be set incorrectly through +the PAGE_KERNEL -> PROT_DEFAULT -> PTE_MAYBE_NG macros. + + # llvm-objdump-21 --syms vmlinux-gcc | grep arm64_use_ng_mappings + ffff800082f242a8 g O .bss 0000000000000001 arm64_use_ng_mappings + +The create_init_idmap() function disassembly compiled with llvm-21: + + // create_init_idmap() + ffff80008255c058: d10103ff sub sp, sp, #0x40 + ffff80008255c05c: a9017bfd stp x29, x30, [sp, #0x10] + ffff80008255c060: a90257f6 stp x22, x21, [sp, #0x20] + ffff80008255c064: a9034ff4 stp x20, x19, [sp, #0x30] + ffff80008255c068: 910043fd add x29, sp, #0x10 + ffff80008255c06c: 90003fc8 adrp x8, 0xffff800082d54000 + ffff80008255c070: d280e06a mov x10, #0x703 // =1795 + ffff80008255c074: 91400409 add x9, x0, #0x1, lsl #12 // =0x1000 + ffff80008255c078: 394a4108 ldrb w8, [x8, #0x290] ------------- (1) + ffff80008255c07c: f2e00d0a movk x10, #0x68, lsl #48 + ffff80008255c080: f90007e9 str x9, [sp, #0x8] + ffff80008255c084: aa0103f3 mov x19, x1 + ffff80008255c088: aa0003f4 mov x20, x0 + ffff80008255c08c: 14000000 b 0xffff80008255c08c <__pi_create_init_idmap+0x34> + ffff80008255c090: aa082d56 orr x22, x10, x8, lsl #11 -------- (2) + +Note (1) loads the arm64_use_ng_mappings value into w8 and (2) sets +the text or data prot with the w8 value to set the PTE_NG bit. If the .bss +section isn't initialized, x8 could include a garbage value and generate +an incorrect mapping. + +Annotate arm64_use_ng_mappings as __read_mostly so that it is placed in +the .data section. 
+ +Fixes: 84b04d3e6bdb ("arm64: kernel: Create initial ID map from C code") +Cc: stable@vger.kernel.org # 6.9.x +Tested-by: Nathan Chancellor +Signed-off-by: Yeoreum Yun +Link: https://lore.kernel.org/r/20250502180412.3774883-1-yeoreum.yun@arm.com +[catalin.marinas@arm.com: use __read_mostly instead of __ro_after_init] +[catalin.marinas@arm.com: slight tweaking of the code comment] +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/cpufeature.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -113,7 +113,14 @@ static struct arm64_cpu_capabilities con + + DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); + +-bool arm64_use_ng_mappings = false; ++/* ++ * arm64_use_ng_mappings must be placed in the .data section, otherwise it ++ * ends up in the .bss section where it is initialized in early_map_kernel() ++ * after the MMU (with the idmap) was enabled. create_init_idmap() - which ++ * runs before early_map_kernel() and reads the variable via PTE_MAYBE_NG - ++ * may end up generating an incorrect idmap page table attributes. 
++ */ ++bool arm64_use_ng_mappings __read_mostly = false; + EXPORT_SYMBOL(arm64_use_ng_mappings); + + DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; diff --git a/queue-6.14/clocksource-i8253-use-raw_spinlock_irqsave-in-clockevent_i8253_disable.patch b/queue-6.14/clocksource-i8253-use-raw_spinlock_irqsave-in-clockevent_i8253_disable.patch new file mode 100644 index 0000000000..11b6094150 --- /dev/null +++ b/queue-6.14/clocksource-i8253-use-raw_spinlock_irqsave-in-clockevent_i8253_disable.patch @@ -0,0 +1,53 @@ +From 94cff94634e506a4a44684bee1875d2dbf782722 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 4 Apr 2025 15:31:16 +0200 +Subject: clocksource/i8253: Use raw_spinlock_irqsave() in clockevent_i8253_disable() + +From: Sebastian Andrzej Siewior + +commit 94cff94634e506a4a44684bee1875d2dbf782722 upstream. + +On x86 during boot, clockevent_i8253_disable() can be invoked via +x86_late_time_init -> hpet_time_init() -> pit_timer_init() which happens +with enabled interrupts. + +If some of the old i8253 hardware is actually used then lockdep will notice +that i8253_lock is used in hard interrupt context. This causes lockdep to +complain because it observed the lock being acquired with interrupts +enabled and in hard interrupt context. + +Make clockevent_i8253_disable() acquire the lock with +raw_spinlock_irqsave() to cure this. 
+ +[ tglx: Massage change log and use guard() ] + +Fixes: c8c4076723dac ("x86/timer: Skip PIT initialization on modern chipsets") +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20250404133116.p-XRWJXf@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/clocksource/i8253.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/clocksource/i8253.c ++++ b/drivers/clocksource/i8253.c +@@ -103,7 +103,7 @@ int __init clocksource_i8253_init(void) + #ifdef CONFIG_CLKEVT_I8253 + void clockevent_i8253_disable(void) + { +- raw_spin_lock(&i8253_lock); ++ guard(raw_spinlock_irqsave)(&i8253_lock); + + /* + * Writing the MODE register should stop the counter, according to +@@ -132,8 +132,6 @@ void clockevent_i8253_disable(void) + outb_p(0, PIT_CH0); + + outb_p(0x30, PIT_MODE); +- +- raw_spin_unlock(&i8253_lock); + } + + static int pit_shutdown(struct clock_event_device *evt) diff --git a/queue-6.14/kvm-arm64-fix-uninitialized-memcache-pointer-in-user_mem_abort.patch b/queue-6.14/kvm-arm64-fix-uninitialized-memcache-pointer-in-user_mem_abort.patch new file mode 100644 index 0000000000..4dbbe82539 --- /dev/null +++ b/queue-6.14/kvm-arm64-fix-uninitialized-memcache-pointer-in-user_mem_abort.patch @@ -0,0 +1,62 @@ +From 157dbc4a321f5bb6f8b6c724d12ba720a90f1a7c Mon Sep 17 00:00:00 2001 +From: Sebastian Ott +Date: Mon, 5 May 2025 19:31:48 +0200 +Subject: KVM: arm64: Fix uninitialized memcache pointer in user_mem_abort() + +From: Sebastian Ott + +commit 157dbc4a321f5bb6f8b6c724d12ba720a90f1a7c upstream. + +Commit fce886a60207 ("KVM: arm64: Plumb the pKVM MMU in KVM") made the +initialization of the local memcache variable in user_mem_abort() +conditional, leaving a codepath where it is used uninitialized via +kvm_pgtable_stage2_map(). + +This can fail on any path that requires a stage-2 allocation +without transition via a permission fault or dirty logging. 
+ +Fix this by making sure that memcache is always valid. + +Fixes: fce886a60207 ("KVM: arm64: Plumb the pKVM MMU in KVM") +Signed-off-by: Sebastian Ott +Reviewed-by: Marc Zyngier +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/kvmarm/3f5db4c7-ccce-fb95-595c-692fa7aad227@redhat.com/ +Link: https://lore.kernel.org/r/20250505173148.33900-1-sebott@redhat.com +Signed-off-by: Oliver Upton +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/mmu.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/arch/arm64/kvm/mmu.c ++++ b/arch/arm64/kvm/mmu.c +@@ -1489,6 +1489,11 @@ static int user_mem_abort(struct kvm_vcp + return -EFAULT; + } + ++ if (!is_protected_kvm_enabled()) ++ memcache = &vcpu->arch.mmu_page_cache; ++ else ++ memcache = &vcpu->arch.pkvm_memcache; ++ + /* + * Permission faults just need to update the existing leaf entry, + * and so normally don't require allocations from the memcache. The +@@ -1498,13 +1503,11 @@ static int user_mem_abort(struct kvm_vcp + if (!fault_is_perm || (logging_active && write_fault)) { + int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); + +- if (!is_protected_kvm_enabled()) { +- memcache = &vcpu->arch.mmu_page_cache; ++ if (!is_protected_kvm_enabled()) + ret = kvm_mmu_topup_memory_cache(memcache, min_pages); +- } else { +- memcache = &vcpu->arch.pkvm_memcache; ++ else + ret = topup_hyp_memcache(memcache, min_pages); +- } ++ + if (ret) + return ret; + } diff --git a/queue-6.14/memblock-accept-allocated-memory-before-use-in-memblock_double_array.patch b/queue-6.14/memblock-accept-allocated-memory-before-use-in-memblock_double_array.patch new file mode 100644 index 0000000000..45cb191998 --- /dev/null +++ b/queue-6.14/memblock-accept-allocated-memory-before-use-in-memblock_double_array.patch @@ -0,0 +1,72 @@ +From da8bf5daa5e55a6af2b285ecda460d6454712ff4 Mon Sep 17 00:00:00 2001 +From: Tom Lendacky +Date: Thu, 8 May 2025 12:24:10 -0500 +Subject: memblock: Accept allocated memory before 
use in memblock_double_array() + +From: Tom Lendacky + +commit da8bf5daa5e55a6af2b285ecda460d6454712ff4 upstream. + +When increasing the array size in memblock_double_array() and the slab +is not yet available, a call to memblock_find_in_range() is used to +reserve/allocate memory. However, the range returned may not have been +accepted, which can result in a crash when booting an SNP guest: + + RIP: 0010:memcpy_orig+0x68/0x130 + Code: ... + RSP: 0000:ffffffff9cc03ce8 EFLAGS: 00010006 + RAX: ff11001ff83e5000 RBX: 0000000000000000 RCX: fffffffffffff000 + RDX: 0000000000000bc0 RSI: ffffffff9dba8860 RDI: ff11001ff83e5c00 + RBP: 0000000000002000 R08: 0000000000000000 R09: 0000000000002000 + R10: 000000207fffe000 R11: 0000040000000000 R12: ffffffff9d06ef78 + R13: ff11001ff83e5000 R14: ffffffff9dba7c60 R15: 0000000000000c00 + memblock_double_array+0xff/0x310 + memblock_add_range+0x1fb/0x2f0 + memblock_reserve+0x4f/0xa0 + memblock_alloc_range_nid+0xac/0x130 + memblock_alloc_internal+0x53/0xc0 + memblock_alloc_try_nid+0x3d/0xa0 + swiotlb_init_remap+0x149/0x2f0 + mem_init+0xb/0xb0 + mm_core_init+0x8f/0x350 + start_kernel+0x17e/0x5d0 + x86_64_start_reservations+0x14/0x30 + x86_64_start_kernel+0x92/0xa0 + secondary_startup_64_no_verify+0x194/0x19b + +Mitigate this by calling accept_memory() on the memory range returned +before the slab is available. + +Prior to v6.12, the accept_memory() interface used a 'start' and 'end' +parameter instead of 'start' and 'size', therefore the accept_memory() +call must be adjusted to specify 'start + size' for 'end' when applying +to kernels prior to v6.12. 
+ +Cc: stable@vger.kernel.org # see patch description, needs adjustments for <= 6.11 +Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") +Signed-off-by: Tom Lendacky +Link: https://lore.kernel.org/r/da1ac73bf4ded761e21b4e4bb5178382a580cd73.1746725050.git.thomas.lendacky@amd.com +Signed-off-by: Mike Rapoport (Microsoft) +Signed-off-by: Greg Kroah-Hartman +--- + mm/memblock.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/mm/memblock.c ++++ b/mm/memblock.c +@@ -456,7 +456,14 @@ static int __init_memblock memblock_doub + min(new_area_start, memblock.current_limit), + new_alloc_size, PAGE_SIZE); + +- new_array = addr ? __va(addr) : NULL; ++ if (addr) { ++ /* The memory may not have been accepted, yet. */ ++ accept_memory(addr, new_alloc_size); ++ ++ new_array = __va(addr); ++ } else { ++ new_array = NULL; ++ } + } + if (!addr) { + pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", diff --git a/queue-6.14/module-ensure-that-kobject_put-is-safe-for-module-type-kobjects.patch b/queue-6.14/module-ensure-that-kobject_put-is-safe-for-module-type-kobjects.patch new file mode 100644 index 0000000000..513a25702e --- /dev/null +++ b/queue-6.14/module-ensure-that-kobject_put-is-safe-for-module-type-kobjects.patch @@ -0,0 +1,43 @@ +From a6aeb739974ec73e5217c75a7c008a688d3d5cf1 Mon Sep 17 00:00:00 2001 +From: Dmitry Antipov +Date: Wed, 7 May 2025 09:50:44 +0300 +Subject: module: ensure that kobject_put() is safe for module type kobjects + +From: Dmitry Antipov + +commit a6aeb739974ec73e5217c75a7c008a688d3d5cf1 upstream. + +In 'lookup_or_create_module_kobject()', an internal kobject is created +using 'module_ktype'. So call to 'kobject_put()' on error handling +path causes an attempt to use an uninitialized completion pointer in +'module_kobject_release()'. 
In this scenario, we just want to release +kobject without an extra synchronization required for a regular module +unloading process, so adding an extra check whether 'complete()' is +actually required makes 'kobject_put()' safe. + +Reported-by: syzbot+7fb8a372e1f6add936dd@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=7fb8a372e1f6add936dd +Fixes: 942e443127e9 ("module: Fix mod->mkobj.kobj potentially freed too early") +Cc: stable@vger.kernel.org +Suggested-by: Petr Pavlu +Signed-off-by: Dmitry Antipov +Link: https://lore.kernel.org/r/20250507065044.86529-1-dmantipov@yandex.ru +Signed-off-by: Petr Pavlu +Signed-off-by: Greg Kroah-Hartman +--- + kernel/params.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/kernel/params.c ++++ b/kernel/params.c +@@ -949,7 +949,9 @@ struct kset *module_kset; + static void module_kobj_release(struct kobject *kobj) + { + struct module_kobject *mk = to_module_kobject(kobj); +- complete(mk->kobj_completion); ++ ++ if (mk->kobj_completion) ++ complete(mk->kobj_completion); + } + + const struct kobj_type module_ktype = { diff --git a/queue-6.14/ocfs2-fix-panic-in-failed-foilio-allocation.patch b/queue-6.14/ocfs2-fix-panic-in-failed-foilio-allocation.patch new file mode 100644 index 0000000000..78b3250688 --- /dev/null +++ b/queue-6.14/ocfs2-fix-panic-in-failed-foilio-allocation.patch @@ -0,0 +1,45 @@ +From 31d4cd4eb2f8d9b87ebfa6a5e443a59e3b3d7b8c Mon Sep 17 00:00:00 2001 +From: Mark Tinguely +Date: Fri, 11 Apr 2025 11:31:24 -0500 +Subject: ocfs2: fix panic in failed foilio allocation + +From: Mark Tinguely + +commit 31d4cd4eb2f8d9b87ebfa6a5e443a59e3b3d7b8c upstream. + +commit 7e119cff9d0a ("ocfs2: convert w_pages to w_folios") and commit +9a5e08652dc4b ("ocfs2: use an array of folios instead of an array of +pages") save -ENOMEM in the folio array upon allocation failure and call +the folio array free code. + +The folio array free code expects either valid folio pointers or NULL. 
+Finding the -ENOMEM will result in a panic. Fix by NULLing the error +folio entry. + +Link: https://lkml.kernel.org/r/c879a52b-835c-4fa0-902b-8b2e9196dcbd@oracle.com +Fixes: 7e119cff9d0a ("ocfs2: convert w_pages to w_folios") +Fixes: 9a5e08652dc4b ("ocfs2: use an array of folios instead of an array of pages") +Signed-off-by: Mark Tinguely +Reviewed-by: Matthew Wilcox (Oracle) +Cc: Changwei Ge +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Mark Fasheh +Cc: Nathan Chancellor +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/alloc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/ocfs2/alloc.c ++++ b/fs/ocfs2/alloc.c +@@ -6918,6 +6918,7 @@ static int ocfs2_grab_folios(struct inod + if (IS_ERR(folios[numfolios])) { + ret = PTR_ERR(folios[numfolios]); + mlog_errno(ret); ++ folios[numfolios] = NULL; + goto out; + } + diff --git a/queue-6.14/ocfs2-fix-the-issue-with-discontiguous-allocation-in-the-global_bitmap.patch b/queue-6.14/ocfs2-fix-the-issue-with-discontiguous-allocation-in-the-global_bitmap.patch new file mode 100644 index 0000000000..aa13286d42 --- /dev/null +++ b/queue-6.14/ocfs2-fix-the-issue-with-discontiguous-allocation-in-the-global_bitmap.patch @@ -0,0 +1,166 @@ +From bd1261b16d9131d79723d982d54295e7f309797a Mon Sep 17 00:00:00 2001 +From: Heming Zhao +Date: Mon, 14 Apr 2025 14:01:23 +0800 +Subject: ocfs2: fix the issue with discontiguous allocation in the global_bitmap + +From: Heming Zhao + +commit bd1261b16d9131d79723d982d54295e7f309797a upstream. + +commit 4eb7b93e0310 ("ocfs2: improve write IO performance when +fragmentation is high") introduced another regression. + +The following ocfs2-test case can trigger this issue: +> discontig_runner.sh => activate_discontig_bg.sh => resv_unwritten: +> ${RESV_UNWRITTEN_BIN} -f ${WORK_PLACE}/large_testfile -s 0 -l \ +> $((${FILE_MAJOR_SIZE_M}*1024*1024)) + +In my env, test disk size (by "fdisk -l "): +> 53687091200 bytes, 104857600 sectors. 
+ +Above command is: +> /usr/local/ocfs2-test/bin/resv_unwritten -f \ +> /mnt/ocfs2/ocfs2-activate-discontig-bg-dir/large_testfile -s 0 -l \ +> 53187969024 + +Error log: +> [*] Reserve 50724M space for a LARGE file, reserve 200M space for future test. +> ioctl error 28: "No space left on device" +> resv allocation failed Unknown error -1 +> reserve unwritten region from 0 to 53187969024. + +Call flow: +__ocfs2_change_file_space //by ioctl OCFS2_IOC_RESVSP64 + ocfs2_allocate_unwritten_extents //start:0 len:53187969024 + while() + + ocfs2_get_clusters //cpos:0, alloc_size:1623168 (cluster number) + + ocfs2_extend_allocation + + ocfs2_lock_allocators + | + choose OCFS2_AC_USE_MAIN & ocfs2_cluster_group_search + | + + ocfs2_add_inode_data + ocfs2_add_clusters_in_btree + __ocfs2_claim_clusters + ocfs2_claim_suballoc_bits + + During the allocation of the final part of the large file + (after ~47GB), no chain had the required contiguous + bits_wanted. Consequently, the allocation failed. + +How to fix: +When OCFS2 is encountering fragmented allocation, the file system should +stop attempting bits_wanted contiguous allocation and instead provide the +largest available contiguous free bits from the cluster groups. 
+ +Link: https://lkml.kernel.org/r/20250414060125.19938-2-heming.zhao@suse.com +Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high") +Signed-off-by: Heming Zhao +Reported-by: Gautham Ananthakrishna +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Jun Piao +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/suballoc.c | 38 ++++++++++++++++++++++++++++++++------ + fs/ocfs2/suballoc.h | 1 + + 2 files changed, 33 insertions(+), 6 deletions(-) + +--- a/fs/ocfs2/suballoc.c ++++ b/fs/ocfs2/suballoc.c +@@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struc + + bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, + ac, cl); +- if (PTR_ERR(bg_bh) == -ENOSPC) ++ if (PTR_ERR(bg_bh) == -ENOSPC) { ++ ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; + bg_bh = ocfs2_block_group_alloc_discontig(handle, + alloc_inode, + ac, cl); ++ } + if (IS_ERR(bg_bh)) { + status = PTR_ERR(bg_bh); + bg_bh = NULL; +@@ -1794,6 +1796,7 @@ static int ocfs2_search_chain(struct ocf + { + int status; + u16 chain; ++ u32 contig_bits; + u64 next_group; + struct inode *alloc_inode = ac->ac_inode; + struct buffer_head *group_bh = NULL; +@@ -1819,10 +1822,21 @@ static int ocfs2_search_chain(struct ocf + status = -ENOSPC; + /* for now, the chain search is a bit simplistic. We just use + * the 1st group with any empty bits. 
*/ +- while ((status = ac->ac_group_search(alloc_inode, group_bh, +- bits_wanted, min_bits, +- ac->ac_max_block, +- res)) == -ENOSPC) { ++ while (1) { ++ if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) { ++ contig_bits = le16_to_cpu(bg->bg_contig_free_bits); ++ if (!contig_bits) ++ contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap, ++ le16_to_cpu(bg->bg_bits), 0); ++ if (bits_wanted > contig_bits && contig_bits >= min_bits) ++ bits_wanted = contig_bits; ++ } ++ ++ status = ac->ac_group_search(alloc_inode, group_bh, ++ bits_wanted, min_bits, ++ ac->ac_max_block, res); ++ if (status != -ENOSPC) ++ break; + if (!bg->bg_next_group) + break; + +@@ -1982,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(str + victim = ocfs2_find_victim_chain(cl); + ac->ac_chain = victim; + ++search: + status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, + res, &bits_left); + if (!status) { +@@ -2022,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(str + } + } + ++ /* Chains can't supply the bits_wanted contiguous space. ++ * We should switch to using every single bit when allocating ++ * from the global bitmap. 
*/ ++ if (i == le16_to_cpu(cl->cl_next_free_rec) && ++ status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) { ++ ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; ++ ac->ac_chain = victim; ++ goto search; ++ } ++ + set_hint: + if (status != -ENOSPC) { + /* If the next search of this group is not likely to +@@ -2365,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *han + BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); + + BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL +- && ac->ac_which != OCFS2_AC_USE_MAIN); ++ && ac->ac_which != OCFS2_AC_USE_MAIN ++ && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG); + + if (ac->ac_which == OCFS2_AC_USE_LOCAL) { + WARN_ON(min_clusters > 1); +--- a/fs/ocfs2/suballoc.h ++++ b/fs/ocfs2/suballoc.h +@@ -29,6 +29,7 @@ struct ocfs2_alloc_context { + #define OCFS2_AC_USE_MAIN 2 + #define OCFS2_AC_USE_INODE 3 + #define OCFS2_AC_USE_META 4 ++#define OCFS2_AC_USE_MAIN_DISCONTIG 5 + u32 ac_which; + + /* these are used by the chain search */ diff --git a/queue-6.14/ocfs2-implement-handshaking-with-ocfs2-recovery-thread.patch b/queue-6.14/ocfs2-implement-handshaking-with-ocfs2-recovery-thread.patch new file mode 100644 index 0000000000..68b0f718c3 --- /dev/null +++ b/queue-6.14/ocfs2-implement-handshaking-with-ocfs2-recovery-thread.patch @@ -0,0 +1,139 @@ +From 8f947e0fd595951460f5a6e1ac29baa82fa02eab Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 24 Apr 2025 15:45:12 +0200 +Subject: ocfs2: implement handshaking with ocfs2 recovery thread + +From: Jan Kara + +commit 8f947e0fd595951460f5a6e1ac29baa82fa02eab upstream. + +We will need ocfs2 recovery thread to acknowledge transitions of +recovery_state when disabling particular types of recovery. This is +similar to what currently happens when disabling recovery completely, just +more general. Implement the handshake and use it for exit from recovery. 
+ +Link: https://lkml.kernel.org/r/20250424134515.18933-5-jack@suse.cz +Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") +Signed-off-by: Jan Kara +Reviewed-by: Heming Zhao +Tested-by: Heming Zhao +Acked-by: Joseph Qi +Cc: Changwei Ge +Cc: Joel Becker +Cc: Jun Piao +Cc: Junxiao Bi +Cc: Mark Fasheh +Cc: Murad Masimov +Cc: Shichangkuo +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/journal.c | 52 +++++++++++++++++++++++++++++++++++----------------- + fs/ocfs2/ocfs2.h | 4 ++++ + 2 files changed, 39 insertions(+), 17 deletions(-) + +--- a/fs/ocfs2/journal.c ++++ b/fs/ocfs2/journal.c +@@ -190,31 +190,48 @@ int ocfs2_recovery_init(struct ocfs2_sup + return 0; + } + +-/* we can't grab the goofy sem lock from inside wait_event, so we use +- * memory barriers to make sure that we'll see the null task before +- * being woken up */ + static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) + { +- mb(); + return osb->recovery_thread_task != NULL; + } + +-void ocfs2_recovery_exit(struct ocfs2_super *osb) ++static void ocfs2_recovery_disable(struct ocfs2_super *osb, ++ enum ocfs2_recovery_state state) + { +- struct ocfs2_recovery_map *rm; +- +- /* disable any new recovery threads and wait for any currently +- * running ones to exit. Do this before setting the vol_state. */ + mutex_lock(&osb->recovery_lock); +- osb->recovery_state = OCFS2_REC_DISABLED; ++ /* ++ * If recovery thread is not running, we can directly transition to ++ * final state. 
++ */ ++ if (!ocfs2_recovery_thread_running(osb)) { ++ osb->recovery_state = state + 1; ++ goto out_lock; ++ } ++ osb->recovery_state = state; ++ /* Wait for recovery thread to acknowledge state transition */ ++ wait_event_cmd(osb->recovery_event, ++ !ocfs2_recovery_thread_running(osb) || ++ osb->recovery_state >= state + 1, ++ mutex_unlock(&osb->recovery_lock), ++ mutex_lock(&osb->recovery_lock)); ++out_lock: + mutex_unlock(&osb->recovery_lock); +- wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); + +- /* At this point, we know that no more recovery threads can be +- * launched, so wait for any recovery completion work to +- * complete. */ ++ /* ++ * At this point we know that no more recovery work can be queued so ++ * wait for any recovery completion work to complete. ++ */ + if (osb->ocfs2_wq) + flush_workqueue(osb->ocfs2_wq); ++} ++ ++void ocfs2_recovery_exit(struct ocfs2_super *osb) ++{ ++ struct ocfs2_recovery_map *rm; ++ ++ /* disable any new recovery threads and wait for any currently ++ * running ones to exit. Do this before setting the vol_state. 
*/ ++ ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE); + + /* + * Now that recovery is shut down, and the osb is about to be +@@ -1569,7 +1586,8 @@ bail: + + ocfs2_free_replay_slots(osb); + osb->recovery_thread_task = NULL; +- mb(); /* sync with ocfs2_recovery_thread_running */ ++ if (osb->recovery_state == OCFS2_REC_WANT_DISABLE) ++ osb->recovery_state = OCFS2_REC_DISABLED; + wake_up(&osb->recovery_event); + + mutex_unlock(&osb->recovery_lock); +@@ -1585,13 +1603,13 @@ void ocfs2_recovery_thread(struct ocfs2_ + int was_set = -1; + + mutex_lock(&osb->recovery_lock); +- if (osb->recovery_state < OCFS2_REC_DISABLED) ++ if (osb->recovery_state < OCFS2_REC_WANT_DISABLE) + was_set = ocfs2_recovery_map_set(osb, node_num); + + trace_ocfs2_recovery_thread(node_num, osb->node_num, + osb->recovery_state, osb->recovery_thread_task, was_set); + +- if (osb->recovery_state == OCFS2_REC_DISABLED) ++ if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE) + goto out; + + if (osb->recovery_thread_task) +--- a/fs/ocfs2/ocfs2.h ++++ b/fs/ocfs2/ocfs2.h +@@ -310,6 +310,10 @@ void ocfs2_initialize_journal_triggers(s + + enum ocfs2_recovery_state { + OCFS2_REC_ENABLED = 0, ++ OCFS2_REC_WANT_DISABLE, ++ /* ++ * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work ++ */ + OCFS2_REC_DISABLED, + }; + diff --git a/queue-6.14/ocfs2-stop-quota-recovery-before-disabling-quotas.patch b/queue-6.14/ocfs2-stop-quota-recovery-before-disabling-quotas.patch new file mode 100644 index 0000000000..b97a97cc8d --- /dev/null +++ b/queue-6.14/ocfs2-stop-quota-recovery-before-disabling-quotas.patch @@ -0,0 +1,171 @@ +From fcaf3b2683b05a9684acdebda706a12025a6927a Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 24 Apr 2025 15:45:13 +0200 +Subject: ocfs2: stop quota recovery before disabling quotas + +From: Jan Kara + +commit fcaf3b2683b05a9684acdebda706a12025a6927a upstream. + +Currently quota recovery is synchronized with unmount using sb->s_umount +semaphore. 
That is however prone to deadlocks because +flush_workqueue(osb->ocfs2_wq) called from umount code can wait for quota +recovery to complete while ocfs2_finish_quota_recovery() waits for +sb->s_umount semaphore. + +Grabbing of sb->s_umount semaphore in ocfs2_finish_quota_recovery() is +only needed to protect that function from disabling of quotas from +ocfs2_dismount_volume(). Handle this problem by disabling quota recovery +early during unmount in ocfs2_dismount_volume() instead so that we can +drop acquisition of sb->s_umount from ocfs2_finish_quota_recovery(). + +Link: https://lkml.kernel.org/r/20250424134515.18933-6-jack@suse.cz +Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") +Signed-off-by: Jan Kara +Reported-by: Shichangkuo +Reported-by: Murad Masimov +Reviewed-by: Heming Zhao +Tested-by: Heming Zhao +Acked-by: Joseph Qi +Cc: Changwei Ge +Cc: Joel Becker +Cc: Jun Piao +Cc: Junxiao Bi +Cc: Mark Fasheh +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/journal.c | 20 ++++++++++++++++++-- + fs/ocfs2/journal.h | 1 + + fs/ocfs2/ocfs2.h | 6 ++++++ + fs/ocfs2/quota_local.c | 9 ++------- + fs/ocfs2/super.c | 3 +++ + 5 files changed, 30 insertions(+), 9 deletions(-) + +--- a/fs/ocfs2/journal.c ++++ b/fs/ocfs2/journal.c +@@ -225,6 +225,11 @@ out_lock: + flush_workqueue(osb->ocfs2_wq); + } + ++void ocfs2_recovery_disable_quota(struct ocfs2_super *osb) ++{ ++ ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE); ++} ++ + void ocfs2_recovery_exit(struct ocfs2_super *osb) + { + struct ocfs2_recovery_map *rm; +@@ -1489,6 +1494,18 @@ static int __ocfs2_recovery_thread(void + } + } + restart: ++ if (quota_enabled) { ++ mutex_lock(&osb->recovery_lock); ++ /* Confirm that recovery thread will no longer recover quotas */ ++ if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) { ++ osb->recovery_state = OCFS2_REC_QUOTA_DISABLED; ++ wake_up(&osb->recovery_event); ++ } ++ if (osb->recovery_state >= 
OCFS2_REC_QUOTA_DISABLED) ++ quota_enabled = 0; ++ mutex_unlock(&osb->recovery_lock); ++ } ++ + status = ocfs2_super_lock(osb, 1); + if (status < 0) { + mlog_errno(status); +@@ -1592,8 +1609,7 @@ bail: + + mutex_unlock(&osb->recovery_lock); + +- if (quota_enabled) +- kfree(rm_quota); ++ kfree(rm_quota); + + return status; + } +--- a/fs/ocfs2/journal.h ++++ b/fs/ocfs2/journal.h +@@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs + + int ocfs2_recovery_init(struct ocfs2_super *osb); + void ocfs2_recovery_exit(struct ocfs2_super *osb); ++void ocfs2_recovery_disable_quota(struct ocfs2_super *osb); + + int ocfs2_compute_replay_slots(struct ocfs2_super *osb); + void ocfs2_free_replay_slots(struct ocfs2_super *osb); +--- a/fs/ocfs2/ocfs2.h ++++ b/fs/ocfs2/ocfs2.h +@@ -310,6 +310,12 @@ void ocfs2_initialize_journal_triggers(s + + enum ocfs2_recovery_state { + OCFS2_REC_ENABLED = 0, ++ OCFS2_REC_QUOTA_WANT_DISABLE, ++ /* ++ * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for ++ * ocfs2_recovery_disable_quota() to work. ++ */ ++ OCFS2_REC_QUOTA_DISABLED, + OCFS2_REC_WANT_DISABLE, + /* + * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work +--- a/fs/ocfs2/quota_local.c ++++ b/fs/ocfs2/quota_local.c +@@ -453,8 +453,7 @@ out: + + /* Sync changes in local quota file into global quota file and + * reinitialize local quota file. +- * The function expects local quota file to be already locked and +- * s_umount locked in shared mode. */ ++ * The function expects local quota file to be already locked. 
*/ + static int ocfs2_recover_local_quota_file(struct inode *lqinode, + int type, + struct ocfs2_quota_recovery *rec) +@@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct o + { + unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, + LOCAL_GROUP_QUOTA_SYSTEM_INODE }; +- struct super_block *sb = osb->sb; + struct ocfs2_local_disk_dqinfo *ldinfo; + struct buffer_head *bh; + handle_t *handle; +@@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct o + printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " + "slot %u\n", osb->dev_str, slot_num); + +- down_read(&sb->s_umount); + for (type = 0; type < OCFS2_MAXQUOTAS; type++) { + if (list_empty(&(rec->r_list[type]))) + continue; +@@ -677,7 +674,6 @@ out_put: + break; + } + out: +- up_read(&sb->s_umount); + kfree(rec); + return status; + } +@@ -843,8 +839,7 @@ static int ocfs2_local_free_info(struct + ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); + + /* +- * s_umount held in exclusive mode protects us against racing with +- * recovery thread... ++ * ocfs2_dismount_volume() has already aborted quota recovery... 
+ */ + if (oinfo->dqi_rec) { + ocfs2_free_quota_recovery(oinfo->dqi_rec); +--- a/fs/ocfs2/super.c ++++ b/fs/ocfs2/super.c +@@ -1812,6 +1812,9 @@ static void ocfs2_dismount_volume(struct + /* Orphan scan should be stopped as early as possible */ + ocfs2_orphan_scan_stop(osb); + ++ /* Stop quota recovery so that we can disable quotas */ ++ ocfs2_recovery_disable_quota(osb); ++ + ocfs2_disable_quotas(osb); + + /* All dquots should be freed by now */ diff --git a/queue-6.14/ocfs2-switch-osb-disable_recovery-to-enum.patch b/queue-6.14/ocfs2-switch-osb-disable_recovery-to-enum.patch new file mode 100644 index 0000000000..af2cf292af --- /dev/null +++ b/queue-6.14/ocfs2-switch-osb-disable_recovery-to-enum.patch @@ -0,0 +1,107 @@ +From c0fb83088f0cc4ee4706e0495ee8b06f49daa716 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 24 Apr 2025 15:45:11 +0200 +Subject: ocfs2: switch osb->disable_recovery to enum + +From: Jan Kara + +commit c0fb83088f0cc4ee4706e0495ee8b06f49daa716 upstream. + +Patch series "ocfs2: Fix deadlocks in quota recovery", v3. + +This implements another approach to fixing quota recovery deadlocks. We +avoid grabbing sb->s_umount semaphore from ocfs2_finish_quota_recovery() +and instead stop quota recovery early in ocfs2_dismount_volume(). + + +This patch (of 3): + +We will need more recovery states than just pure enable / disable to fix +deadlocks with quota recovery. Switch osb->disable_recovery to enum. 
+ +Link: https://lkml.kernel.org/r/20250424134301.1392-1-jack@suse.cz +Link: https://lkml.kernel.org/r/20250424134515.18933-4-jack@suse.cz +Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") +Signed-off-by: Jan Kara +Reviewed-by: Heming Zhao +Tested-by: Heming Zhao +Acked-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Jun Piao +Cc: Murad Masimov +Cc: Shichangkuo +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/journal.c | 14 ++++++++------ + fs/ocfs2/ocfs2.h | 7 ++++++- + 2 files changed, 14 insertions(+), 7 deletions(-) + +--- a/fs/ocfs2/journal.c ++++ b/fs/ocfs2/journal.c +@@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_sup + struct ocfs2_recovery_map *rm; + + mutex_init(&osb->recovery_lock); +- osb->disable_recovery = 0; ++ osb->recovery_state = OCFS2_REC_ENABLED; + osb->recovery_thread_task = NULL; + init_waitqueue_head(&osb->recovery_event); + +@@ -206,7 +206,7 @@ void ocfs2_recovery_exit(struct ocfs2_su + /* disable any new recovery threads and wait for any currently + * running ones to exit. Do this before setting the vol_state. */ + mutex_lock(&osb->recovery_lock); +- osb->disable_recovery = 1; ++ osb->recovery_state = OCFS2_REC_DISABLED; + mutex_unlock(&osb->recovery_lock); + wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); + +@@ -1582,14 +1582,16 @@ bail: + + void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) + { ++ int was_set = -1; ++ + mutex_lock(&osb->recovery_lock); ++ if (osb->recovery_state < OCFS2_REC_DISABLED) ++ was_set = ocfs2_recovery_map_set(osb, node_num); + + trace_ocfs2_recovery_thread(node_num, osb->node_num, +- osb->disable_recovery, osb->recovery_thread_task, +- osb->disable_recovery ? 
+- -1 : ocfs2_recovery_map_set(osb, node_num)); ++ osb->recovery_state, osb->recovery_thread_task, was_set); + +- if (osb->disable_recovery) ++ if (osb->recovery_state == OCFS2_REC_DISABLED) + goto out; + + if (osb->recovery_thread_task) +--- a/fs/ocfs2/ocfs2.h ++++ b/fs/ocfs2/ocfs2.h +@@ -308,6 +308,11 @@ enum ocfs2_journal_trigger_type { + void ocfs2_initialize_journal_triggers(struct super_block *sb, + struct ocfs2_triggers triggers[]); + ++enum ocfs2_recovery_state { ++ OCFS2_REC_ENABLED = 0, ++ OCFS2_REC_DISABLED, ++}; ++ + struct ocfs2_journal; + struct ocfs2_slot_info; + struct ocfs2_recovery_map; +@@ -370,7 +375,7 @@ struct ocfs2_super + struct ocfs2_recovery_map *recovery_map; + struct ocfs2_replay_map *replay_map; + struct task_struct *recovery_thread_task; +- int disable_recovery; ++ enum ocfs2_recovery_state recovery_state; + wait_queue_head_t checkpoint_event; + struct ocfs2_journal *journal; + unsigned long osb_commit_interval; diff --git a/queue-6.14/series b/queue-6.14/series index 8fc12301eb..6b284e6a16 100644 --- a/queue-6.14/series +++ b/queue-6.14/series @@ -116,3 +116,25 @@ drm-amdgpu-hdp6-use-memcfg-register-to-post-the-write-for-hdp-flush.patch drm-amdgpu-hdp7-use-memcfg-register-to-post-the-write-for-hdp-flush.patch xhci-dbc-avoid-event-polling-busyloop-if-pending-rx-transfers-are-inactive.patch usb-uhci-platform-make-the-clock-really-optional.patch +smb-client-avoid-race-in-open_cached_dir-with-lease-breaks.patch +xen-swiotlb-use-swiotlb-bouncing-if-kmalloc-allocation-demands-it.patch +xenbus-use-kref-to-track-req-lifetime.patch +accel-ivpu-increase-state-dump-msg-timeout.patch +arm64-cpufeature-move-arm64_use_ng_mappings-to-the-.data-section-to-prevent-wrong-idmap-generation.patch +clocksource-i8253-use-raw_spinlock_irqsave-in-clockevent_i8253_disable.patch +kvm-arm64-fix-uninitialized-memcache-pointer-in-user_mem_abort.patch +memblock-accept-allocated-memory-before-use-in-memblock_double_array.patch 
+module-ensure-that-kobject_put-is-safe-for-module-type-kobjects.patch +x86-microcode-consolidate-the-loader-enablement-checking.patch +ocfs2-fix-panic-in-failed-foilio-allocation.patch +ocfs2-fix-the-issue-with-discontiguous-allocation-in-the-global_bitmap.patch +ocfs2-switch-osb-disable_recovery-to-enum.patch +ocfs2-implement-handshaking-with-ocfs2-recovery-thread.patch +ocfs2-stop-quota-recovery-before-disabling-quotas.patch +usb-dwc3-gadget-make-gadget_wakeup-asynchronous.patch +usb-cdnsp-fix-issue-with-resuming-from-l1.patch +usb-cdnsp-fix-l1-resume-issue-for-rtl_revision_new_lpm-version.patch +usb-gadget-f_ecm-add-get_status-callback.patch +usb-gadget-tegra-xudc-ack-st_rc-after-clearing-ctrl_run.patch +usb-gadget-use-get_status-callback-to-set-remote-wakeup-capability.patch +usb-host-tegra-prevent-host-controller-crash-when-otg-port-is-used.patch diff --git a/queue-6.14/smb-client-avoid-race-in-open_cached_dir-with-lease-breaks.patch b/queue-6.14/smb-client-avoid-race-in-open_cached_dir-with-lease-breaks.patch new file mode 100644 index 0000000000..f811f61eff --- /dev/null +++ b/queue-6.14/smb-client-avoid-race-in-open_cached_dir-with-lease-breaks.patch @@ -0,0 +1,89 @@ +From 3ca02e63edccb78ef3659bebc68579c7224a6ca2 Mon Sep 17 00:00:00 2001 +From: Paul Aurich +Date: Tue, 6 May 2025 22:28:09 -0700 +Subject: smb: client: Avoid race in open_cached_dir with lease breaks + +From: Paul Aurich + +commit 3ca02e63edccb78ef3659bebc68579c7224a6ca2 upstream. + +A pre-existing valid cfid returned from find_or_create_cached_dir might +race with a lease break, meaning open_cached_dir doesn't consider it +valid, and thinks it's newly-constructed. This leaks a dentry reference +if the allocation occurs before the queued lease break work runs. + +Avoid the race by extending holding the cfid_list_lock across +find_or_create_cached_dir and when the result is checked. 
+ +Cc: stable@vger.kernel.org +Reviewed-by: Henrique Carvalho +Signed-off-by: Paul Aurich +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/client/cached_dir.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +--- a/fs/smb/client/cached_dir.c ++++ b/fs/smb/client/cached_dir.c +@@ -29,7 +29,6 @@ static struct cached_fid *find_or_create + { + struct cached_fid *cfid; + +- spin_lock(&cfids->cfid_list_lock); + list_for_each_entry(cfid, &cfids->entries, entry) { + if (!strcmp(cfid->path, path)) { + /* +@@ -38,25 +37,20 @@ static struct cached_fid *find_or_create + * being deleted due to a lease break. + */ + if (!cfid->time || !cfid->has_lease) { +- spin_unlock(&cfids->cfid_list_lock); + return NULL; + } + kref_get(&cfid->refcount); +- spin_unlock(&cfids->cfid_list_lock); + return cfid; + } + } + if (lookup_only) { +- spin_unlock(&cfids->cfid_list_lock); + return NULL; + } + if (cfids->num_entries >= max_cached_dirs) { +- spin_unlock(&cfids->cfid_list_lock); + return NULL; + } + cfid = init_cached_dir(path); + if (cfid == NULL) { +- spin_unlock(&cfids->cfid_list_lock); + return NULL; + } + cfid->cfids = cfids; +@@ -74,7 +68,6 @@ static struct cached_fid *find_or_create + */ + cfid->has_lease = true; + +- spin_unlock(&cfids->cfid_list_lock); + return cfid; + } + +@@ -187,8 +180,10 @@ replay_again: + if (!utf16_path) + return -ENOMEM; + ++ spin_lock(&cfids->cfid_list_lock); + cfid = find_or_create_cached_dir(cfids, path, lookup_only, tcon->max_cached_dirs); + if (cfid == NULL) { ++ spin_unlock(&cfids->cfid_list_lock); + kfree(utf16_path); + return -ENOENT; + } +@@ -197,7 +192,6 @@ replay_again: + * Otherwise, it is either a new entry or laundromat worker removed it + * from @cfids->entries. Caller will put last reference if the latter. 
+ */ +- spin_lock(&cfids->cfid_list_lock); + if (cfid->has_lease && cfid->time) { + spin_unlock(&cfids->cfid_list_lock); + *ret_cfid = cfid; diff --git a/queue-6.14/usb-cdnsp-fix-issue-with-resuming-from-l1.patch b/queue-6.14/usb-cdnsp-fix-issue-with-resuming-from-l1.patch new file mode 100644 index 0000000000..45ee024538 --- /dev/null +++ b/queue-6.14/usb-cdnsp-fix-issue-with-resuming-from-l1.patch @@ -0,0 +1,149 @@ +From 241e2ce88e5a494be7a5d44c0697592f1632fbee Mon Sep 17 00:00:00 2001 +From: Pawel Laszczak +Date: Fri, 18 Apr 2025 04:55:16 +0000 +Subject: usb: cdnsp: Fix issue with resuming from L1 + +From: Pawel Laszczak + +commit 241e2ce88e5a494be7a5d44c0697592f1632fbee upstream. + +In very rare cases after resuming controller from L1 to L0 it reads +registers before the UTMI clock has been enabled and as a result the +driver reads an incorrect value. +Most of the registers are in the APB clock domain but some of them (e.g. PORTSC) +are in the UTMI clock domain. +After entering the L1 state the UTMI clock can be disabled. +When the controller transitions from L1 to L0 the port status change event is +reported and in interrupt runtime function driver reads PORTSC. +During this read operation the controller synchronizes the UTMI and APB domains +but the UTMI clock is still disabled and as a result it reads the value 0xFFFFFFFF. +To fix this issue driver increases APB timeout value. + +The issue is platform specific and if the default value of APB timeout +is not sufficient then this time should be set individually for each +platform.
+ +Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") +Cc: stable +Signed-off-by: Pawel Laszczak +Acked-by: Peter Chen +Link: https://lore.kernel.org/r/PH7PR07MB953846C57973E4DB134CAA71DDBF2@PH7PR07MB9538.namprd07.prod.outlook.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/cdns3/cdnsp-gadget.c | 29 +++++++++++++++++++++++++++++ + drivers/usb/cdns3/cdnsp-gadget.h | 3 +++ + drivers/usb/cdns3/cdnsp-pci.c | 12 ++++++++++-- + drivers/usb/cdns3/core.h | 3 +++ + 4 files changed, 45 insertions(+), 2 deletions(-) + +--- a/drivers/usb/cdns3/cdnsp-gadget.c ++++ b/drivers/usb/cdns3/cdnsp-gadget.c +@@ -139,6 +139,26 @@ static void cdnsp_clear_port_change_bit( + (portsc & PORT_CHANGE_BITS), port_regs); + } + ++static void cdnsp_set_apb_timeout_value(struct cdnsp_device *pdev) ++{ ++ struct cdns *cdns = dev_get_drvdata(pdev->dev); ++ __le32 __iomem *reg; ++ void __iomem *base; ++ u32 offset = 0; ++ u32 val; ++ ++ if (!cdns->override_apb_timeout) ++ return; ++ ++ base = &pdev->cap_regs->hc_capbase; ++ offset = cdnsp_find_next_ext_cap(base, offset, D_XEC_PRE_REGS_CAP); ++ reg = base + offset + REG_CHICKEN_BITS_3_OFFSET; ++ ++ val = le32_to_cpu(readl(reg)); ++ val = CHICKEN_APB_TIMEOUT_SET(val, cdns->override_apb_timeout); ++ writel(cpu_to_le32(val), reg); ++} ++ + static void cdnsp_set_chicken_bits_2(struct cdnsp_device *pdev, u32 bit) + { + __le32 __iomem *reg; +@@ -1798,6 +1818,15 @@ static int cdnsp_gen_setup(struct cdnsp_ + pdev->hci_version = HC_VERSION(pdev->hcc_params); + pdev->hcc_params = readl(&pdev->cap_regs->hcc_params); + ++ /* ++ * Override the APB timeout value to give the controller more time for ++ * enabling UTMI clock and synchronizing APB and UTMI clock domains. ++ * This fix is platform specific and is required to fixes issue with ++ * reading incorrect value from PORTSC register after resuming ++ * from L1 state. 
++ */ ++ cdnsp_set_apb_timeout_value(pdev); ++ + cdnsp_get_rev_cap(pdev); + + /* Make sure the Device Controller is halted. */ +--- a/drivers/usb/cdns3/cdnsp-gadget.h ++++ b/drivers/usb/cdns3/cdnsp-gadget.h +@@ -520,6 +520,9 @@ struct cdnsp_rev_cap { + #define REG_CHICKEN_BITS_2_OFFSET 0x48 + #define CHICKEN_XDMA_2_TP_CACHE_DIS BIT(28) + ++#define REG_CHICKEN_BITS_3_OFFSET 0x4C ++#define CHICKEN_APB_TIMEOUT_SET(p, val) (((p) & ~GENMASK(21, 0)) | (val)) ++ + /* XBUF Extended Capability ID. */ + #define XBUF_CAP_ID 0xCB + #define XBUF_RX_TAG_MASK_0_OFFSET 0x1C +--- a/drivers/usb/cdns3/cdnsp-pci.c ++++ b/drivers/usb/cdns3/cdnsp-pci.c +@@ -28,6 +28,8 @@ + #define PCI_DRIVER_NAME "cdns-pci-usbssp" + #define PLAT_DRIVER_NAME "cdns-usbssp" + ++#define CHICKEN_APB_TIMEOUT_VALUE 0x1C20 ++ + static struct pci_dev *cdnsp_get_second_fun(struct pci_dev *pdev) + { + /* +@@ -139,6 +141,14 @@ static int cdnsp_pci_probe(struct pci_de + cdnsp->otg_irq = pdev->irq; + } + ++ /* ++ * Cadence PCI based platform require some longer timeout for APB ++ * to fixes domain clock synchronization issue after resuming ++ * controller from L1 state. ++ */ ++ cdnsp->override_apb_timeout = CHICKEN_APB_TIMEOUT_VALUE; ++ pci_set_drvdata(pdev, cdnsp); ++ + if (pci_is_enabled(func)) { + cdnsp->dev = dev; + cdnsp->gadget_init = cdnsp_gadget_init; +@@ -148,8 +158,6 @@ static int cdnsp_pci_probe(struct pci_de + goto free_cdnsp; + } + +- pci_set_drvdata(pdev, cdnsp); +- + device_wakeup_enable(&pdev->dev); + if (pci_dev_run_wake(pdev)) + pm_runtime_put_noidle(&pdev->dev); +--- a/drivers/usb/cdns3/core.h ++++ b/drivers/usb/cdns3/core.h +@@ -79,6 +79,8 @@ struct cdns3_platform_data { + * @pdata: platform data from glue layer + * @lock: spinlock structure + * @xhci_plat_data: xhci private data structure pointer ++ * @override_apb_timeout: hold value of APB timeout. For value 0 the default ++ * value in CHICKEN_BITS_3 will be preserved. 
+ * @gadget_init: pointer to gadget initialization function + */ + struct cdns { +@@ -117,6 +119,7 @@ struct cdns { + struct cdns3_platform_data *pdata; + spinlock_t lock; + struct xhci_plat_priv *xhci_plat_data; ++ u32 override_apb_timeout; + + int (*gadget_init)(struct cdns *cdns); + }; diff --git a/queue-6.14/usb-cdnsp-fix-l1-resume-issue-for-rtl_revision_new_lpm-version.patch b/queue-6.14/usb-cdnsp-fix-l1-resume-issue-for-rtl_revision_new_lpm-version.patch new file mode 100644 index 0000000000..6e855ac272 --- /dev/null +++ b/queue-6.14/usb-cdnsp-fix-l1-resume-issue-for-rtl_revision_new_lpm-version.patch @@ -0,0 +1,76 @@ +From 8614ecdb1570e4fffe87ebdc62b613ed66f1f6a6 Mon Sep 17 00:00:00 2001 +From: Pawel Laszczak +Date: Fri, 25 Apr 2025 05:55:40 +0000 +Subject: usb: cdnsp: fix L1 resume issue for RTL_REVISION_NEW_LPM version + +From: Pawel Laszczak + +commit 8614ecdb1570e4fffe87ebdc62b613ed66f1f6a6 upstream. + +The controllers with rtl version larger than +RTL_REVISION_NEW_LPM (0x00002700) have a bug which causes the controller +not to resume from L1 state. It happens if after receiving LPM packet +controller starts transitioning to L1 and at this moment the driver forces +resuming by write operation to PORTSC.PLS. +It's a corner case and happens when write operation to PORTSC occurs during +device delay before transitioning to L1 after transmitting ACK +time (TL1TokenRetry). + +Forcing transition from L1->L0 by driver for revision larger than +RTL_REVISION_NEW_LPM is not needed, so driver can simply fix this issue +by blocking the call to the cdnsp_force_l0_go function.
+ +Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") +Cc: stable +Signed-off-by: Pawel Laszczak +Acked-by: Peter Chen +Link: https://lore.kernel.org/r/PH7PR07MB9538B55C3A6E71F9ED29E980DD842@PH7PR07MB9538.namprd07.prod.outlook.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/cdns3/cdnsp-gadget.c | 2 ++ + drivers/usb/cdns3/cdnsp-gadget.h | 3 +++ + drivers/usb/cdns3/cdnsp-ring.c | 3 ++- + 3 files changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/usb/cdns3/cdnsp-gadget.c ++++ b/drivers/usb/cdns3/cdnsp-gadget.c +@@ -1793,6 +1793,8 @@ static void cdnsp_get_rev_cap(struct cdn + reg += cdnsp_find_next_ext_cap(reg, 0, RTL_REV_CAP); + pdev->rev_cap = reg; + ++ pdev->rtl_revision = readl(&pdev->rev_cap->rtl_revision); ++ + dev_info(pdev->dev, "Rev: %08x/%08x, eps: %08x, buff: %08x/%08x\n", + readl(&pdev->rev_cap->ctrl_revision), + readl(&pdev->rev_cap->rtl_revision), +--- a/drivers/usb/cdns3/cdnsp-gadget.h ++++ b/drivers/usb/cdns3/cdnsp-gadget.h +@@ -1360,6 +1360,7 @@ struct cdnsp_port { + * @rev_cap: Controller Capabilities Registers. + * @hcs_params1: Cached register copies of read-only HCSPARAMS1 + * @hcc_params: Cached register copies of read-only HCCPARAMS1 ++ * @rtl_revision: Cached controller rtl revision. + * @setup: Temporary buffer for setup packet. + * @ep0_preq: Internal allocated request used during enumeration. + * @ep0_stage: ep0 stage during enumeration process. +@@ -1414,6 +1415,8 @@ struct cdnsp_device { + __u32 hcs_params1; + __u32 hcs_params3; + __u32 hcc_params; ++ #define RTL_REVISION_NEW_LPM 0x2700 ++ __u32 rtl_revision; + /* Lock used in interrupt thread context. 
*/ + spinlock_t lock; + struct usb_ctrlrequest setup; +--- a/drivers/usb/cdns3/cdnsp-ring.c ++++ b/drivers/usb/cdns3/cdnsp-ring.c +@@ -308,7 +308,8 @@ static bool cdnsp_ring_ep_doorbell(struc + + writel(db_value, reg_addr); + +- cdnsp_force_l0_go(pdev); ++ if (pdev->rtl_revision < RTL_REVISION_NEW_LPM) ++ cdnsp_force_l0_go(pdev); + + /* Doorbell was set. */ + return true; diff --git a/queue-6.14/usb-dwc3-gadget-make-gadget_wakeup-asynchronous.patch b/queue-6.14/usb-dwc3-gadget-make-gadget_wakeup-asynchronous.patch new file mode 100644 index 0000000000..f2606095c3 --- /dev/null +++ b/queue-6.14/usb-dwc3-gadget-make-gadget_wakeup-asynchronous.patch @@ -0,0 +1,186 @@ +From 2372f1caeca433c4c01c2482f73fbe057f5168ce Mon Sep 17 00:00:00 2001 +From: Prashanth K +Date: Tue, 22 Apr 2025 16:02:31 +0530 +Subject: usb: dwc3: gadget: Make gadget_wakeup asynchronous + +From: Prashanth K + +commit 2372f1caeca433c4c01c2482f73fbe057f5168ce upstream. + +Currently gadget_wakeup() waits for U0 synchronously if it was +called from func_wakeup(), this is because we need to send the +function wakeup command soon after the link is active. And the +call is made synchronous by polling DSTS continuously for 20000 +times in __dwc3_gadget_wakeup(). But it was observed that sometimes +the link is not active even after polling 20K times, leading to +remote wakeup failures. Adding a small delay between each poll +helps, but that won't guarantee resolution in future. Hence make +the gadget_wakeup completely asynchronous. + +Since multiple interfaces can issue a function wakeup at once, +add a new variable wakeup_pending_funcs which will indicate the +functions that have issued func_wakeup, this is represented in a +bitmap format. If the link is in U3, dwc3_gadget_func_wakeup() +will set the bit corresponding to interface_id and bail out. +Once link comes back to U0, linksts_change irq is triggered, +where the function wakeup command is sent based on bitmap.
+ +Cc: stable +Fixes: 92c08a84b53e ("usb: dwc3: Add function suspend and function wakeup support") +Signed-off-by: Prashanth K +Acked-by: Thinh Nguyen +Link: https://lore.kernel.org/r/20250422103231.1954387-4-prashanth.k@oss.qualcomm.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/dwc3/core.h | 4 +++ + drivers/usb/dwc3/gadget.c | 60 +++++++++++++++++----------------------------- + 2 files changed, 27 insertions(+), 37 deletions(-) + +--- a/drivers/usb/dwc3/core.h ++++ b/drivers/usb/dwc3/core.h +@@ -1164,6 +1164,9 @@ struct dwc3_scratchpad_array { + * @gsbuscfg0_reqinfo: store GSBUSCFG0.DATRDREQINFO, DESRDREQINFO, + * DATWRREQINFO, and DESWRREQINFO value passed from + * glue driver. ++ * @wakeup_pending_funcs: Indicates whether any interface has requested for ++ * function wakeup in bitmap format where bit position ++ * represents interface_id. + */ + struct dwc3 { + struct work_struct drd_work; +@@ -1394,6 +1397,7 @@ struct dwc3 { + int num_ep_resized; + struct dentry *debug_root; + u32 gsbuscfg0_reqinfo; ++ u32 wakeup_pending_funcs; + }; + + #define INCRX_BURST_MODE 0 +--- a/drivers/usb/dwc3/gadget.c ++++ b/drivers/usb/dwc3/gadget.c +@@ -276,8 +276,6 @@ int dwc3_send_gadget_generic_command(str + return ret; + } + +-static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async); +- + /** + * dwc3_send_gadget_ep_cmd - issue an endpoint command + * @dep: the endpoint to which the command is going to be issued +@@ -2359,10 +2357,8 @@ static int dwc3_gadget_get_frame(struct + return __dwc3_gadget_get_frame(dwc); + } + +-static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async) ++static int __dwc3_gadget_wakeup(struct dwc3 *dwc) + { +- int retries; +- + int ret; + u32 reg; + +@@ -2390,8 +2386,7 @@ static int __dwc3_gadget_wakeup(struct d + return -EINVAL; + } + +- if (async) +- dwc3_gadget_enable_linksts_evts(dwc, true); ++ dwc3_gadget_enable_linksts_evts(dwc, true); + + ret = dwc3_gadget_set_link_state(dwc, 
DWC3_LINK_STATE_RECOV); + if (ret < 0) { +@@ -2410,27 +2405,8 @@ static int __dwc3_gadget_wakeup(struct d + + /* + * Since link status change events are enabled we will receive +- * an U0 event when wakeup is successful. So bail out. ++ * an U0 event when wakeup is successful. + */ +- if (async) +- return 0; +- +- /* poll until Link State changes to ON */ +- retries = 20000; +- +- while (retries--) { +- reg = dwc3_readl(dwc->regs, DWC3_DSTS); +- +- /* in HS, means ON */ +- if (DWC3_DSTS_USBLNKST(reg) == DWC3_LINK_STATE_U0) +- break; +- } +- +- if (DWC3_DSTS_USBLNKST(reg) != DWC3_LINK_STATE_U0) { +- dev_err(dwc->dev, "failed to send remote wakeup\n"); +- return -EINVAL; +- } +- + return 0; + } + +@@ -2451,7 +2427,7 @@ static int dwc3_gadget_wakeup(struct usb + spin_unlock_irqrestore(&dwc->lock, flags); + return -EINVAL; + } +- ret = __dwc3_gadget_wakeup(dwc, true); ++ ret = __dwc3_gadget_wakeup(dwc); + + spin_unlock_irqrestore(&dwc->lock, flags); + +@@ -2479,14 +2455,10 @@ static int dwc3_gadget_func_wakeup(struc + */ + link_state = dwc3_gadget_get_link_state(dwc); + if (link_state == DWC3_LINK_STATE_U3) { +- ret = __dwc3_gadget_wakeup(dwc, false); +- if (ret) { +- spin_unlock_irqrestore(&dwc->lock, flags); +- return -EINVAL; +- } +- dwc3_resume_gadget(dwc); +- dwc->suspended = false; +- dwc->link_state = DWC3_LINK_STATE_U0; ++ dwc->wakeup_pending_funcs |= BIT(intf_id); ++ ret = __dwc3_gadget_wakeup(dwc); ++ spin_unlock_irqrestore(&dwc->lock, flags); ++ return ret; + } + + ret = dwc3_send_gadget_generic_command(dwc, DWC3_DGCMD_DEV_NOTIFICATION, +@@ -4314,6 +4286,8 @@ static void dwc3_gadget_linksts_change_i + { + enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK; + unsigned int pwropt; ++ int ret; ++ int intf_id; + + /* + * WORKAROUND: DWC3 < 2.50a have an issue when configured without +@@ -4389,7 +4363,7 @@ static void dwc3_gadget_linksts_change_i + + switch (next) { + case DWC3_LINK_STATE_U0: +- if (dwc->gadget->wakeup_armed) { ++ if 
(dwc->gadget->wakeup_armed || dwc->wakeup_pending_funcs) { + dwc3_gadget_enable_linksts_evts(dwc, false); + dwc3_resume_gadget(dwc); + dwc->suspended = false; +@@ -4412,6 +4386,18 @@ static void dwc3_gadget_linksts_change_i + } + + dwc->link_state = next; ++ ++ /* Proceed with func wakeup if any interfaces that has requested */ ++ while (dwc->wakeup_pending_funcs && (next == DWC3_LINK_STATE_U0)) { ++ intf_id = ffs(dwc->wakeup_pending_funcs) - 1; ++ ret = dwc3_send_gadget_generic_command(dwc, DWC3_DGCMD_DEV_NOTIFICATION, ++ DWC3_DGCMDPAR_DN_FUNC_WAKE | ++ DWC3_DGCMDPAR_INTF_SEL(intf_id)); ++ if (ret) ++ dev_err(dwc->dev, "Failed to send DN wake for intf %d\n", intf_id); ++ ++ dwc->wakeup_pending_funcs &= ~BIT(intf_id); ++ } + } + + static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc, diff --git a/queue-6.14/usb-gadget-f_ecm-add-get_status-callback.patch b/queue-6.14/usb-gadget-f_ecm-add-get_status-callback.patch new file mode 100644 index 0000000000..3547a20c53 --- /dev/null +++ b/queue-6.14/usb-gadget-f_ecm-add-get_status-callback.patch @@ -0,0 +1,47 @@ +From 8e3820271c517ceb89ab7442656ba49fa23ee1d0 Mon Sep 17 00:00:00 2001 +From: Prashanth K +Date: Tue, 22 Apr 2025 16:02:29 +0530 +Subject: usb: gadget: f_ecm: Add get_status callback + +From: Prashanth K + +commit 8e3820271c517ceb89ab7442656ba49fa23ee1d0 upstream. + +When host sends GET_STATUS to ECM interface, handle the request +from the function driver. Since the interface is wakeup capable, +set the corresponding bit, and set RW bit if the function is +already armed for wakeup by the host. 
+ +Cc: stable +Fixes: 481c225c4802 ("usb: gadget: Handle function suspend feature selector") +Signed-off-by: Prashanth K +Reviewed-by: Thinh Nguyen +Link: https://lore.kernel.org/r/20250422103231.1954387-2-prashanth.k@oss.qualcomm.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/gadget/function/f_ecm.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/usb/gadget/function/f_ecm.c ++++ b/drivers/usb/gadget/function/f_ecm.c +@@ -892,6 +892,12 @@ static void ecm_resume(struct usb_functi + gether_resume(&ecm->port); + } + ++static int ecm_get_status(struct usb_function *f) ++{ ++ return (f->func_wakeup_armed ? USB_INTRF_STAT_FUNC_RW : 0) | ++ USB_INTRF_STAT_FUNC_RW_CAP; ++} ++ + static void ecm_free(struct usb_function *f) + { + struct f_ecm *ecm; +@@ -960,6 +966,7 @@ static struct usb_function *ecm_alloc(st + ecm->port.func.disable = ecm_disable; + ecm->port.func.free_func = ecm_free; + ecm->port.func.suspend = ecm_suspend; ++ ecm->port.func.get_status = ecm_get_status; + ecm->port.func.resume = ecm_resume; + + return &ecm->port.func; diff --git a/queue-6.14/usb-gadget-tegra-xudc-ack-st_rc-after-clearing-ctrl_run.patch b/queue-6.14/usb-gadget-tegra-xudc-ack-st_rc-after-clearing-ctrl_run.patch new file mode 100644 index 0000000000..22fed06e1b --- /dev/null +++ b/queue-6.14/usb-gadget-tegra-xudc-ack-st_rc-after-clearing-ctrl_run.patch @@ -0,0 +1,40 @@ +From 59820fde001500c167342257650541280c622b73 Mon Sep 17 00:00:00 2001 +From: Wayne Chang +Date: Fri, 18 Apr 2025 16:12:28 +0800 +Subject: usb: gadget: tegra-xudc: ACK ST_RC after clearing CTRL_RUN + +From: Wayne Chang + +commit 59820fde001500c167342257650541280c622b73 upstream. + +We identified a bug where the ST_RC bit in the status register was not +being acknowledged after clearing the CTRL_RUN bit in the control +register. This could lead to unexpected behavior in the USB gadget +drivers. 
+ +This patch resolves the issue by adding the necessary code to explicitly +acknowledge ST_RC after clearing CTRL_RUN based on the programming +sequence, ensuring proper state transition. + +Fixes: 49db427232fe ("usb: gadget: Add UDC driver for tegra XUSB device mode controller") +Cc: stable +Signed-off-by: Wayne Chang +Link: https://lore.kernel.org/r/20250418081228.1194779-1-waynec@nvidia.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/gadget/udc/tegra-xudc.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/usb/gadget/udc/tegra-xudc.c ++++ b/drivers/usb/gadget/udc/tegra-xudc.c +@@ -1749,6 +1749,10 @@ static int __tegra_xudc_ep_disable(struc + val = xudc_readl(xudc, CTRL); + val &= ~CTRL_RUN; + xudc_writel(xudc, val, CTRL); ++ ++ val = xudc_readl(xudc, ST); ++ if (val & ST_RC) ++ xudc_writel(xudc, ST_RC, ST); + } + + dev_info(xudc->dev, "ep %u disabled\n", ep->index); diff --git a/queue-6.14/usb-gadget-use-get_status-callback-to-set-remote-wakeup-capability.patch b/queue-6.14/usb-gadget-use-get_status-callback-to-set-remote-wakeup-capability.patch new file mode 100644 index 0000000000..a2b3065944 --- /dev/null +++ b/queue-6.14/usb-gadget-use-get_status-callback-to-set-remote-wakeup-capability.patch @@ -0,0 +1,63 @@ +From 5977a58dd5a4865198b0204b998adb0f634abe19 Mon Sep 17 00:00:00 2001 +From: Prashanth K +Date: Tue, 22 Apr 2025 16:02:30 +0530 +Subject: usb: gadget: Use get_status callback to set remote wakeup capability + +From: Prashanth K + +commit 5977a58dd5a4865198b0204b998adb0f634abe19 upstream. + +Currently when the host sends GET_STATUS request for an interface, +we use get_status callbacks to set/clear remote wakeup capability +of that interface. And if get_status callback isn't present for +that interface, then we assume its remote wakeup capability based +on bmAttributes. + +Now consider a scenario, where we have a USB configuration with +multiple interfaces (say ECM + ADB), here ECM is remote wakeup +capable and as of now ADB isn't. 
And bmAttributes will indicate +the device as wakeup capable. With the current implementation, +when host sends GET_STATUS request for both interfaces, we will +set FUNC_RW_CAP for both. This results in USB3 CV Chapter 9.15 +(Function Remote Wakeup Test) failures as host expects remote +wakeup from both interfaces. + +The above scenario is just an example, and the failure can be +observed if we use configuration with any interface except ECM. +Hence avoid configuring remote wakeup capability from composite +driver based on bmAttributes, instead use get_status callbacks +and let the function drivers decide this. + +Cc: stable +Fixes: 481c225c4802 ("usb: gadget: Handle function suspend feature selector") +Signed-off-by: Prashanth K +Reviewed-by: Thinh Nguyen +Link: https://lore.kernel.org/r/20250422103231.1954387-3-prashanth.k@oss.qualcomm.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/gadget/composite.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/drivers/usb/gadget/composite.c ++++ b/drivers/usb/gadget/composite.c +@@ -2011,15 +2011,13 @@ composite_setup(struct usb_gadget *gadge + + if (f->get_status) { + status = f->get_status(f); ++ + if (status < 0) + break; +- } else { +- /* Set D0 and D1 bits based on func wakeup capability */ +- if (f->config->bmAttributes & USB_CONFIG_ATT_WAKEUP) { +- status |= USB_INTRF_STAT_FUNC_RW_CAP; +- if (f->func_wakeup_armed) +- status |= USB_INTRF_STAT_FUNC_RW; +- } ++ ++ /* if D5 is not set, then device is not wakeup capable */ ++ if (!(f->config->bmAttributes & USB_CONFIG_ATT_WAKEUP)) ++ status &= ~(USB_INTRF_STAT_FUNC_RW_CAP | USB_INTRF_STAT_FUNC_RW); + } + + put_unaligned_le16(status & 0x0000ffff, req->buf); diff --git a/queue-6.14/usb-host-tegra-prevent-host-controller-crash-when-otg-port-is-used.patch b/queue-6.14/usb-host-tegra-prevent-host-controller-crash-when-otg-port-is-used.patch new file mode 100644 index 0000000000..465fcd7b4f --- /dev/null +++ 
b/queue-6.14/usb-host-tegra-prevent-host-controller-crash-when-otg-port-is-used.patch @@ -0,0 +1,71 @@ +From 732f35cf8bdfece582f6e4a9c659119036577308 Mon Sep 17 00:00:00 2001 +From: Jim Lin +Date: Tue, 22 Apr 2025 19:40:01 +0800 +Subject: usb: host: tegra: Prevent host controller crash when OTG port is used + +From: Jim Lin + +commit 732f35cf8bdfece582f6e4a9c659119036577308 upstream. + +When a USB device is connected to the OTG port, the tegra_xhci_id_work() +routine transitions the PHY to host mode and calls xhci_hub_control() +with the SetPortFeature command to enable port power. + +In certain cases, the XHCI controller may be in a low-power state +when this operation occurs. If xhci_hub_control() is invoked while +the controller is suspended, the PORTSC register may return 0xFFFFFFFF, +indicating a read failure. This causes xhci_hc_died() to be triggered, +leading to host controller shutdown. + +Example backtrace: +[ 105.445736] Workqueue: events tegra_xhci_id_work +[ 105.445747] dump_backtrace+0x0/0x1e8 +[ 105.445759] xhci_hc_died.part.48+0x40/0x270 +[ 105.445769] tegra_xhci_set_port_power+0xc0/0x240 +[ 105.445774] tegra_xhci_id_work+0x130/0x240 + +To prevent this, ensure the controller is fully resumed before +interacting with hardware registers by calling pm_runtime_get_sync() +prior to the host mode transition and xhci_hub_control(). 
+ +Fixes: f836e7843036 ("usb: xhci-tegra: Add OTG support") +Cc: stable +Signed-off-by: Jim Lin +Signed-off-by: Wayne Chang +Link: https://lore.kernel.org/r/20250422114001.126367-1-waynec@nvidia.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/host/xhci-tegra.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c +index b5c362c2051d..0c7af44d4dae 100644 +--- a/drivers/usb/host/xhci-tegra.c ++++ b/drivers/usb/host/xhci-tegra.c +@@ -1364,6 +1364,7 @@ static void tegra_xhci_id_work(struct work_struct *work) + tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion(tegra->padctl, + tegra->otg_usb2_port); + ++ pm_runtime_get_sync(tegra->dev); + if (tegra->host_mode) { + /* switch to host mode */ + if (tegra->otg_usb3_port >= 0) { +@@ -1393,6 +1394,7 @@ static void tegra_xhci_id_work(struct work_struct *work) + } + + tegra_xhci_set_port_power(tegra, true, true); ++ pm_runtime_mark_last_busy(tegra->dev); + + } else { + if (tegra->otg_usb3_port >= 0) +@@ -1400,6 +1402,7 @@ static void tegra_xhci_id_work(struct work_struct *work) + + tegra_xhci_set_port_power(tegra, true, false); + } ++ pm_runtime_put_autosuspend(tegra->dev); + } + + #if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_PM_SLEEP) +-- +2.49.0 + diff --git a/queue-6.14/x86-microcode-consolidate-the-loader-enablement-checking.patch b/queue-6.14/x86-microcode-consolidate-the-loader-enablement-checking.patch new file mode 100644 index 0000000000..a5e3307410 --- /dev/null +++ b/queue-6.14/x86-microcode-consolidate-the-loader-enablement-checking.patch @@ -0,0 +1,228 @@ +From 5214a9f6c0f56644acb9d2cbb58facf1856d322b Mon Sep 17 00:00:00 2001 +From: "Borislav Petkov (AMD)" +Date: Mon, 14 Apr 2025 11:59:33 +0200 +Subject: x86/microcode: Consolidate the loader enablement checking +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Borislav Petkov (AMD) + +commit 
5214a9f6c0f56644acb9d2cbb58facf1856d322b upstream. + +Consolidate the whole logic which determines whether the microcode loader +should be enabled or not into a single function and call it everywhere. + +Well, almost everywhere - not in mk_early_pgtbl_32() because there the kernel +is running without paging enabled and checking dis_ucode_ldr et al would +require physical addresses and uglification of the code. + +But since this is 32-bit, the easier thing to do is to simply map the initrd +unconditionally especially since that mapping is getting removed later anyway +by zap_early_initrd_mapping() and avoid the uglification. + +In doing so, address the issue of old 486er machines without CPUID +support, not booting current kernels. + + [ mingo: Fix no previous prototype for ‘microcode_loader_disabled’ [-Wmissing-prototypes] ] + +Fixes: 4c585af7180c1 ("x86/boot/32: Temporarily map initrd for microcode loading") +Signed-off-by: Borislav Petkov (AMD) +Signed-off-by: Ingo Molnar +Signed-off-by: Borislav Petkov (AMD) +Cc: +Link: https://lore.kernel.org/r/CANpbe9Wm3z8fy9HbgS8cuhoj0TREYEEkBipDuhgkWFvqX0UoVQ@mail.gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/microcode.h | 2 + + arch/x86/kernel/cpu/microcode/amd.c | 6 ++- + arch/x86/kernel/cpu/microcode/core.c | 58 ++++++++++++++++++------------- + arch/x86/kernel/cpu/microcode/intel.c | 2 - + arch/x86/kernel/cpu/microcode/internal.h | 1 + arch/x86/kernel/head32.c | 4 -- + 6 files changed, 41 insertions(+), 32 deletions(-) + +--- a/arch/x86/include/asm/microcode.h ++++ b/arch/x86/include/asm/microcode.h +@@ -17,10 +17,12 @@ struct ucode_cpu_info { + void load_ucode_bsp(void); + void load_ucode_ap(void); + void microcode_bsp_resume(void); ++bool __init microcode_loader_disabled(void); + #else + static inline void load_ucode_bsp(void) { } + static inline void load_ucode_ap(void) { } + static inline void microcode_bsp_resume(void) { } ++static inline bool __init microcode_loader_disabled(void) { 
return false; } + #endif + + extern unsigned long initrd_start_early; +--- a/arch/x86/kernel/cpu/microcode/amd.c ++++ b/arch/x86/kernel/cpu/microcode/amd.c +@@ -1098,15 +1098,17 @@ static enum ucode_state load_microcode_a + + static int __init save_microcode_in_initrd(void) + { +- unsigned int cpuid_1_eax = native_cpuid_eax(1); + struct cpuinfo_x86 *c = &boot_cpu_data; + struct cont_desc desc = { 0 }; ++ unsigned int cpuid_1_eax; + enum ucode_state ret; + struct cpio_data cp; + +- if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) ++ if (microcode_loader_disabled() || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) + return 0; + ++ cpuid_1_eax = native_cpuid_eax(1); ++ + if (!find_blobs_in_containers(&cp)) + return -EINVAL; + +--- a/arch/x86/kernel/cpu/microcode/core.c ++++ b/arch/x86/kernel/cpu/microcode/core.c +@@ -41,8 +41,8 @@ + + #include "internal.h" + +-static struct microcode_ops *microcode_ops; +-bool dis_ucode_ldr = true; ++static struct microcode_ops *microcode_ops; ++static bool dis_ucode_ldr = false; + + bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV); + module_param(force_minrev, bool, S_IRUSR | S_IWUSR); +@@ -84,6 +84,9 @@ static bool amd_check_current_patch_leve + u32 lvl, dummy, i; + u32 *levels; + ++ if (x86_cpuid_vendor() != X86_VENDOR_AMD) ++ return false; ++ + native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); + + levels = final_levels; +@@ -95,27 +98,29 @@ static bool amd_check_current_patch_leve + return false; + } + +-static bool __init check_loader_disabled_bsp(void) ++bool __init microcode_loader_disabled(void) + { +- static const char *__dis_opt_str = "dis_ucode_ldr"; +- const char *cmdline = boot_command_line; +- const char *option = __dis_opt_str; ++ if (dis_ucode_ldr) ++ return true; + + /* +- * CPUID(1).ECX[31]: reserved for hypervisor use. 
This is still not +- * completely accurate as xen pv guests don't see that CPUID bit set but +- * that's good enough as they don't land on the BSP path anyway. ++ * Disable when: ++ * ++ * 1) The CPU does not support CPUID. ++ * ++ * 2) Bit 31 in CPUID[1]:ECX is set ++ * The bit is reserved for hypervisor use. This is still not ++ * completely accurate as XEN PV guests don't see that CPUID bit ++ * set, but that's good enough as they don't land on the BSP ++ * path anyway. ++ * ++ * 3) Certain AMD patch levels are not allowed to be ++ * overwritten. + */ +- if (native_cpuid_ecx(1) & BIT(31)) +- return true; +- +- if (x86_cpuid_vendor() == X86_VENDOR_AMD) { +- if (amd_check_current_patch_level()) +- return true; +- } +- +- if (cmdline_find_option_bool(cmdline, option) <= 0) +- dis_ucode_ldr = false; ++ if (!have_cpuid_p() || ++ native_cpuid_ecx(1) & BIT(31) || ++ amd_check_current_patch_level()) ++ dis_ucode_ldr = true; + + return dis_ucode_ldr; + } +@@ -125,7 +130,10 @@ void __init load_ucode_bsp(void) + unsigned int cpuid_1_eax; + bool intel = true; + +- if (!have_cpuid_p()) ++ if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0) ++ dis_ucode_ldr = true; ++ ++ if (microcode_loader_disabled()) + return; + + cpuid_1_eax = native_cpuid_eax(1); +@@ -146,9 +154,6 @@ void __init load_ucode_bsp(void) + return; + } + +- if (check_loader_disabled_bsp()) +- return; +- + if (intel) + load_ucode_intel_bsp(&early_data); + else +@@ -159,6 +164,11 @@ void load_ucode_ap(void) + { + unsigned int cpuid_1_eax; + ++ /* ++ * Can't use microcode_loader_disabled() here - .init section ++ * hell. It doesn't have to either - the BSP variant must've ++ * parsed cmdline already anyway.
++ */ + if (dis_ucode_ldr) + return; + +@@ -810,7 +820,7 @@ static int __init microcode_init(void) + struct cpuinfo_x86 *c = &boot_cpu_data; + int error; + +- if (dis_ucode_ldr) ++ if (microcode_loader_disabled()) + return -EINVAL; + + if (c->x86_vendor == X86_VENDOR_INTEL) +--- a/arch/x86/kernel/cpu/microcode/intel.c ++++ b/arch/x86/kernel/cpu/microcode/intel.c +@@ -389,7 +389,7 @@ static int __init save_builtin_microcode + if (xchg(&ucode_patch_va, NULL) != UCODE_BSP_LOADED) + return 0; + +- if (dis_ucode_ldr || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) ++ if (microcode_loader_disabled() || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + + uci.mc = get_microcode_blob(&uci, true); +--- a/arch/x86/kernel/cpu/microcode/internal.h ++++ b/arch/x86/kernel/cpu/microcode/internal.h +@@ -94,7 +94,6 @@ static inline unsigned int x86_cpuid_fam + return x86_family(eax); + } + +-extern bool dis_ucode_ldr; + extern bool force_minrev; + + #ifdef CONFIG_CPU_SUP_AMD +--- a/arch/x86/kernel/head32.c ++++ b/arch/x86/kernel/head32.c +@@ -145,10 +145,6 @@ void __init __no_stack_protector mk_earl + *ptr = (unsigned long)ptep + PAGE_OFFSET; + + #ifdef CONFIG_MICROCODE_INITRD32 +- /* Running on a hypervisor? 
*/ +- if (native_cpuid_ecx(1) & BIT(31)) +- return; +- + params = (struct boot_params *)__pa_nodebug(&boot_params); + if (!params->hdr.ramdisk_size || !params->hdr.ramdisk_image) + return; diff --git a/queue-6.14/xen-swiotlb-use-swiotlb-bouncing-if-kmalloc-allocation-demands-it.patch b/queue-6.14/xen-swiotlb-use-swiotlb-bouncing-if-kmalloc-allocation-demands-it.patch new file mode 100644 index 0000000000..99eb1746e0 --- /dev/null +++ b/queue-6.14/xen-swiotlb-use-swiotlb-bouncing-if-kmalloc-allocation-demands-it.patch @@ -0,0 +1,42 @@ +From cd9c058489053e172a6654cad82ee936d1b09fab Mon Sep 17 00:00:00 2001 +From: John Ernberg +Date: Fri, 2 May 2025 11:40:55 +0000 +Subject: xen: swiotlb: Use swiotlb bouncing if kmalloc allocation demands it + +From: John Ernberg + +commit cd9c058489053e172a6654cad82ee936d1b09fab upstream. + +Xen swiotlb support was missed when the patch set starting with +4ab5f8ec7d71 ("mm/slab: decouple ARCH_KMALLOC_MINALIGN from +ARCH_DMA_MINALIGN") was merged. + +When running Xen on iMX8QXP, a SoC without IOMMU, the effect was that USB +transfers ended up corrupted when there was more than one URB inflight at +the same time. + +Add a call to dma_kmalloc_needs_bounce() to make sure that allocations too +small for DMA get bounced via swiotlb. + +Closes: https://lore.kernel.org/linux-usb/ab2776f0-b838-4cf6-a12a-c208eb6aad59@actia.se/ +Fixes: 4ab5f8ec7d71 ("mm/slab: decouple ARCH_KMALLOC_MINALIGN from ARCH_DMA_MINALIGN") +Cc: stable@kernel.org # v6.5+ +Signed-off-by: John Ernberg +Reviewed-by: Stefano Stabellini +Signed-off-by: Juergen Gross +Message-ID: <20250502114043.1968976-2-john.ernberg@actia.se> +Signed-off-by: Greg Kroah-Hartman +--- + drivers/xen/swiotlb-xen.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/xen/swiotlb-xen.c ++++ b/drivers/xen/swiotlb-xen.c +@@ -217,6 +217,7 @@ static dma_addr_t xen_swiotlb_map_page(s + * buffering it. 
+ */ + if (dma_capable(dev, dev_addr, size, true) && ++ !dma_kmalloc_needs_bounce(dev, size, dir) && + !range_straddles_page_boundary(phys, size) && + !xen_arch_need_swiotlb(dev, phys, dev_addr) && + !is_swiotlb_force_bounce(dev)) diff --git a/queue-6.14/xenbus-use-kref-to-track-req-lifetime.patch b/queue-6.14/xenbus-use-kref-to-track-req-lifetime.patch new file mode 100644 index 0000000000..047a733af5 --- /dev/null +++ b/queue-6.14/xenbus-use-kref-to-track-req-lifetime.patch @@ -0,0 +1,172 @@ +From 1f0304dfd9d217c2f8b04a9ef4b3258a66eedd27 Mon Sep 17 00:00:00 2001 +From: Jason Andryuk +Date: Tue, 6 May 2025 17:09:33 -0400 +Subject: xenbus: Use kref to track req lifetime +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jason Andryuk + +commit 1f0304dfd9d217c2f8b04a9ef4b3258a66eedd27 upstream. + +Marek reported seeing a NULL pointer fault in the xenbus_thread +callstack: +BUG: kernel NULL pointer dereference, address: 0000000000000000 +RIP: e030:__wake_up_common+0x4c/0x180 +Call Trace: + + __wake_up_common_lock+0x82/0xd0 + process_msg+0x18e/0x2f0 + xenbus_thread+0x165/0x1c0 + +process_msg+0x18e is req->cb(req). req->cb is set to xs_wake_up(), a +thin wrapper around wake_up(), or xenbus_dev_queue_reply(). It seems +like it was xs_wake_up() in this case. + +It seems like req may have woken up the xs_wait_for_reply(), which +kfree()ed the req. When xenbus_thread resumes, it faults on the zero-ed +data. + +Linux Device Drivers 2nd edition states: +"Normally, a wake_up call can cause an immediate reschedule to happen, +meaning that other processes might run before wake_up returns." +... which would match the behaviour observed. + +Change to keeping two krefs on each request. One for the caller, and +one for xenbus_thread. Each will kref_put() when finished, and the last +will free it. 
+ +This use of kref matches the description in +Documentation/core-api/kref.rst + +Link: https://lore.kernel.org/xen-devel/ZO0WrR5J0xuwDIxW@mail-itl/ +Reported-by: Marek Marczykowski-Górecki +Fixes: fd8aa9095a95 ("xen: optimize xenbus driver for multiple concurrent xenstore accesses") +Cc: stable@vger.kernel.org +Signed-off-by: Jason Andryuk +Reviewed-by: Juergen Gross +Signed-off-by: Juergen Gross +Message-ID: <20250506210935.5607-1-jason.andryuk@amd.com> +Signed-off-by: Greg Kroah-Hartman +--- + drivers/xen/xenbus/xenbus.h | 2 ++ + drivers/xen/xenbus/xenbus_comms.c | 9 ++++----- + drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +- + drivers/xen/xenbus/xenbus_xs.c | 18 ++++++++++++++++-- + 4 files changed, 23 insertions(+), 8 deletions(-) + +--- a/drivers/xen/xenbus/xenbus.h ++++ b/drivers/xen/xenbus/xenbus.h +@@ -77,6 +77,7 @@ enum xb_req_state { + struct xb_req_data { + struct list_head list; + wait_queue_head_t wq; ++ struct kref kref; + struct xsd_sockmsg msg; + uint32_t caller_req_id; + enum xsd_sockmsg_type type; +@@ -103,6 +104,7 @@ int xb_init_comms(void); + void xb_deinit_comms(void); + int xs_watch_msg(struct xs_watch_event *event); + void xs_request_exit(struct xb_req_data *req); ++void xs_free_req(struct kref *kref); + + int xenbus_match(struct device *_dev, const struct device_driver *_drv); + int xenbus_dev_probe(struct device *_dev); +--- a/drivers/xen/xenbus/xenbus_comms.c ++++ b/drivers/xen/xenbus/xenbus_comms.c +@@ -309,8 +309,8 @@ static int process_msg(void) + virt_wmb(); + req->state = xb_req_state_got_reply; + req->cb(req); +- } else +- kfree(req); ++ } ++ kref_put(&req->kref, xs_free_req); + } + + mutex_unlock(&xs_response_mutex); +@@ -386,14 +386,13 @@ static int process_writes(void) + state.req->msg.type = XS_ERROR; + state.req->err = err; + list_del(&state.req->list); +- if (state.req->state == xb_req_state_aborted) +- kfree(state.req); +- else { ++ if (state.req->state != xb_req_state_aborted) { + /* write err, then update state */ + 
virt_wmb(); + state.req->state = xb_req_state_got_reply; + wake_up(&state.req->wq); + } ++ kref_put(&state.req->kref, xs_free_req); + + mutex_unlock(&xb_write_mutex); + +--- a/drivers/xen/xenbus/xenbus_dev_frontend.c ++++ b/drivers/xen/xenbus/xenbus_dev_frontend.c +@@ -406,7 +406,7 @@ void xenbus_dev_queue_reply(struct xb_re + mutex_unlock(&u->reply_mutex); + + kfree(req->body); +- kfree(req); ++ kref_put(&req->kref, xs_free_req); + + kref_put(&u->kref, xenbus_file_free); + +--- a/drivers/xen/xenbus/xenbus_xs.c ++++ b/drivers/xen/xenbus/xenbus_xs.c +@@ -112,6 +112,12 @@ static void xs_suspend_exit(void) + wake_up_all(&xs_state_enter_wq); + } + ++void xs_free_req(struct kref *kref) ++{ ++ struct xb_req_data *req = container_of(kref, struct xb_req_data, kref); ++ kfree(req); ++} ++ + static uint32_t xs_request_enter(struct xb_req_data *req) + { + uint32_t rq_id; +@@ -237,6 +243,12 @@ static void xs_send(struct xb_req_data * + req->caller_req_id = req->msg.req_id; + req->msg.req_id = xs_request_enter(req); + ++ /* ++ * Take 2nd ref. One for this thread, and the second for the ++ * xenbus_thread. ++ */ ++ kref_get(&req->kref); ++ + mutex_lock(&xb_write_mutex); + list_add_tail(&req->list, &xb_write_list); + notify = list_is_singular(&xb_write_list); +@@ -261,8 +273,8 @@ static void *xs_wait_for_reply(struct xb + if (req->state == xb_req_state_queued || + req->state == xb_req_state_wait_reply) + req->state = xb_req_state_aborted; +- else +- kfree(req); ++ ++ kref_put(&req->kref, xs_free_req); + mutex_unlock(&xb_write_mutex); + + return ret; +@@ -291,6 +303,7 @@ int xenbus_dev_request_and_reply(struct + req->cb = xenbus_dev_queue_reply; + req->par = par; + req->user_req = true; ++ kref_init(&req->kref); + + xs_send(req, msg); + +@@ -319,6 +332,7 @@ static void *xs_talkv(struct xenbus_tran + req->num_vecs = num_vecs; + req->cb = xs_wake_up; + req->user_req = false; ++ kref_init(&req->kref); + + msg.req_id = 0; + msg.tx_id = t.id;