From: Greg Kroah-Hartman Date: Thu, 13 Feb 2025 13:13:47 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v6.6.78~12 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=68fe9e0eb6bad80047fe303d7ee5489edd06cc29;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch cachefiles-fix-null-pointer-dereference-in-object-file.patch ext4-filesystems-without-casefold-feature-cannot-be-mounted-with-siphash.patch kvm-x86-make-x2apic-id-100-readonly.patch kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2avic.patch mptcp-pm-only-set-fullmesh-for-subflow-endp.patch mptcp-prevent-excessive-coalescing-on-receive.patch ocfs2-check-dir-i_size-in-ocfs2_find_entry.patch revert-btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch selftests-mptcp-join-fix-af_inet6-variable.patch tty-xilinx_uartps-split-sysrq-handling.patch x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch --- diff --git a/queue-6.6/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch b/queue-6.6/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch new file mode 100644 index 0000000000..0423945459 --- /dev/null +++ b/queue-6.6/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch @@ -0,0 +1,42 @@ +From stable+bounces-114134-greg=kroah.com@vger.kernel.org Thu Feb 6 17:22:11 2025 +From: Koichiro Den +Date: Fri, 7 Feb 2025 01:21:31 +0900 +Subject: btrfs: avoid monopolizing a core when activating a swap file +To: gregkh@linuxfoundation.org, stable@vger.kernel.org +Cc: wqu@suse.com, fdmanana@suse.com, dsterba@suse.com +Message-ID: <20250206162131.1387235-2-koichiro.den@canonical.com> + +From: Filipe Manana + +commit 2c8507c63f5498d4ee4af404a8e44ceae4345056 upstream. + +This commit re-attempts the backport of the change to the linux-6.6.y +branch. 
Commit 6e1a82259307 ("btrfs: avoid monopolizing a core when +activating a swap file") on this branch was reverted. + +During swap activation we iterate over the extents of a file and we can +have many thousands of them, so we can end up in a busy loop monopolizing +a core. Avoid this by doing a voluntary reschedule after processing each +extent. + +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Koichiro Den +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -10833,6 +10833,8 @@ static int btrfs_swap_activate(struct sw + } + + start += len; ++ ++ cond_resched(); + } + + if (bsi.block_len) diff --git a/queue-6.6/cachefiles-fix-null-pointer-dereference-in-object-file.patch b/queue-6.6/cachefiles-fix-null-pointer-dereference-in-object-file.patch new file mode 100644 index 0000000000..d7163131e2 --- /dev/null +++ b/queue-6.6/cachefiles-fix-null-pointer-dereference-in-object-file.patch @@ -0,0 +1,138 @@ +From 31ad74b20227ce6b40910ff78b1c604e42975cf1 Mon Sep 17 00:00:00 2001 +From: Zizhi Wo +Date: Thu, 7 Nov 2024 19:06:48 +0800 +Subject: cachefiles: Fix NULL pointer dereference in object->file + +From: Zizhi Wo + +commit 31ad74b20227ce6b40910ff78b1c604e42975cf1 upstream. + +At present, the object->file has the NULL pointer dereference problem in +ondemand-mode. The root cause is that the allocated fd and object->file +lifetime are inconsistent, and the user-space invocation to anon_fd uses +object->file. Following is the process that triggers the issue: + + [write fd] [umount] +cachefiles_ondemand_fd_write_iter + fscache_cookie_state_machine + cachefiles_withdraw_cookie + if (!file) return -ENOBUFS + cachefiles_clean_up_object + cachefiles_unmark_inode_in_use + fput(object->file) + object->file = NULL + // file NULL pointer dereference! + __cachefiles_write(..., file, ...) 
+ +Fix this issue by adding an additional reference count to the object->file +before write/llseek, and decrementing it after the operation has finished. + +Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") +Signed-off-by: Zizhi Wo +Link: https://lore.kernel.org/r/20241107110649.3980193-5-wozizhi@huawei.com +Reviewed-by: David Howells +Signed-off-by: Christian Brauner +Signed-off-by: Bin Lan +Signed-off-by: Greg Kroah-Hartman +--- + fs/cachefiles/interface.c | 14 ++++++++++---- + fs/cachefiles/ondemand.c | 30 ++++++++++++++++++++++++------ + 2 files changed, 34 insertions(+), 10 deletions(-) + +--- a/fs/cachefiles/interface.c ++++ b/fs/cachefiles/interface.c +@@ -327,6 +327,8 @@ static void cachefiles_commit_object(str + static void cachefiles_clean_up_object(struct cachefiles_object *object, + struct cachefiles_cache *cache) + { ++ struct file *file; ++ + if (test_bit(FSCACHE_COOKIE_RETIRED, &object->cookie->flags)) { + if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { + cachefiles_see_object(object, cachefiles_obj_see_clean_delete); +@@ -342,10 +344,14 @@ static void cachefiles_clean_up_object(s + } + + cachefiles_unmark_inode_in_use(object, object->file); +- if (object->file) { +- fput(object->file); +- object->file = NULL; +- } ++ ++ spin_lock(&object->lock); ++ file = object->file; ++ object->file = NULL; ++ spin_unlock(&object->lock); ++ ++ if (file) ++ fput(file); + } + + /* +--- a/fs/cachefiles/ondemand.c ++++ b/fs/cachefiles/ondemand.c +@@ -61,20 +61,26 @@ static ssize_t cachefiles_ondemand_fd_wr + { + struct cachefiles_object *object = kiocb->ki_filp->private_data; + struct cachefiles_cache *cache = object->volume->cache; +- struct file *file = object->file; ++ struct file *file; + size_t len = iter->count; + loff_t pos = kiocb->ki_pos; + const struct cred *saved_cred; + int ret; + +- if (!file) ++ spin_lock(&object->lock); ++ file = object->file; ++ if (!file) { ++ spin_unlock(&object->lock); + return -ENOBUFS; ++ } ++ 
get_file(file); ++ spin_unlock(&object->lock); + + cachefiles_begin_secure(cache, &saved_cred); + ret = __cachefiles_prepare_write(object, file, &pos, &len, true); + cachefiles_end_secure(cache, saved_cred); + if (ret < 0) +- return ret; ++ goto out; + + trace_cachefiles_ondemand_fd_write(object, file_inode(file), pos, len); + ret = __cachefiles_write(object, file, pos, iter, NULL, NULL); +@@ -83,6 +89,8 @@ static ssize_t cachefiles_ondemand_fd_wr + kiocb->ki_pos += ret; + } + ++out: ++ fput(file); + return ret; + } + +@@ -90,12 +98,22 @@ static loff_t cachefiles_ondemand_fd_lls + int whence) + { + struct cachefiles_object *object = filp->private_data; +- struct file *file = object->file; ++ struct file *file; ++ loff_t ret; + +- if (!file) ++ spin_lock(&object->lock); ++ file = object->file; ++ if (!file) { ++ spin_unlock(&object->lock); + return -ENOBUFS; ++ } ++ get_file(file); ++ spin_unlock(&object->lock); + +- return vfs_llseek(file, pos, whence); ++ ret = vfs_llseek(file, pos, whence); ++ fput(file); ++ ++ return ret; + } + + static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl, diff --git a/queue-6.6/ext4-filesystems-without-casefold-feature-cannot-be-mounted-with-siphash.patch b/queue-6.6/ext4-filesystems-without-casefold-feature-cannot-be-mounted-with-siphash.patch new file mode 100644 index 0000000000..2811714392 --- /dev/null +++ b/queue-6.6/ext4-filesystems-without-casefold-feature-cannot-be-mounted-with-siphash.patch @@ -0,0 +1,40 @@ +From 985b67cd86392310d9e9326de941c22fc9340eec Mon Sep 17 00:00:00 2001 +From: Lizhi Xu +Date: Wed, 5 Jun 2024 09:23:35 +0800 +Subject: ext4: filesystems without casefold feature cannot be mounted with siphash + +From: Lizhi Xu + +commit 985b67cd86392310d9e9326de941c22fc9340eec upstream. + +When mounting the ext4 filesystem, if the default hash version is set to +DX_HASH_SIPHASH but the casefold feature is not set, exit the mounting. 
+ +Reported-by: syzbot+340581ba9dceb7e06fb3@syzkaller.appspotmail.com +Signed-off-by: Lizhi Xu +Link: https://patch.msgid.link/20240605012335.44086-1-lizhi.xu@windriver.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Bruno VERNAY +Signed-off-by: Victor Giraud +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -3627,6 +3627,14 @@ int ext4_feature_set_ok(struct super_blo + } + #endif + ++ if (EXT4_SB(sb)->s_es->s_def_hash_version == DX_HASH_SIPHASH && ++ !ext4_has_feature_casefold(sb)) { ++ ext4_msg(sb, KERN_ERR, ++ "Filesystem without casefold feature cannot be " ++ "mounted with siphash"); ++ return 0; ++ } ++ + if (readonly) + return 1; + diff --git a/queue-6.6/kvm-x86-make-x2apic-id-100-readonly.patch b/queue-6.6/kvm-x86-make-x2apic-id-100-readonly.patch new file mode 100644 index 0000000000..0b7ecd4381 --- /dev/null +++ b/queue-6.6/kvm-x86-make-x2apic-id-100-readonly.patch @@ -0,0 +1,123 @@ +From 4b7c3f6d04bd53f2e5b228b6821fb8f5d1ba3071 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 2 Aug 2024 13:29:40 -0700 +Subject: KVM: x86: Make x2APIC ID 100% readonly + +From: Sean Christopherson + +commit 4b7c3f6d04bd53f2e5b228b6821fb8f5d1ba3071 upstream. + +Ignore the userspace provided x2APIC ID when fixing up APIC state for +KVM_SET_LAPIC, i.e. make the x2APIC fully readonly in KVM. Commit +a92e2543d6a8 ("KVM: x86: use hardware-compatible format for APIC ID +register"), which added the fixup, didn't intend to allow userspace to +modify the x2APIC ID. In fact, that commit is when KVM first started +treating the x2APIC ID as readonly, apparently to fix some race: + + static inline u32 kvm_apic_id(struct kvm_lapic *apic) + { +- return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff; ++ /* To avoid a race between apic_base and following APIC_ID update when ++ * switching to x2apic_mode, the x2apic mode returns initial x2apic id. 
++ */ ++ if (apic_x2apic_mode(apic)) ++ return apic->vcpu->vcpu_id; ++ ++ return kvm_lapic_get_reg(apic, APIC_ID) >> 24; + } + +Furthermore, KVM doesn't support delivering interrupts to vCPUs with a +modified x2APIC ID, but KVM *does* return the modified value on a guest +RDMSR and for KVM_GET_LAPIC. I.e. no remotely sane setup can actually +work with a modified x2APIC ID. + +Making the x2APIC ID fully readonly fixes a WARN in KVM's optimized map +calculation, which expects the LDR to align with the x2APIC ID. + + WARNING: CPU: 2 PID: 958 at arch/x86/kvm/lapic.c:331 kvm_recalculate_apic_map+0x609/0xa00 [kvm] + CPU: 2 PID: 958 Comm: recalc_apic_map Not tainted 6.4.0-rc3-vanilla+ #35 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.2-1-1 04/01/2014 + RIP: 0010:kvm_recalculate_apic_map+0x609/0xa00 [kvm] + Call Trace: + + kvm_apic_set_state+0x1cf/0x5b0 [kvm] + kvm_arch_vcpu_ioctl+0x1806/0x2100 [kvm] + kvm_vcpu_ioctl+0x663/0x8a0 [kvm] + __x64_sys_ioctl+0xb8/0xf0 + do_syscall_64+0x56/0x80 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 + RIP: 0033:0x7fade8b9dd6f + +Unfortunately, the WARN can still trigger for other CPUs than the current +one by racing against KVM_SET_LAPIC, so remove it completely. 
+ +Reported-by: Michal Luczaj +Closes: https://lore.kernel.org/all/814baa0c-1eaa-4503-129f-059917365e80@rbox.co +Reported-by: Haoyu Wu +Closes: https://lore.kernel.org/all/20240126161633.62529-1-haoyuwu254@gmail.com +Reported-by: syzbot+545f1326f405db4e1c3e@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/000000000000c2a6b9061cbca3c3@google.com +Signed-off-by: Sean Christopherson +Message-ID: <20240802202941.344889-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: James Houghton +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/lapic.c | 22 +++++++++++++++------- + 1 file changed, 15 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -338,10 +338,8 @@ static void kvm_recalculate_logical_map( + * reversing the LDR calculation to get cluster of APICs, i.e. no + * additional work is required. + */ +- if (apic_x2apic_mode(apic)) { +- WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic))); ++ if (apic_x2apic_mode(apic)) + return; +- } + + if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr, + &cluster, &mask))) { +@@ -2964,18 +2962,28 @@ static int kvm_apic_state_fixup(struct k + struct kvm_lapic_state *s, bool set) + { + if (apic_x2apic_mode(vcpu->arch.apic)) { ++ u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic); + u32 *id = (u32 *)(s->regs + APIC_ID); + u32 *ldr = (u32 *)(s->regs + APIC_LDR); + u64 icr; + + if (vcpu->kvm->arch.x2apic_format) { +- if (*id != vcpu->vcpu_id) ++ if (*id != x2apic_id) + return -EINVAL; + } else { ++ /* ++ * Ignore the userspace value when setting APIC state. ++ * KVM's model is that the x2APIC ID is readonly, e.g. ++ * KVM only supports delivering interrupts to KVM's ++ * version of the x2APIC ID. However, for backwards ++ * compatibility, don't reject attempts to set a ++ * mismatched ID for userspace that hasn't opted into ++ * x2apic_format. 
++ */ + if (set) +- *id >>= 24; ++ *id = x2apic_id; + else +- *id <<= 24; ++ *id = x2apic_id << 24; + } + + /* +@@ -2984,7 +2992,7 @@ static int kvm_apic_state_fixup(struct k + * split to ICR+ICR2 in userspace for backwards compatibility. + */ + if (set) { +- *ldr = kvm_apic_calc_x2apic_ldr(*id); ++ *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); + + icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | + (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; diff --git a/queue-6.6/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2avic.patch b/queue-6.6/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2avic.patch new file mode 100644 index 0000000000..2114e0305f --- /dev/null +++ b/queue-6.6/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2avic.patch @@ -0,0 +1,151 @@ +From 73b42dc69be8564d4951a14d00f827929fe5ef79 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 19 Jul 2024 16:51:00 -0700 +Subject: KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC) + +From: Sean Christopherson + +commit 73b42dc69be8564d4951a14d00f827929fe5ef79 upstream. + +Re-introduce the "split" x2APIC ICR storage that KVM used prior to Intel's +IPI virtualization support, but only for AMD. While not stated anywhere +in the APM, despite stating the ICR is a single 64-bit register, AMD CPUs +store the 64-bit ICR as two separate 32-bit values in ICR and ICR2. When +IPI virtualization (IPIv on Intel, all AVIC flavors on AMD) is enabled, +KVM needs to match CPU behavior as some ICR writes will be handled by +the CPU, not by KVM. + +Add a kvm_x86_ops knob to control the underlying format used by the CPU to +store the x2APIC ICR, and tune it to AMD vs. Intel regardless of whether +or not x2AVIC is enabled. If KVM is handling all ICR writes, the storage +format for x2APIC mode doesn't matter, and having the behavior follow AMD +versus Intel will provide better test coverage and ease debugging. 
+ +Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode") +Cc: stable@vger.kernel.org +Cc: Maxim Levitsky +Cc: Suravee Suthikulpanit +Link: https://lore.kernel.org/r/20240719235107.3023592-4-seanjc@google.com +Signed-off-by: Sean Christopherson +[JH: fixed conflict with vmx_x86_ops reshuffle due to missing commit 5f18c642ff7e2] +Signed-off-by: James Houghton +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 2 + + arch/x86/kvm/lapic.c | 42 ++++++++++++++++++++++++++++------------ + arch/x86/kvm/svm/svm.c | 2 + + arch/x86/kvm/vmx/vmx.c | 2 + + 4 files changed, 36 insertions(+), 12 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1650,6 +1650,8 @@ struct kvm_x86_ops { + void (*enable_irq_window)(struct kvm_vcpu *vcpu); + void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); + bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason); ++ ++ const bool x2apic_icr_is_split; + const unsigned long required_apicv_inhibits; + bool allow_apicv_in_x2apic_without_x2apic_virtualization; + void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -2459,11 +2459,25 @@ int kvm_x2apic_icr_write(struct kvm_lapi + data &= ~APIC_ICR_BUSY; + + kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); +- kvm_lapic_set_reg64(apic, APIC_ICR, data); ++ if (kvm_x86_ops.x2apic_icr_is_split) { ++ kvm_lapic_set_reg(apic, APIC_ICR, data); ++ kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32); ++ } else { ++ kvm_lapic_set_reg64(apic, APIC_ICR, data); ++ } + trace_kvm_apic_write(APIC_ICR, data); + return 0; + } + ++static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic) ++{ ++ if (kvm_x86_ops.x2apic_icr_is_split) ++ return (u64)kvm_lapic_get_reg(apic, APIC_ICR) | ++ (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32; ++ ++ return kvm_lapic_get_reg64(apic, APIC_ICR); ++} ++ + /* emulate APIC access in a trap manner */ + void 
kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) + { +@@ -2481,7 +2495,7 @@ void kvm_apic_write_nodecode(struct kvm_ + * maybe-unecessary write, and both are in the noise anyways. + */ + if (apic_x2apic_mode(apic) && offset == APIC_ICR) +- WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR))); ++ WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic))); + else + kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); + } +@@ -2988,18 +3002,22 @@ static int kvm_apic_state_fixup(struct k + + /* + * In x2APIC mode, the LDR is fixed and based on the id. And +- * ICR is internally a single 64-bit register, but needs to be +- * split to ICR+ICR2 in userspace for backwards compatibility. ++ * if the ICR is _not_ split, ICR is internally a single 64-bit ++ * register, but needs to be split to ICR+ICR2 in userspace for ++ * backwards compatibility. + */ +- if (set) { ++ if (set) + *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); + +- icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | +- (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; +- __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); +- } else { +- icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); +- __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); ++ if (!kvm_x86_ops.x2apic_icr_is_split) { ++ if (set) { ++ icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | ++ (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; ++ __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); ++ } else { ++ icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); ++ __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); ++ } + } + } + +@@ -3196,7 +3214,7 @@ static int kvm_lapic_msr_read(struct kvm + u32 low; + + if (reg == APIC_ICR) { +- *data = kvm_lapic_get_reg64(apic, APIC_ICR); ++ *data = kvm_x2apic_icr_read(apic); + return 0; + } + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -5014,6 +5014,8 @@ static struct kvm_x86_ops svm_x86_ops __ + .enable_nmi_window = svm_enable_nmi_window, + 
.enable_irq_window = svm_enable_irq_window, + .update_cr8_intercept = svm_update_cr8_intercept, ++ ++ .x2apic_icr_is_split = true, + .set_virtual_apic_mode = avic_refresh_virtual_apic_mode, + .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl, + .apicv_post_state_restore = avic_apicv_post_state_restore, +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -8323,6 +8323,8 @@ static struct kvm_x86_ops vmx_x86_ops __ + .enable_nmi_window = vmx_enable_nmi_window, + .enable_irq_window = vmx_enable_irq_window, + .update_cr8_intercept = vmx_update_cr8_intercept, ++ ++ .x2apic_icr_is_split = false, + .set_virtual_apic_mode = vmx_set_virtual_apic_mode, + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, diff --git a/queue-6.6/mptcp-pm-only-set-fullmesh-for-subflow-endp.patch b/queue-6.6/mptcp-pm-only-set-fullmesh-for-subflow-endp.patch new file mode 100644 index 0000000000..1fa4164cfb --- /dev/null +++ b/queue-6.6/mptcp-pm-only-set-fullmesh-for-subflow-endp.patch @@ -0,0 +1,114 @@ +From stable+bounces-114455-greg=kroah.com@vger.kernel.org Sun Feb 9 18:42:14 2025 +From: "Matthieu Baerts (NGI0)" +Date: Sun, 9 Feb 2025 18:41:55 +0100 +Subject: mptcp: pm: only set fullmesh for subflow endp +To: mptcp@lists.linux.dev, stable@vger.kernel.org, gregkh@linuxfoundation.org +Cc: "Matthieu Baerts (NGI0)" , syzbot+cd16e79c1e45f3fe0377@syzkaller.appspotmail.com, Mat Martineau , Jakub Kicinski +Message-ID: <20250209174153.3388802-6-matttbe@kernel.org> + +From: "Matthieu Baerts (NGI0)" + +commit 1bb0d1348546ad059f55c93def34e67cb2a034a6 upstream. + +With the in-kernel path-manager, it is possible to change the 'fullmesh' +flag. The code in mptcp_pm_nl_fullmesh() expects to change it only on +'subflow' endpoints, to recreate more or less subflows using the linked +address. 
+ +Unfortunately, the set_flags() hook was a bit more permissive, and +allowed 'implicit' endpoints to get the 'fullmesh' flag while it is not +allowed before. + +That's what syzbot found, triggering the following warning: + + WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 __mark_subflow_endp_available net/mptcp/pm_netlink.c:1496 [inline] + WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 mptcp_pm_nl_fullmesh net/mptcp/pm_netlink.c:1980 [inline] + WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 mptcp_nl_set_flags net/mptcp/pm_netlink.c:2003 [inline] + WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 mptcp_pm_nl_set_flags+0x974/0xdc0 net/mptcp/pm_netlink.c:2064 + Modules linked in: + CPU: 0 UID: 0 PID: 6499 Comm: syz.1.413 Not tainted 6.13.0-rc5-syzkaller-00172-gd1bf27c4e176 #0 + Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 + RIP: 0010:__mark_subflow_endp_available net/mptcp/pm_netlink.c:1496 [inline] + RIP: 0010:mptcp_pm_nl_fullmesh net/mptcp/pm_netlink.c:1980 [inline] + RIP: 0010:mptcp_nl_set_flags net/mptcp/pm_netlink.c:2003 [inline] + RIP: 0010:mptcp_pm_nl_set_flags+0x974/0xdc0 net/mptcp/pm_netlink.c:2064 + Code: 01 00 00 49 89 c5 e8 fb 45 e8 f5 e9 b8 fc ff ff e8 f1 45 e8 f5 4c 89 f7 be 03 00 00 00 e8 44 1d 0b f9 eb a0 e8 dd 45 e8 f5 90 <0f> 0b 90 e9 17 ff ff ff 89 d9 80 e1 07 38 c1 0f 8c c9 fc ff ff 48 + RSP: 0018:ffffc9000d307240 EFLAGS: 00010293 + RAX: ffffffff8bb72e03 RBX: 0000000000000000 RCX: ffff88807da88000 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 + RBP: ffffc9000d307430 R08: ffffffff8bb72cf0 R09: 1ffff1100b842a5e + R10: dffffc0000000000 R11: ffffed100b842a5f R12: ffff88801e2e5ac0 + R13: ffff88805c214800 R14: ffff88805c2152e8 R15: 1ffff1100b842a5d + FS: 00005555619f6500(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000020002840 CR3: 00000000247e6000 CR4: 00000000003526f0 + DR0: 
0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + + genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline] + genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] + genl_rcv_msg+0xb14/0xec0 net/netlink/genetlink.c:1210 + netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2542 + genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 + netlink_unicast_kernel net/netlink/af_netlink.c:1321 [inline] + netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1347 + netlink_sendmsg+0x8e4/0xcb0 net/netlink/af_netlink.c:1891 + sock_sendmsg_nosec net/socket.c:711 [inline] + __sock_sendmsg+0x221/0x270 net/socket.c:726 + ____sys_sendmsg+0x52a/0x7e0 net/socket.c:2583 + ___sys_sendmsg net/socket.c:2637 [inline] + __sys_sendmsg+0x269/0x350 net/socket.c:2669 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + RIP: 0033:0x7f5fe8785d29 + Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 + RSP: 002b:00007fff571f5558 EFLAGS: 00000246 ORIG_RAX: 000000000000002e + RAX: ffffffffffffffda RBX: 00007f5fe8975fa0 RCX: 00007f5fe8785d29 + RDX: 0000000000000000 RSI: 0000000020000480 RDI: 0000000000000007 + RBP: 00007f5fe8801b08 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 + R13: 00007f5fe8975fa0 R14: 00007f5fe8975fa0 R15: 00000000000011f4 + + +Here, syzbot managed to set the 'fullmesh' flag on an 'implicit' and +used -- according to 'id_avail_bitmap' -- endpoint, causing the PM to +try decrement the local_addr_used counter which is only incremented for +the 'subflow' endpoint. 
+ +Note that 'no type' endpoints -- not 'subflow', 'signal', 'implicit' -- +are fine, because their ID will not be marked as used in the 'id_avail' +bitmap, and setting 'fullmesh' can help forcing the creation of subflow +when receiving an ADD_ADDR. + +Fixes: 73c762c1f07d ("mptcp: set fullmesh flag in pm_netlink") +Cc: stable@vger.kernel.org +Reported-by: syzbot+cd16e79c1e45f3fe0377@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/6786ac51.050a0220.216c54.00a6.GAE@google.com +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/540 +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20250123-net-mptcp-syzbot-issues-v1-2-af73258a726f@kernel.org +Signed-off-by: Jakub Kicinski +[ Conflicts in pm_netlink.c, because the code has been moved around in + commit 6a42477fe449 ("mptcp: update set_flags interfaces"), but the + same fix can still be applied at the original place. ] +Signed-off-by: Matthieu Baerts (NGI0) +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm_netlink.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -2069,7 +2069,8 @@ int mptcp_pm_nl_set_flags(struct net *ne + return -EINVAL; + } + if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && +- (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { ++ (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | ++ MPTCP_PM_ADDR_FLAG_IMPLICIT))) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } diff --git a/queue-6.6/mptcp-prevent-excessive-coalescing-on-receive.patch b/queue-6.6/mptcp-prevent-excessive-coalescing-on-receive.patch new file mode 100644 index 0000000000..aab6e721ba --- /dev/null +++ b/queue-6.6/mptcp-prevent-excessive-coalescing-on-receive.patch @@ -0,0 +1,41 @@ +From stable+bounces-114456-greg=kroah.com@vger.kernel.org Sun Feb 9 18:42:14 2025 +From: "Matthieu Baerts (NGI0)" +Date: Sun, 9 Feb 2025 18:41:56 +0100 +Subject: mptcp: prevent excessive coalescing on receive 
+To: mptcp@lists.linux.dev, stable@vger.kernel.org, gregkh@linuxfoundation.org +Cc: Paolo Abeni , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski +Message-ID: <20250209174153.3388802-7-matttbe@kernel.org> + +From: Paolo Abeni + +commit 56b824eb49d6258aa0bad09a406ceac3f643cdae upstream. + +Currently the skb size after coalescing is only limited by the skb +layout (the skb must not carry frag_list). A single coalesced skb +covering several MSS can potentially fill completely the receive +buffer. In such a case, the snd win will zero until the receive buffer +will be empty again, affecting tput badly. + +Fixes: 8268ed4c9d19 ("mptcp: introduce and use mptcp_try_coalesce()") +Cc: stable@vger.kernel.org # please delay 2 weeks after 6.13-final release +Signed-off-by: Paolo Abeni +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20241230-net-mptcp-rbuf-fixes-v1-3-8608af434ceb@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Matthieu Baerts (NGI0) +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -138,6 +138,7 @@ static bool mptcp_try_coalesce(struct so + int delta; + + if (MPTCP_SKB_CB(from)->offset || ++ ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) || + !skb_try_coalesce(to, from, &fragstolen, &delta)) + return false; + diff --git a/queue-6.6/ocfs2-check-dir-i_size-in-ocfs2_find_entry.patch b/queue-6.6/ocfs2-check-dir-i_size-in-ocfs2_find_entry.patch new file mode 100644 index 0000000000..6628e52d1d --- /dev/null +++ b/queue-6.6/ocfs2-check-dir-i_size-in-ocfs2_find_entry.patch @@ -0,0 +1,217 @@ +From b0fce54b8c0d8e5f2b4c243c803c5996e73baee8 Mon Sep 17 00:00:00 2001 +From: Su Yue +Date: Mon, 6 Jan 2025 22:06:40 +0800 +Subject: ocfs2: check dir i_size in ocfs2_find_entry + +From: Su Yue + +commit b0fce54b8c0d8e5f2b4c243c803c5996e73baee8 upstream. 
+ +syz reports an out of bounds read: + +================================================================== +BUG: KASAN: slab-out-of-bounds in ocfs2_match fs/ocfs2/dir.c:334 +[inline] +BUG: KASAN: slab-out-of-bounds in ocfs2_search_dirblock+0x283/0x6e0 +fs/ocfs2/dir.c:367 +Read of size 1 at addr ffff88804d8b9982 by task syz-executor.2/14802 + +CPU: 0 UID: 0 PID: 14802 Comm: syz-executor.2 Not tainted 6.13.0-rc4 #2 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 +04/01/2014 +Sched_ext: serialise (enabled+all), task: runnable_at=-10ms +Call Trace: + +__dump_stack lib/dump_stack.c:94 [inline] +dump_stack_lvl+0x229/0x350 lib/dump_stack.c:120 +print_address_description mm/kasan/report.c:378 [inline] +print_report+0x164/0x530 mm/kasan/report.c:489 +kasan_report+0x147/0x180 mm/kasan/report.c:602 +ocfs2_match fs/ocfs2/dir.c:334 [inline] +ocfs2_search_dirblock+0x283/0x6e0 fs/ocfs2/dir.c:367 +ocfs2_find_entry_id fs/ocfs2/dir.c:414 [inline] +ocfs2_find_entry+0x1143/0x2db0 fs/ocfs2/dir.c:1078 +ocfs2_find_files_on_disk+0x18e/0x530 fs/ocfs2/dir.c:1981 +ocfs2_lookup_ino_from_name+0xb6/0x110 fs/ocfs2/dir.c:2003 +ocfs2_lookup+0x30a/0xd40 fs/ocfs2/namei.c:122 +lookup_open fs/namei.c:3627 [inline] +open_last_lookups fs/namei.c:3748 [inline] +path_openat+0x145a/0x3870 fs/namei.c:3984 +do_filp_open+0xe9/0x1c0 fs/namei.c:4014 +do_sys_openat2+0x135/0x1d0 fs/open.c:1402 +do_sys_open fs/open.c:1417 [inline] +__do_sys_openat fs/open.c:1433 [inline] +__se_sys_openat fs/open.c:1428 [inline] +__x64_sys_openat+0x15d/0x1c0 fs/open.c:1428 +do_syscall_x64 arch/x86/entry/common.c:52 [inline] +do_syscall_64+0xf6/0x210 arch/x86/entry/common.c:83 +entry_SYSCALL_64_after_hwframe+0x77/0x7f +RIP: 0033:0x7f01076903ad +Code: c3 e8 a7 2b 00 00 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 +f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 +f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007f01084acfc8 EFLAGS: 00000246 ORIG_RAX: 
0000000000000101 +RAX: ffffffffffffffda RBX: 00007f01077cbf80 RCX: 00007f01076903ad +RDX: 0000000000105042 RSI: 0000000020000080 RDI: ffffffffffffff9c +RBP: 00007f01077cbf80 R08: 0000000000000000 R09: 0000000000000000 +R10: 00000000000001ff R11: 0000000000000246 R12: 0000000000000000 +R13: 00007f01077cbf80 R14: 00007f010764fc90 R15: 00007f010848d000 + +================================================================== + +And a general protection fault in ocfs2_prepare_dir_for_insert: + +================================================================== +loop0: detected capacity change from 0 to 32768 +JBD2: Ignoring recovery information on journal +ocfs2: Mounting device (7,0) on (node local, slot 0) with ordered data +mode. +Oops: general protection fault, probably for non-canonical address +0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI +KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] +CPU: 0 UID: 0 PID: 5096 Comm: syz-executor792 Not tainted +6.11.0-rc4-syzkaller-00002-gb0da640826ba #0 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS +1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 +RIP: 0010:ocfs2_find_dir_space_id fs/ocfs2/dir.c:3406 [inline] +RIP: 0010:ocfs2_prepare_dir_for_insert+0x3309/0x5c70 fs/ocfs2/dir.c:4280 +Code: 00 00 e8 2a 25 13 fe e9 ba 06 00 00 e8 20 25 13 fe e9 4f 01 00 00 +e8 16 25 13 fe 49 8d 7f 08 49 8d 5f 09 48 89 f8 48 c1 e8 03 <42> 0f b6 +04 20 84 c0 0f 85 bd 23 00 00 48 89 d8 48 c1 e8 03 42 0f +RSP: 0018:ffffc9000af9f020 EFLAGS: 00010202 +RAX: 0000000000000001 RBX: 0000000000000009 RCX: ffff88801e27a440 +RDX: 0000000000000000 RSI: 0000000000000400 RDI: 0000000000000008 +RBP: ffffc9000af9f830 R08: ffffffff8380395b R09: ffffffff838090a7 +R10: 0000000000000002 R11: ffff88801e27a440 R12: dffffc0000000000 +R13: ffff88803c660878 R14: f700000000000088 R15: 0000000000000000 +FS: 000055555a677380(0000) GS:ffff888020800000(0000) +knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 
0000560bce569178 CR3: 000000001de5a000 CR4: 0000000000350ef0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + +ocfs2_mknod+0xcaf/0x2b40 fs/ocfs2/namei.c:292 +vfs_mknod+0x36d/0x3b0 fs/namei.c:4088 +do_mknodat+0x3ec/0x5b0 +__do_sys_mknodat fs/namei.c:4166 [inline] +__se_sys_mknodat fs/namei.c:4163 [inline] +__x64_sys_mknodat+0xa7/0xc0 fs/namei.c:4163 +do_syscall_x64 arch/x86/entry/common.c:52 [inline] +do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 +entry_SYSCALL_64_after_hwframe+0x77/0x7f +RIP: 0033:0x7f2dafda3a99 +Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 17 00 00 90 48 89 +f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 +0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 +64 89 01 48 +RSP: 002b:00007ffe336a6658 EFLAGS: 00000246 ORIG_RAX: +0000000000000103 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: +00007f2dafda3a99 +RDX: 00000000000021c0 RSI: 0000000020000040 RDI: +00000000ffffff9c +RBP: 00007f2dafe1b5f0 R08: 0000000000004480 R09: +000055555a6784c0 +R10: 0000000000000103 R11: 0000000000000246 R12: +00007ffe336a6680 +R13: 00007ffe336a68a8 R14: 431bde82d7b634db R15: +00007f2dafdec03b + +================================================================== + +The two reports are both caused by an invalid negative i_size of the dir inode. For +ocfs2, the i_size of a dir inode can't be negative or zero. + +Here add a check in ocfs2_find_entry(), which is called by ocfs2_check_dir_for_entry(). It +fixes the second report as ocfs2_check_dir_for_entry() must be called +before ocfs2_prepare_dir_for_insert(). Also set an upper limit for dirs with +OCFS2_INLINE_DATA_FL: their i_size can't be greater than the blocksize.
+ +Link: https://lkml.kernel.org/r/20250106140640.92260-1-glass.su@suse.com +Reported-by: Jiacheng Xu +Link: https://lore.kernel.org/ocfs2-devel/17a04f01.1ae74.19436d003fc.Coremail.stitch@zju.edu.cn/T/#u +Reported-by: syzbot+5a64828fcc4c2ad9b04f@syzkaller.appspotmail.com +Link: https://lore.kernel.org/all/0000000000005894f3062018caf1@google.com/T/ +Signed-off-by: Su Yue +Reviewed-by: Heming Zhao +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Jun Piao +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/dir.c | 25 +++++++++++++++++++++---- + 1 file changed, 21 insertions(+), 4 deletions(-) + +--- a/fs/ocfs2/dir.c ++++ b/fs/ocfs2/dir.c +@@ -1065,26 +1065,39 @@ int ocfs2_find_entry(const char *name, i + { + struct buffer_head *bh; + struct ocfs2_dir_entry *res_dir = NULL; ++ int ret = 0; + + if (ocfs2_dir_indexed(dir)) + return ocfs2_find_entry_dx(name, namelen, dir, lookup); + ++ if (unlikely(i_size_read(dir) <= 0)) { ++ ret = -EFSCORRUPTED; ++ mlog_errno(ret); ++ goto out; ++ } + /* + * The unindexed dir code only uses part of the lookup + * structure, so there's no reason to push it down further + * than this. 
+ */ +- if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ++ if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { ++ if (unlikely(i_size_read(dir) > dir->i_sb->s_blocksize)) { ++ ret = -EFSCORRUPTED; ++ mlog_errno(ret); ++ goto out; ++ } + bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir); +- else ++ } else { + bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir); ++ } + + if (bh == NULL) + return -ENOENT; + + lookup->dl_leaf_bh = bh; + lookup->dl_entry = res_dir; +- return 0; ++out: ++ return ret; + } + + /* +@@ -2012,6 +2025,7 @@ int ocfs2_lookup_ino_from_name(struct in + * + * Return 0 if the name does not exist + * Return -EEXIST if the directory contains the name ++ * Return -EFSCORRUPTED if found corruption + * + * Callers should have i_rwsem + a cluster lock on dir + */ +@@ -2025,9 +2039,12 @@ int ocfs2_check_dir_for_entry(struct ino + trace_ocfs2_check_dir_for_entry( + (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); + +- if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) { ++ ret = ocfs2_find_entry(name, namelen, dir, &lookup); ++ if (ret == 0) { + ret = -EEXIST; + mlog_errno(ret); ++ } else if (ret == -ENOENT) { ++ ret = 0; + } + + ocfs2_free_dir_lookup_result(&lookup); diff --git a/queue-6.6/revert-btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch b/queue-6.6/revert-btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch new file mode 100644 index 0000000000..e3a5ef43b9 --- /dev/null +++ b/queue-6.6/revert-btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch @@ -0,0 +1,36 @@ +From stable+bounces-114133-greg=kroah.com@vger.kernel.org Thu Feb 6 17:22:09 2025 +From: Koichiro Den +Date: Fri, 7 Feb 2025 01:21:30 +0900 +Subject: Revert "btrfs: avoid monopolizing a core when activating a swap file" +To: gregkh@linuxfoundation.org, stable@vger.kernel.org +Cc: wqu@suse.com, fdmanana@suse.com, dsterba@suse.com +Message-ID: <20250206162131.1387235-1-koichiro.den@canonical.com> + +From: 
Koichiro Den + +This reverts commit 6e1a8225930719a9f352d56320214e33e2dde0a6. + +The backport for linux-6.6.y, commit 6e1a82259307 ("btrfs: avoid +monopolizing a core when activating a swap file"), inserted +cond_resched() in the wrong location. + +Revert it now; a subsequent commit will re-backport the original patch. + +Fixes: 6e1a82259307 ("btrfs: avoid monopolizing a core when activating a swap file") # linux-6.6.y +Signed-off-by: Koichiro Den +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -7153,8 +7153,6 @@ noinline int can_nocow_extent(struct ino + ret = -EAGAIN; + goto out; + } +- +- cond_resched(); + } + + if (orig_start) diff --git a/queue-6.6/selftests-mptcp-join-fix-af_inet6-variable.patch b/queue-6.6/selftests-mptcp-join-fix-af_inet6-variable.patch new file mode 100644 index 0000000000..610f568cbf --- /dev/null +++ b/queue-6.6/selftests-mptcp-join-fix-af_inet6-variable.patch @@ -0,0 +1,40 @@ +From stable+bounces-114457-greg=kroah.com@vger.kernel.org Sun Feb 9 18:42:17 2025 +From: "Matthieu Baerts (NGI0)" +Date: Sun, 9 Feb 2025 18:41:57 +0100 +Subject: selftests: mptcp: join: fix AF_INET6 variable +To: mptcp@lists.linux.dev, stable@vger.kernel.org, gregkh@linuxfoundation.org +Cc: "Matthieu Baerts (NGI0)" +Message-ID: <20250209174153.3388802-8-matttbe@kernel.org> + +From: "Matthieu Baerts (NGI0)" + +The Fixes commit is a backport renaming a variable, from AF_INET6 to +MPTCP_LIB_AF_INET6. + +The commit has been applied without conflicts, except that it missed one +extra variable that was in v6.6, but not in the version linked to the +Fixes commit. 
+ +This variable has then been renamed too to avoid these errors: + + LISTENER_CREATED 10.0.2.1:10100 ./mptcp_join.sh: line 2944: [: 2: unary operator expected + LISTENER_CLOSED 10.0.2.1:10100 ./mptcp_join.sh: line 2944: [: 2: unary operator expected + +Fixes: a17d1419126b ("selftests: mptcp: declare event macros in mptcp_lib") +Signed-off-by: Matthieu Baerts (NGI0) +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -2941,7 +2941,7 @@ verify_listener_events() + type=$(mptcp_lib_evts_get_info type "$evt" "$e_type") + family=$(mptcp_lib_evts_get_info family "$evt" "$e_type") + sport=$(mptcp_lib_evts_get_info sport "$evt" "$e_type") +- if [ $family ] && [ $family = $AF_INET6 ]; then ++ if [ $family ] && [ $family = $MPTCP_LIB_AF_INET6 ]; then + saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" "$e_type") + else + saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" "$e_type") diff --git a/queue-6.6/series b/queue-6.6/series index c28751dfdb..37682a225f 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -260,3 +260,15 @@ mips-ftrace-declare-ftrace_get_parent_ra_addr-as-static.patch spi-atmel-quadspi-create-atmel_qspi_ops-to-support-newer-soc-families.patch spi-atmel-qspi-memory-barriers-after-memory-mapped-i-o.patch net-ncsi-use-dev_set_mac_address-for-get-mc-mac-address-handling.patch +ocfs2-check-dir-i_size-in-ocfs2_find_entry.patch +revert-btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch +btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch +ext4-filesystems-without-casefold-feature-cannot-be-mounted-with-siphash.patch +cachefiles-fix-null-pointer-dereference-in-object-file.patch +mptcp-pm-only-set-fullmesh-for-subflow-endp.patch +mptcp-prevent-excessive-coalescing-on-receive.patch +selftests-mptcp-join-fix-af_inet6-variable.patch 
+x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch +tty-xilinx_uartps-split-sysrq-handling.patch +kvm-x86-make-x2apic-id-100-readonly.patch +kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2avic.patch diff --git a/queue-6.6/tty-xilinx_uartps-split-sysrq-handling.patch b/queue-6.6/tty-xilinx_uartps-split-sysrq-handling.patch new file mode 100644 index 0000000000..00bbdd4a9a --- /dev/null +++ b/queue-6.6/tty-xilinx_uartps-split-sysrq-handling.patch @@ -0,0 +1,77 @@ +From b06f388994500297bb91be60ffaf6825ecfd2afe Mon Sep 17 00:00:00 2001 +From: Sean Anderson +Date: Fri, 10 Jan 2025 16:38:22 -0500 +Subject: tty: xilinx_uartps: split sysrq handling + +From: Sean Anderson + +commit b06f388994500297bb91be60ffaf6825ecfd2afe upstream. + +lockdep detects the following circular locking dependency: + +CPU 0 CPU 1 +========================== ============================ +cdns_uart_isr() printk() + uart_port_lock(port) console_lock() + cdns_uart_console_write() + if (!port->sysrq) + uart_port_lock(port) + uart_handle_break() + port->sysrq = ... + uart_handle_sysrq_char() + printk() + console_lock() + +The fixed commit attempts to avoid this situation by only taking the +port lock in cdns_uart_console_write if port->sysrq unset. However, if +(as shown above) cdns_uart_console_write runs before port->sysrq is set, +then it will try to take the port lock anyway. This may result in a +deadlock. + +Fix this by splitting sysrq handling into two parts. We use the prepare +helper under the port lock and defer handling until we release the lock. 
+ +Fixes: 74ea66d4ca06 ("tty: xuartps: Improve sysrq handling") +Signed-off-by: Sean Anderson +Cc: stable@vger.kernel.org # c980248179d: serial: xilinx_uartps: Use port lock wrappers +Acked-by: John Ogness +Link: https://lore.kernel.org/r/20250110213822.2107462-1-sean.anderson@linux.dev +Signed-off-by: Sean Anderson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/xilinx_uartps.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/drivers/tty/serial/xilinx_uartps.c ++++ b/drivers/tty/serial/xilinx_uartps.c +@@ -268,7 +268,7 @@ static void cdns_uart_handle_rx(void *de + continue; + } + +- if (uart_handle_sysrq_char(port, data)) ++ if (uart_prepare_sysrq_char(port, data)) + continue; + + if (is_rxbs_support) { +@@ -369,7 +369,7 @@ static irqreturn_t cdns_uart_isr(int irq + !(readl(port->membase + CDNS_UART_CR) & CDNS_UART_CR_RX_DIS)) + cdns_uart_handle_rx(dev_id, isrstatus); + +- spin_unlock(&port->lock); ++ uart_unlock_and_check_sysrq(port); + return IRQ_HANDLED; + } + +@@ -1229,10 +1229,8 @@ static void cdns_uart_console_write(stru + unsigned int imr, ctrl; + int locked = 1; + +- if (port->sysrq) +- locked = 0; +- else if (oops_in_progress) +- locked = spin_trylock_irqsave(&port->lock, flags); ++ if (oops_in_progress) ++ locked = uart_port_trylock_irqsave(port, &flags); + else + spin_lock_irqsave(&port->lock, flags); + diff --git a/queue-6.6/x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch b/queue-6.6/x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch new file mode 100644 index 0000000000..525717213f --- /dev/null +++ b/queue-6.6/x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch @@ -0,0 +1,77 @@ +From cc31744a294584a36bf764a0ffa3255a8e69f036 Mon Sep 17 00:00:00 2001 +From: Steve Wahl +Date: Wed, 17 Jul 2024 16:31:21 -0500 +Subject: x86/mm/ident_map: Use gbpages only where full GB page should be mapped. 
+ +From: Steve Wahl + +commit cc31744a294584a36bf764a0ffa3255a8e69f036 upstream. + +When ident_pud_init() uses only GB pages to create identity maps, large +ranges of addresses not actually requested can be included in the resulting +table; a 4K request will map a full GB. This can include a lot of extra +address space past that requested, including areas marked reserved by the +BIOS. That allows processor speculation into reserved regions, that on UV +systems can cause system halts. + +Only use GB pages when map creation requests include the full GB page of +space. Fall back to using smaller 2M pages when only portions of a GB page +are included in the request. + +No attempt is made to coalesce mapping requests. If a request requires a +map entry at the 2M (pmd) level, subsequent mapping requests within the +same 1G region will also be at the pmd level, even if adjacent or +overlapping such requests could have been combined to map a full GB page. +Existing usage starts with larger regions and then adds smaller regions, so +this should not have any great consequence. + +Signed-off-by: Steve Wahl +Signed-off-by: Thomas Gleixner +Tested-by: Pavin Joseph +Tested-by: Sarah Brofeldt +Tested-by: Eric Hagberg +Link: https://lore.kernel.org/all/20240717213121.3064030-3-steve.wahl@hpe.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/mm/ident_map.c | 23 ++++++++++++++++++----- + 1 file changed, 18 insertions(+), 5 deletions(-) + +--- a/arch/x86/mm/ident_map.c ++++ b/arch/x86/mm/ident_map.c +@@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_map + for (; addr < end; addr = next) { + pud_t *pud = pud_page + pud_index(addr); + pmd_t *pmd; ++ bool use_gbpage; + + next = (addr & PUD_MASK) + PUD_SIZE; + if (next > end) + next = end; + +- if (info->direct_gbpages) { +- pud_t pudval; ++ /* if this is already a gbpage, this portion is already mapped */ ++ if (pud_leaf(*pud)) ++ continue; ++ ++ /* Is using a gbpage allowed? 
*/ ++ use_gbpage = info->direct_gbpages; + +- if (pud_present(*pud)) +- continue; ++ /* Don't use gbpage if it maps more than the requested region. */ ++ /* at the beginning: */ ++ use_gbpage &= ((addr & ~PUD_MASK) == 0); ++ /* ... or at the end: */ ++ use_gbpage &= ((next & ~PUD_MASK) == 0); ++ ++ /* Never overwrite existing mappings */ ++ use_gbpage &= !pud_present(*pud); ++ ++ if (use_gbpage) { ++ pud_t pudval; + +- addr &= PUD_MASK; + pudval = __pud((addr - info->offset) | info->page_flag); + set_pud(pud, pudval); + continue;