6.6-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 13 Feb 2025 13:13:47 +0000 (14:13 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 13 Feb 2025 13:13:47 +0000 (14:13 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Feb 2025 13:13:47 +0000 (14:13 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Feb 2025 13:13:47 +0000 (14:13 +0100)
diff --git a/queue-6.6/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch b/queue-6.6/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch

new file mode 100644 (file)

index 0000000..0423945
--- /dev/null
+++ b/queue-6.6/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch
@@ -0,0 +1,42 @@
+From stable+bounces-114134-greg=kroah.com@vger.kernel.org Thu Feb  6 17:22:11 2025
+From: Koichiro Den <koichiro.den@canonical.com>
+Date: Fri,  7 Feb 2025 01:21:31 +0900
+Subject: btrfs: avoid monopolizing a core when activating a swap file
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org
+Cc: wqu@suse.com, fdmanana@suse.com, dsterba@suse.com
+Message-ID: <20250206162131.1387235-2-koichiro.den@canonical.com>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 2c8507c63f5498d4ee4af404a8e44ceae4345056 upstream.
+
+This commit re-attempts the backport of the change to the linux-6.6.y
+branch. Commit 6e1a82259307 ("btrfs: avoid monopolizing a core when
+activating a swap file") on this branch was reverted.
+
+During swap activation we iterate over the extents of a file and we can
+have many thousands of them, so we can end up in a busy loop monopolizing
+a core. Avoid this by doing a voluntary reschedule after processing each
+extent.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Koichiro Den <koichiro.den@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -10833,6 +10833,8 @@ static int btrfs_swap_activate(struct sw
+               }
+ 
+               start += len;
++
++              cond_resched();
+       }
+ 
+       if (bsi.block_len)
diff --git a/queue-6.6/cachefiles-fix-null-pointer-dereference-in-object-file.patch b/queue-6.6/cachefiles-fix-null-pointer-dereference-in-object-file.patch

new file mode 100644 (file)

index 0000000..d716313
--- /dev/null
+++ b/queue-6.6/cachefiles-fix-null-pointer-dereference-in-object-file.patch
@@ -0,0 +1,138 @@
+From 31ad74b20227ce6b40910ff78b1c604e42975cf1 Mon Sep 17 00:00:00 2001
+From: Zizhi Wo <wozizhi@huawei.com>
+Date: Thu, 7 Nov 2024 19:06:48 +0800
+Subject: cachefiles: Fix NULL pointer dereference in object->file
+
+From: Zizhi Wo <wozizhi@huawei.com>
+
+commit 31ad74b20227ce6b40910ff78b1c604e42975cf1 upstream.
+
+At present, the object->file has the NULL pointer dereference problem in
+ondemand-mode. The root cause is that the allocated fd and object->file
+lifetime are inconsistent, and the user-space invocation to anon_fd uses
+object->file. Following is the process that triggers the issue:
+
+         [write fd]                            [umount]
+cachefiles_ondemand_fd_write_iter
+                                      fscache_cookie_state_machine
+                                        cachefiles_withdraw_cookie
+  if (!file) return -ENOBUFS
+                                          cachefiles_clean_up_object
+                                            cachefiles_unmark_inode_in_use
+                                            fput(object->file)
+                                            object->file = NULL
+  // file NULL pointer dereference!
+  __cachefiles_write(..., file, ...)
+
+Fix this issue by taking an additional reference on object->file before
+write/llseek, and dropping it after the operation has finished.
+
+Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie")
+Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
+Link: https://lore.kernel.org/r/20241107110649.3980193-5-wozizhi@huawei.com
+Reviewed-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Bin Lan <lanbincn@qq.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cachefiles/interface.c |   14 ++++++++++----
+ fs/cachefiles/ondemand.c  |   30 ++++++++++++++++++++++++------
+ 2 files changed, 34 insertions(+), 10 deletions(-)
+
+--- a/fs/cachefiles/interface.c
++++ b/fs/cachefiles/interface.c
+@@ -327,6 +327,8 @@ static void cachefiles_commit_object(str
+ static void cachefiles_clean_up_object(struct cachefiles_object *object,
+                                      struct cachefiles_cache *cache)
+ {
++      struct file *file;
++
+       if (test_bit(FSCACHE_COOKIE_RETIRED, &object->cookie->flags)) {
+               if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
+                       cachefiles_see_object(object, cachefiles_obj_see_clean_delete);
+@@ -342,10 +344,14 @@ static void cachefiles_clean_up_object(s
+       }
+ 
+       cachefiles_unmark_inode_in_use(object, object->file);
+-      if (object->file) {
+-              fput(object->file);
+-              object->file = NULL;
+-      }
++
++      spin_lock(&object->lock);
++      file = object->file;
++      object->file = NULL;
++      spin_unlock(&object->lock);
++
++      if (file)
++              fput(file);
+ }
+ 
+ /*
+--- a/fs/cachefiles/ondemand.c
++++ b/fs/cachefiles/ondemand.c
+@@ -61,20 +61,26 @@ static ssize_t cachefiles_ondemand_fd_wr
+ {
+       struct cachefiles_object *object = kiocb->ki_filp->private_data;
+       struct cachefiles_cache *cache = object->volume->cache;
+-      struct file *file = object->file;
++      struct file *file;
+       size_t len = iter->count;
+       loff_t pos = kiocb->ki_pos;
+       const struct cred *saved_cred;
+       int ret;
+ 
+-      if (!file)
++      spin_lock(&object->lock);
++      file = object->file;
++      if (!file) {
++              spin_unlock(&object->lock);
+               return -ENOBUFS;
++      }
++      get_file(file);
++      spin_unlock(&object->lock);
+ 
+       cachefiles_begin_secure(cache, &saved_cred);
+       ret = __cachefiles_prepare_write(object, file, &pos, &len, true);
+       cachefiles_end_secure(cache, saved_cred);
+       if (ret < 0)
+-              return ret;
++              goto out;
+ 
+       trace_cachefiles_ondemand_fd_write(object, file_inode(file), pos, len);
+       ret = __cachefiles_write(object, file, pos, iter, NULL, NULL);
+@@ -83,6 +89,8 @@ static ssize_t cachefiles_ondemand_fd_wr
+               kiocb->ki_pos += ret;
+       }
+ 
++out:
++      fput(file);
+       return ret;
+ }
+ 
+@@ -90,12 +98,22 @@ static loff_t cachefiles_ondemand_fd_lls
+                                           int whence)
+ {
+       struct cachefiles_object *object = filp->private_data;
+-      struct file *file = object->file;
++      struct file *file;
++      loff_t ret;
+ 
+-      if (!file)
++      spin_lock(&object->lock);
++      file = object->file;
++      if (!file) {
++              spin_unlock(&object->lock);
+               return -ENOBUFS;
++      }
++      get_file(file);
++      spin_unlock(&object->lock);
+ 
+-      return vfs_llseek(file, pos, whence);
++      ret = vfs_llseek(file, pos, whence);
++      fput(file);
++
++      return ret;
+ }
+ 
+ static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl,
diff --git a/queue-6.6/ext4-filesystems-without-casefold-feature-cannot-be-mounted-with-siphash.patch b/queue-6.6/ext4-filesystems-without-casefold-feature-cannot-be-mounted-with-siphash.patch

new file mode 100644 (file)

index 0000000..2811714
--- /dev/null
+++ b/queue-6.6/ext4-filesystems-without-casefold-feature-cannot-be-mounted-with-siphash.patch
@@ -0,0 +1,40 @@
+From 985b67cd86392310d9e9326de941c22fc9340eec Mon Sep 17 00:00:00 2001
+From: Lizhi Xu <lizhi.xu@windriver.com>
+Date: Wed, 5 Jun 2024 09:23:35 +0800
+Subject: ext4: filesystems without casefold feature cannot be mounted with siphash
+
+From: Lizhi Xu <lizhi.xu@windriver.com>
+
+commit 985b67cd86392310d9e9326de941c22fc9340eec upstream.
+
+When mounting the ext4 filesystem, if the default hash version is set to
+DX_HASH_SIPHASH but the casefold feature is not set, exit the mounting.
+
+Reported-by: syzbot+340581ba9dceb7e06fb3@syzkaller.appspotmail.com
+Signed-off-by: Lizhi Xu <lizhi.xu@windriver.com>
+Link: https://patch.msgid.link/20240605012335.44086-1-lizhi.xu@windriver.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Bruno VERNAY <bruno.vernay@se.com>
+Signed-off-by: Victor Giraud <vgiraud.opensource@witekio.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/super.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -3627,6 +3627,14 @@ int ext4_feature_set_ok(struct super_blo
+       }
+ #endif
+ 
++      if (EXT4_SB(sb)->s_es->s_def_hash_version == DX_HASH_SIPHASH &&
++          !ext4_has_feature_casefold(sb)) {
++              ext4_msg(sb, KERN_ERR,
++                       "Filesystem without casefold feature cannot be "
++                       "mounted with siphash");
++              return 0;
++      }
++
+       if (readonly)
+               return 1;
+ 
diff --git a/queue-6.6/kvm-x86-make-x2apic-id-100-readonly.patch b/queue-6.6/kvm-x86-make-x2apic-id-100-readonly.patch

new file mode 100644 (file)

index 0000000..0b7ecd4
--- /dev/null
+++ b/queue-6.6/kvm-x86-make-x2apic-id-100-readonly.patch
@@ -0,0 +1,123 @@
+From 4b7c3f6d04bd53f2e5b228b6821fb8f5d1ba3071 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 2 Aug 2024 13:29:40 -0700
+Subject: KVM: x86: Make x2APIC ID 100% readonly
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 4b7c3f6d04bd53f2e5b228b6821fb8f5d1ba3071 upstream.
+
+Ignore the userspace provided x2APIC ID when fixing up APIC state for
+KVM_SET_LAPIC, i.e. make the x2APIC ID fully readonly in KVM.  Commit
+a92e2543d6a8 ("KVM: x86: use hardware-compatible format for APIC ID
+register"), which added the fixup, didn't intend to allow userspace to
+modify the x2APIC ID.  In fact, that commit is when KVM first started
+treating the x2APIC ID as readonly, apparently to fix some race:
+
+ static inline u32 kvm_apic_id(struct kvm_lapic *apic)
+ {
+-       return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
++       /* To avoid a race between apic_base and following APIC_ID update when
++        * switching to x2apic_mode, the x2apic mode returns initial x2apic id.
++        */
++       if (apic_x2apic_mode(apic))
++               return apic->vcpu->vcpu_id;
++
++       return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
+ }
+
+Furthermore, KVM doesn't support delivering interrupts to vCPUs with a
+modified x2APIC ID, but KVM *does* return the modified value on a guest
+RDMSR and for KVM_GET_LAPIC.  I.e. no remotely sane setup can actually
+work with a modified x2APIC ID.
+
+Making the x2APIC ID fully readonly fixes a WARN in KVM's optimized map
+calculation, which expects the LDR to align with the x2APIC ID.
+
+  WARNING: CPU: 2 PID: 958 at arch/x86/kvm/lapic.c:331 kvm_recalculate_apic_map+0x609/0xa00 [kvm]
+  CPU: 2 PID: 958 Comm: recalc_apic_map Not tainted 6.4.0-rc3-vanilla+ #35
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.2-1-1 04/01/2014
+  RIP: 0010:kvm_recalculate_apic_map+0x609/0xa00 [kvm]
+  Call Trace:
+   <TASK>
+   kvm_apic_set_state+0x1cf/0x5b0 [kvm]
+   kvm_arch_vcpu_ioctl+0x1806/0x2100 [kvm]
+   kvm_vcpu_ioctl+0x663/0x8a0 [kvm]
+   __x64_sys_ioctl+0xb8/0xf0
+   do_syscall_64+0x56/0x80
+   entry_SYSCALL_64_after_hwframe+0x46/0xb0
+  RIP: 0033:0x7fade8b9dd6f
+
+Unfortunately, the WARN can still trigger for other CPUs than the current
+one by racing against KVM_SET_LAPIC, so remove it completely.
+
+Reported-by: Michal Luczaj <mhal@rbox.co>
+Closes: https://lore.kernel.org/all/814baa0c-1eaa-4503-129f-059917365e80@rbox.co
+Reported-by: Haoyu Wu <haoyuwu254@gmail.com>
+Closes: https://lore.kernel.org/all/20240126161633.62529-1-haoyuwu254@gmail.com
+Reported-by: syzbot+545f1326f405db4e1c3e@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/000000000000c2a6b9061cbca3c3@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-ID: <20240802202941.344889-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: James Houghton <jthoughton@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/lapic.c |   22 +++++++++++++++-------
+ 1 file changed, 15 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -338,10 +338,8 @@ static void kvm_recalculate_logical_map(
+        * reversing the LDR calculation to get cluster of APICs, i.e. no
+        * additional work is required.
+        */
+-      if (apic_x2apic_mode(apic)) {
+-              WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
++      if (apic_x2apic_mode(apic))
+               return;
+-      }
+ 
+       if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
+                                                       &cluster, &mask))) {
+@@ -2964,18 +2962,28 @@ static int kvm_apic_state_fixup(struct k
+               struct kvm_lapic_state *s, bool set)
+ {
+       if (apic_x2apic_mode(vcpu->arch.apic)) {
++              u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
+               u32 *id = (u32 *)(s->regs + APIC_ID);
+               u32 *ldr = (u32 *)(s->regs + APIC_LDR);
+               u64 icr;
+ 
+               if (vcpu->kvm->arch.x2apic_format) {
+-                      if (*id != vcpu->vcpu_id)
++                      if (*id != x2apic_id)
+                               return -EINVAL;
+               } else {
++                      /*
++                       * Ignore the userspace value when setting APIC state.
++                       * KVM's model is that the x2APIC ID is readonly, e.g.
++                       * KVM only supports delivering interrupts to KVM's
++                       * version of the x2APIC ID.  However, for backwards
++                       * compatibility, don't reject attempts to set a
++                       * mismatched ID for userspace that hasn't opted into
++                       * x2apic_format.
++                       */
+                       if (set)
+-                              *id >>= 24;
++                              *id = x2apic_id;
+                       else
+-                              *id <<= 24;
++                              *id = x2apic_id << 24;
+               }
+ 
+               /*
+@@ -2984,7 +2992,7 @@ static int kvm_apic_state_fixup(struct k
+                * split to ICR+ICR2 in userspace for backwards compatibility.
+                */
+               if (set) {
+-                      *ldr = kvm_apic_calc_x2apic_ldr(*id);
++                      *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
+ 
+                       icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
+                             (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
diff --git a/queue-6.6/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2avic.patch b/queue-6.6/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2avic.patch

new file mode 100644 (file)

index 0000000..2114e03
--- /dev/null
+++ b/queue-6.6/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2avic.patch
@@ -0,0 +1,151 @@
+From 73b42dc69be8564d4951a14d00f827929fe5ef79 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 19 Jul 2024 16:51:00 -0700
+Subject: KVM: x86: Re-split x2APIC ICR into ICR+ICR2 for AMD (x2AVIC)
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 73b42dc69be8564d4951a14d00f827929fe5ef79 upstream.
+
+Re-introduce the "split" x2APIC ICR storage that KVM used prior to Intel's
+IPI virtualization support, but only for AMD.  While not stated anywhere
+in the APM, despite stating the ICR is a single 64-bit register, AMD CPUs
+store the 64-bit ICR as two separate 32-bit values in ICR and ICR2.  When
+IPI virtualization (IPIv on Intel, all AVIC flavors on AMD) is enabled,
+KVM needs to match CPU behavior as some ICR writes will be handled by
+the CPU, not by KVM.
+
+Add a kvm_x86_ops knob to control the underlying format used by the CPU to
+store the x2APIC ICR, and tune it to AMD vs. Intel regardless of whether
+or not x2AVIC is enabled.  If KVM is handling all ICR writes, the storage
+format for x2APIC mode doesn't matter, and having the behavior follow AMD
+versus Intel will provide better test coverage and ease debugging.
+
+Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode")
+Cc: stable@vger.kernel.org
+Cc: Maxim Levitsky <mlevitsk@redhat.com>
+Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+Link: https://lore.kernel.org/r/20240719235107.3023592-4-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+[JH: fixed conflict with vmx_x86_ops reshuffle due to missing commit 5f18c642ff7e2]
+Signed-off-by: James Houghton <jthoughton@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h |    2 +
+ arch/x86/kvm/lapic.c            |   42 ++++++++++++++++++++++++++++------------
+ arch/x86/kvm/svm/svm.c          |    2 +
+ arch/x86/kvm/vmx/vmx.c          |    2 +
+ 4 files changed, 36 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1650,6 +1650,8 @@ struct kvm_x86_ops {
+       void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+       void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+       bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
++
++      const bool x2apic_icr_is_split;
+       const unsigned long required_apicv_inhibits;
+       bool allow_apicv_in_x2apic_without_x2apic_virtualization;
+       void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2459,11 +2459,25 @@ int kvm_x2apic_icr_write(struct kvm_lapi
+       data &= ~APIC_ICR_BUSY;
+ 
+       kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
+-      kvm_lapic_set_reg64(apic, APIC_ICR, data);
++      if (kvm_x86_ops.x2apic_icr_is_split) {
++              kvm_lapic_set_reg(apic, APIC_ICR, data);
++              kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
++      } else {
++              kvm_lapic_set_reg64(apic, APIC_ICR, data);
++      }
+       trace_kvm_apic_write(APIC_ICR, data);
+       return 0;
+ }
+ 
++static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
++{
++      if (kvm_x86_ops.x2apic_icr_is_split)
++              return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
++                     (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
++
++      return kvm_lapic_get_reg64(apic, APIC_ICR);
++}
++
+ /* emulate APIC access in a trap manner */
+ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
+ {
+@@ -2481,7 +2495,7 @@ void kvm_apic_write_nodecode(struct kvm_
+        * maybe-unecessary write, and both are in the noise anyways.
+        */
+       if (apic_x2apic_mode(apic) && offset == APIC_ICR)
+-              WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR)));
++              WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
+       else
+               kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
+ }
+@@ -2988,18 +3002,22 @@ static int kvm_apic_state_fixup(struct k
+ 
+               /*
+                * In x2APIC mode, the LDR is fixed and based on the id.  And
+-               * ICR is internally a single 64-bit register, but needs to be
+-               * split to ICR+ICR2 in userspace for backwards compatibility.
++               * if the ICR is _not_ split, ICR is internally a single 64-bit
++               * register, but needs to be split to ICR+ICR2 in userspace for
++               * backwards compatibility.
+                */
+-              if (set) {
++              if (set)
+                       *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
+ 
+-                      icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
+-                            (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
+-                      __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
+-              } else {
+-                      icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
+-                      __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
++              if (!kvm_x86_ops.x2apic_icr_is_split) {
++                      if (set) {
++                              icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
++                                    (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
++                              __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
++                      } else {
++                              icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
++                              __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
++                      }
+               }
+       }
+ 
+@@ -3196,7 +3214,7 @@ static int kvm_lapic_msr_read(struct kvm
+       u32 low;
+ 
+       if (reg == APIC_ICR) {
+-              *data = kvm_lapic_get_reg64(apic, APIC_ICR);
++              *data = kvm_x2apic_icr_read(apic);
+               return 0;
+       }
+ 
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -5014,6 +5014,8 @@ static struct kvm_x86_ops svm_x86_ops __
+       .enable_nmi_window = svm_enable_nmi_window,
+       .enable_irq_window = svm_enable_irq_window,
+       .update_cr8_intercept = svm_update_cr8_intercept,
++
++      .x2apic_icr_is_split = true,
+       .set_virtual_apic_mode = avic_refresh_virtual_apic_mode,
+       .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
+       .apicv_post_state_restore = avic_apicv_post_state_restore,
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -8323,6 +8323,8 @@ static struct kvm_x86_ops vmx_x86_ops __
+       .enable_nmi_window = vmx_enable_nmi_window,
+       .enable_irq_window = vmx_enable_irq_window,
+       .update_cr8_intercept = vmx_update_cr8_intercept,
++
++      .x2apic_icr_is_split = false,
+       .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
+       .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
+       .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
diff --git a/queue-6.6/mptcp-pm-only-set-fullmesh-for-subflow-endp.patch b/queue-6.6/mptcp-pm-only-set-fullmesh-for-subflow-endp.patch

new file mode 100644 (file)

index 0000000..1fa4164
--- /dev/null
+++ b/queue-6.6/mptcp-pm-only-set-fullmesh-for-subflow-endp.patch
@@ -0,0 +1,114 @@
+From stable+bounces-114455-greg=kroah.com@vger.kernel.org Sun Feb  9 18:42:14 2025
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Sun,  9 Feb 2025 18:41:55 +0100
+Subject: mptcp: pm: only set fullmesh for subflow endp
+To: mptcp@lists.linux.dev, stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>, syzbot+cd16e79c1e45f3fe0377@syzkaller.appspotmail.com, Mat Martineau <martineau@kernel.org>, Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20250209174153.3388802-6-matttbe@kernel.org>
+
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+
+commit 1bb0d1348546ad059f55c93def34e67cb2a034a6 upstream.
+
+With the in-kernel path-manager, it is possible to change the 'fullmesh'
+flag. The code in mptcp_pm_nl_fullmesh() expects to change it only on
+'subflow' endpoints, to recreate more or less subflows using the linked
+address.
+
+Unfortunately, the set_flags() hook was a bit more permissive, and
+allowed 'implicit' endpoints to get the 'fullmesh' flag while it was not
+allowed before.
+
+That's what syzbot found, triggering the following warning:
+
+  WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 __mark_subflow_endp_available net/mptcp/pm_netlink.c:1496 [inline]
+  WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 mptcp_pm_nl_fullmesh net/mptcp/pm_netlink.c:1980 [inline]
+  WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 mptcp_nl_set_flags net/mptcp/pm_netlink.c:2003 [inline]
+  WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 mptcp_pm_nl_set_flags+0x974/0xdc0 net/mptcp/pm_netlink.c:2064
+  Modules linked in:
+  CPU: 0 UID: 0 PID: 6499 Comm: syz.1.413 Not tainted 6.13.0-rc5-syzkaller-00172-gd1bf27c4e176 #0
+  Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024
+  RIP: 0010:__mark_subflow_endp_available net/mptcp/pm_netlink.c:1496 [inline]
+  RIP: 0010:mptcp_pm_nl_fullmesh net/mptcp/pm_netlink.c:1980 [inline]
+  RIP: 0010:mptcp_nl_set_flags net/mptcp/pm_netlink.c:2003 [inline]
+  RIP: 0010:mptcp_pm_nl_set_flags+0x974/0xdc0 net/mptcp/pm_netlink.c:2064
+  Code: 01 00 00 49 89 c5 e8 fb 45 e8 f5 e9 b8 fc ff ff e8 f1 45 e8 f5 4c 89 f7 be 03 00 00 00 e8 44 1d 0b f9 eb a0 e8 dd 45 e8 f5 90 <0f> 0b 90 e9 17 ff ff ff 89 d9 80 e1 07 38 c1 0f 8c c9 fc ff ff 48
+  RSP: 0018:ffffc9000d307240 EFLAGS: 00010293
+  RAX: ffffffff8bb72e03 RBX: 0000000000000000 RCX: ffff88807da88000
+  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+  RBP: ffffc9000d307430 R08: ffffffff8bb72cf0 R09: 1ffff1100b842a5e
+  R10: dffffc0000000000 R11: ffffed100b842a5f R12: ffff88801e2e5ac0
+  R13: ffff88805c214800 R14: ffff88805c2152e8 R15: 1ffff1100b842a5d
+  FS:  00005555619f6500(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000020002840 CR3: 00000000247e6000 CR4: 00000000003526f0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+   <TASK>
+   genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline]
+   genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline]
+   genl_rcv_msg+0xb14/0xec0 net/netlink/genetlink.c:1210
+   netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2542
+   genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219
+   netlink_unicast_kernel net/netlink/af_netlink.c:1321 [inline]
+   netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1347
+   netlink_sendmsg+0x8e4/0xcb0 net/netlink/af_netlink.c:1891
+   sock_sendmsg_nosec net/socket.c:711 [inline]
+   __sock_sendmsg+0x221/0x270 net/socket.c:726
+   ____sys_sendmsg+0x52a/0x7e0 net/socket.c:2583
+   ___sys_sendmsg net/socket.c:2637 [inline]
+   __sys_sendmsg+0x269/0x350 net/socket.c:2669
+   do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+   do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
+   entry_SYSCALL_64_after_hwframe+0x77/0x7f
+  RIP: 0033:0x7f5fe8785d29
+  Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
+  RSP: 002b:00007fff571f5558 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+  RAX: ffffffffffffffda RBX: 00007f5fe8975fa0 RCX: 00007f5fe8785d29
+  RDX: 0000000000000000 RSI: 0000000020000480 RDI: 0000000000000007
+  RBP: 00007f5fe8801b08 R08: 0000000000000000 R09: 0000000000000000
+  R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+  R13: 00007f5fe8975fa0 R14: 00007f5fe8975fa0 R15: 00000000000011f4
+   </TASK>
+
+Here, syzbot managed to set the 'fullmesh' flag on an 'implicit' and
+used -- according to 'id_avail_bitmap' -- endpoint, causing the PM to
+try to decrement the local_addr_used counter which is only incremented for
+the 'subflow' endpoint.
+
+Note that 'no type' endpoints -- not 'subflow', 'signal', 'implicit' --
+are fine, because their ID will not be marked as used in the 'id_avail'
+bitmap, and setting 'fullmesh' can help forcing the creation of subflow
+when receiving an ADD_ADDR.
+
+Fixes: 73c762c1f07d ("mptcp: set fullmesh flag in pm_netlink")
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+cd16e79c1e45f3fe0377@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/6786ac51.050a0220.216c54.00a6.GAE@google.com
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/540
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20250123-net-mptcp-syzbot-issues-v1-2-af73258a726f@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ Conflicts in pm_netlink.c, because the code has been moved around in
+  commit 6a42477fe449 ("mptcp: update set_flags interfaces"), but the
+  same fix can still be applied at the original place. ]
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm_netlink.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -2069,7 +2069,8 @@ int mptcp_pm_nl_set_flags(struct net *ne
+               return -EINVAL;
+       }
+       if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
+-          (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
++          (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL |
++                           MPTCP_PM_ADDR_FLAG_IMPLICIT))) {
+               spin_unlock_bh(&pernet->lock);
+               return -EINVAL;
+       }
diff --git a/queue-6.6/mptcp-prevent-excessive-coalescing-on-receive.patch b/queue-6.6/mptcp-prevent-excessive-coalescing-on-receive.patch

new file mode 100644 (file)

index 0000000..aab6e72
--- /dev/null
+++ b/queue-6.6/mptcp-prevent-excessive-coalescing-on-receive.patch
@@ -0,0 +1,41 @@
+From stable+bounces-114456-greg=kroah.com@vger.kernel.org Sun Feb  9 18:42:14 2025
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Sun,  9 Feb 2025 18:41:56 +0100
+Subject: mptcp: prevent excessive coalescing on receive
+To: mptcp@lists.linux.dev, stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: Paolo Abeni <pabeni@redhat.com>, Mat Martineau <martineau@kernel.org>, "Matthieu Baerts (NGI0)" <matttbe@kernel.org>, Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20250209174153.3388802-7-matttbe@kernel.org>
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 56b824eb49d6258aa0bad09a406ceac3f643cdae upstream.
+
+Currently the skb size after coalescing is only limited by the skb
+layout (the skb must not carry frag_list). A single coalesced skb
+covering several MSS can potentially fill the receive buffer completely.
+In such a case, the send window will be zero until the receive buffer
+is empty again, badly affecting throughput.
+
+Fixes: 8268ed4c9d19 ("mptcp: introduce and use mptcp_try_coalesce()")
+Cc: stable@vger.kernel.org # please delay 2 weeks after 6.13-final release
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20241230-net-mptcp-rbuf-fixes-v1-3-8608af434ceb@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -138,6 +138,7 @@ static bool mptcp_try_coalesce(struct so
+       int delta;
+ 
+       if (MPTCP_SKB_CB(from)->offset ||
++          ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) ||
+           !skb_try_coalesce(to, from, &fragstolen, &delta))
+               return false;
+ 
diff --git a/queue-6.6/ocfs2-check-dir-i_size-in-ocfs2_find_entry.patch b/queue-6.6/ocfs2-check-dir-i_size-in-ocfs2_find_entry.patch

new file mode 100644 (file)

index 0000000..6628e52
--- /dev/null
+++ b/queue-6.6/ocfs2-check-dir-i_size-in-ocfs2_find_entry.patch
@@ -0,0 +1,217 @@
+From b0fce54b8c0d8e5f2b4c243c803c5996e73baee8 Mon Sep 17 00:00:00 2001
+From: Su Yue <glass.su@suse.com>
+Date: Mon, 6 Jan 2025 22:06:40 +0800
+Subject: ocfs2: check dir i_size in ocfs2_find_entry
+
+From: Su Yue <glass.su@suse.com>
+
+commit b0fce54b8c0d8e5f2b4c243c803c5996e73baee8 upstream.
+
+syz reports an out of bounds read:
+
+==================================================================
+BUG: KASAN: slab-out-of-bounds in ocfs2_match fs/ocfs2/dir.c:334
+[inline]
+BUG: KASAN: slab-out-of-bounds in ocfs2_search_dirblock+0x283/0x6e0
+fs/ocfs2/dir.c:367
+Read of size 1 at addr ffff88804d8b9982 by task syz-executor.2/14802
+
+CPU: 0 UID: 0 PID: 14802 Comm: syz-executor.2 Not tainted 6.13.0-rc4 #2
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1
+04/01/2014
+Sched_ext: serialise (enabled+all), task: runnable_at=-10ms
+Call Trace:
+<TASK>
+__dump_stack lib/dump_stack.c:94 [inline]
+dump_stack_lvl+0x229/0x350 lib/dump_stack.c:120
+print_address_description mm/kasan/report.c:378 [inline]
+print_report+0x164/0x530 mm/kasan/report.c:489
+kasan_report+0x147/0x180 mm/kasan/report.c:602
+ocfs2_match fs/ocfs2/dir.c:334 [inline]
+ocfs2_search_dirblock+0x283/0x6e0 fs/ocfs2/dir.c:367
+ocfs2_find_entry_id fs/ocfs2/dir.c:414 [inline]
+ocfs2_find_entry+0x1143/0x2db0 fs/ocfs2/dir.c:1078
+ocfs2_find_files_on_disk+0x18e/0x530 fs/ocfs2/dir.c:1981
+ocfs2_lookup_ino_from_name+0xb6/0x110 fs/ocfs2/dir.c:2003
+ocfs2_lookup+0x30a/0xd40 fs/ocfs2/namei.c:122
+lookup_open fs/namei.c:3627 [inline]
+open_last_lookups fs/namei.c:3748 [inline]
+path_openat+0x145a/0x3870 fs/namei.c:3984
+do_filp_open+0xe9/0x1c0 fs/namei.c:4014
+do_sys_openat2+0x135/0x1d0 fs/open.c:1402
+do_sys_open fs/open.c:1417 [inline]
+__do_sys_openat fs/open.c:1433 [inline]
+__se_sys_openat fs/open.c:1428 [inline]
+__x64_sys_openat+0x15d/0x1c0 fs/open.c:1428
+do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+do_syscall_64+0xf6/0x210 arch/x86/entry/common.c:83
+entry_SYSCALL_64_after_hwframe+0x77/0x7f
+RIP: 0033:0x7f01076903ad
+Code: c3 e8 a7 2b 00 00 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89
+f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01
+f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007f01084acfc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
+RAX: ffffffffffffffda RBX: 00007f01077cbf80 RCX: 00007f01076903ad
+RDX: 0000000000105042 RSI: 0000000020000080 RDI: ffffffffffffff9c
+RBP: 00007f01077cbf80 R08: 0000000000000000 R09: 0000000000000000
+R10: 00000000000001ff R11: 0000000000000246 R12: 0000000000000000
+R13: 00007f01077cbf80 R14: 00007f010764fc90 R15: 00007f010848d000
+</TASK>
+==================================================================
+
+And a general protection fault in ocfs2_prepare_dir_for_insert:
+
+==================================================================
+loop0: detected capacity change from 0 to 32768
+JBD2: Ignoring recovery information on journal
+ocfs2: Mounting device (7,0) on (node local, slot 0) with ordered data
+mode.
+Oops: general protection fault, probably for non-canonical address
+0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI
+KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
+CPU: 0 UID: 0 PID: 5096 Comm: syz-executor792 Not tainted
+6.11.0-rc4-syzkaller-00002-gb0da640826ba #0
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
+1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
+RIP: 0010:ocfs2_find_dir_space_id fs/ocfs2/dir.c:3406 [inline]
+RIP: 0010:ocfs2_prepare_dir_for_insert+0x3309/0x5c70 fs/ocfs2/dir.c:4280
+Code: 00 00 e8 2a 25 13 fe e9 ba 06 00 00 e8 20 25 13 fe e9 4f 01 00 00
+e8 16 25 13 fe 49 8d 7f 08 49 8d 5f 09 48 89 f8 48 c1 e8 03 <42> 0f b6
+04 20 84 c0 0f 85 bd 23 00 00 48 89 d8 48 c1 e8 03 42 0f
+RSP: 0018:ffffc9000af9f020 EFLAGS: 00010202
+RAX: 0000000000000001 RBX: 0000000000000009 RCX: ffff88801e27a440
+RDX: 0000000000000000 RSI: 0000000000000400 RDI: 0000000000000008
+RBP: ffffc9000af9f830 R08: ffffffff8380395b R09: ffffffff838090a7
+R10: 0000000000000002 R11: ffff88801e27a440 R12: dffffc0000000000
+R13: ffff88803c660878 R14: f700000000000088 R15: 0000000000000000
+FS:  000055555a677380(0000) GS:ffff888020800000(0000)
+knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000560bce569178 CR3: 000000001de5a000 CR4: 0000000000350ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+<TASK>
+ocfs2_mknod+0xcaf/0x2b40 fs/ocfs2/namei.c:292
+vfs_mknod+0x36d/0x3b0 fs/namei.c:4088
+do_mknodat+0x3ec/0x5b0
+__do_sys_mknodat fs/namei.c:4166 [inline]
+__se_sys_mknodat fs/namei.c:4163 [inline]
+__x64_sys_mknodat+0xa7/0xc0 fs/namei.c:4163
+do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
+entry_SYSCALL_64_after_hwframe+0x77/0x7f
+RIP: 0033:0x7f2dafda3a99
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 17 00 00 90 48 89
+f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08
+0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8
+64 89 01 48
+RSP: 002b:00007ffe336a6658 EFLAGS: 00000246 ORIG_RAX:
+0000000000000103
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
+00007f2dafda3a99
+RDX: 00000000000021c0 RSI: 0000000020000040 RDI:
+00000000ffffff9c
+RBP: 00007f2dafe1b5f0 R08: 0000000000004480 R09:
+000055555a6784c0
+R10: 0000000000000103 R11: 0000000000000246 R12:
+00007ffe336a6680
+R13: 00007ffe336a68a8 R14: 431bde82d7b634db R15:
+00007f2dafdec03b
+</TASK>
+==================================================================
+
+The two reports are both caused by an invalid negative i_size of the dir
+inode.  For ocfs2, the i_size of a dir inode can't be negative or zero.
+
+Here add a check in ocfs2_find_entry(), which is called by
+ocfs2_check_dir_for_entry().  It fixes the second report as that function
+must be called before ocfs2_prepare_dir_for_insert().  Also set an upper
+limit for dirs with OCFS2_INLINE_DATA_FL: i_size can't exceed the blocksize.
+
+Link: https://lkml.kernel.org/r/20250106140640.92260-1-glass.su@suse.com
+Reported-by: Jiacheng Xu <stitch@zju.edu.cn>
+Link: https://lore.kernel.org/ocfs2-devel/17a04f01.1ae74.19436d003fc.Coremail.stitch@zju.edu.cn/T/#u
+Reported-by: syzbot+5a64828fcc4c2ad9b04f@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/all/0000000000005894f3062018caf1@google.com/T/
+Signed-off-by: Su Yue <glass.su@suse.com>
+Reviewed-by: Heming Zhao <heming.zhao@suse.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Jun Piao <piaojun@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ocfs2/dir.c |   25 +++++++++++++++++++++----
+ 1 file changed, 21 insertions(+), 4 deletions(-)
+
+--- a/fs/ocfs2/dir.c
++++ b/fs/ocfs2/dir.c
+@@ -1065,26 +1065,39 @@ int ocfs2_find_entry(const char *name, i
+ {
+       struct buffer_head *bh;
+       struct ocfs2_dir_entry *res_dir = NULL;
++      int ret = 0;
+ 
+       if (ocfs2_dir_indexed(dir))
+               return ocfs2_find_entry_dx(name, namelen, dir, lookup);
+ 
++      if (unlikely(i_size_read(dir) <= 0)) {
++              ret = -EFSCORRUPTED;
++              mlog_errno(ret);
++              goto out;
++      }
+       /*
+        * The unindexed dir code only uses part of the lookup
+        * structure, so there's no reason to push it down further
+        * than this.
+        */
+-      if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
++      if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
++              if (unlikely(i_size_read(dir) > dir->i_sb->s_blocksize)) {
++                      ret = -EFSCORRUPTED;
++                      mlog_errno(ret);
++                      goto out;
++              }
+               bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir);
+-      else
++      } else {
+               bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir);
++      }
+ 
+       if (bh == NULL)
+               return -ENOENT;
+ 
+       lookup->dl_leaf_bh = bh;
+       lookup->dl_entry = res_dir;
+-      return 0;
++out:
++      return ret;
+ }
+ 
+ /*
+@@ -2012,6 +2025,7 @@ int ocfs2_lookup_ino_from_name(struct in
+  *
+  * Return 0 if the name does not exist
+  * Return -EEXIST if the directory contains the name
++ * Return -EFSCORRUPTED if found corruption
+  *
+  * Callers should have i_rwsem + a cluster lock on dir
+  */
+@@ -2025,9 +2039,12 @@ int ocfs2_check_dir_for_entry(struct ino
+       trace_ocfs2_check_dir_for_entry(
+               (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
+ 
+-      if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) {
++      ret = ocfs2_find_entry(name, namelen, dir, &lookup);
++      if (ret == 0) {
+               ret = -EEXIST;
+               mlog_errno(ret);
++      } else if (ret == -ENOENT) {
++              ret = 0;
+       }
+ 
+       ocfs2_free_dir_lookup_result(&lookup);
diff --git a/queue-6.6/revert-btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch b/queue-6.6/revert-btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch

new file mode 100644 (file)

index 0000000..e3a5ef4
--- /dev/null
+++ b/queue-6.6/revert-btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch
@@ -0,0 +1,36 @@
+From stable+bounces-114133-greg=kroah.com@vger.kernel.org Thu Feb  6 17:22:09 2025
+From: Koichiro Den <koichiro.den@canonical.com>
+Date: Fri,  7 Feb 2025 01:21:30 +0900
+Subject: Revert "btrfs: avoid monopolizing a core when activating a swap file"
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org
+Cc: wqu@suse.com, fdmanana@suse.com, dsterba@suse.com
+Message-ID: <20250206162131.1387235-1-koichiro.den@canonical.com>
+
+From: Koichiro Den <koichiro.den@canonical.com>
+
+This reverts commit 6e1a8225930719a9f352d56320214e33e2dde0a6.
+
+The backport for linux-6.6.y, commit 6e1a82259307 ("btrfs: avoid
+monopolizing a core when activating a swap file"), inserted
+cond_resched() in the wrong location.
+
+Revert it now; a subsequent commit will re-backport the original patch.
+
+Fixes: 6e1a82259307 ("btrfs: avoid monopolizing a core when activating a swap file") # linux-6.6.y
+Signed-off-by: Koichiro Den <koichiro.den@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -7153,8 +7153,6 @@ noinline int can_nocow_extent(struct ino
+                       ret = -EAGAIN;
+                       goto out;
+               }
+-
+-              cond_resched();
+       }
+ 
+       if (orig_start)
diff --git a/queue-6.6/selftests-mptcp-join-fix-af_inet6-variable.patch b/queue-6.6/selftests-mptcp-join-fix-af_inet6-variable.patch

new file mode 100644 (file)

index 0000000..610f568
--- /dev/null
+++ b/queue-6.6/selftests-mptcp-join-fix-af_inet6-variable.patch
@@ -0,0 +1,40 @@
+From stable+bounces-114457-greg=kroah.com@vger.kernel.org Sun Feb  9 18:42:17 2025
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Sun,  9 Feb 2025 18:41:57 +0100
+Subject: selftests: mptcp: join: fix AF_INET6 variable
+To: mptcp@lists.linux.dev, stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Message-ID: <20250209174153.3388802-8-matttbe@kernel.org>
+
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+
+The Fixes commit is a backport renaming a variable, from AF_INET6 to
+MPTCP_LIB_AF_INET6.
+
+The commit has been applied without conflicts, except that it missed one
+extra variable that was in v6.6, but not in the version linked to the
+Fixes commit.
+
+This variable has then been renamed too to avoid these errors:
+
+  LISTENER_CREATED 10.0.2.1:10100     ./mptcp_join.sh: line 2944: [: 2: unary operator expected
+  LISTENER_CLOSED  10.0.2.1:10100     ./mptcp_join.sh: line 2944: [: 2: unary operator expected
+
+Fixes: a17d1419126b ("selftests: mptcp: declare event macros in mptcp_lib")
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_join.sh |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -2941,7 +2941,7 @@ verify_listener_events()
+       type=$(mptcp_lib_evts_get_info type "$evt" "$e_type")
+       family=$(mptcp_lib_evts_get_info family "$evt" "$e_type")
+       sport=$(mptcp_lib_evts_get_info sport "$evt" "$e_type")
+-      if [ $family ] && [ $family = $AF_INET6 ]; then
++      if [ $family ] && [ $family = $MPTCP_LIB_AF_INET6 ]; then
+               saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" "$e_type")
+       else
+               saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" "$e_type")
diff --git a/queue-6.6/series b/queue-6.6/series

index c28751dfdb51157c546a0f2e19b6fe2345d8f3b8..37682a225f21aeba9cdcd9c701217db37ce86687 100644 (file)
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -260,3 +260,15 @@ mips-ftrace-declare-ftrace_get_parent_ra_addr-as-static.patch
  spi-atmel-quadspi-create-atmel_qspi_ops-to-support-newer-soc-families.patch
  spi-atmel-qspi-memory-barriers-after-memory-mapped-i-o.patch
  net-ncsi-use-dev_set_mac_address-for-get-mc-mac-address-handling.patch
+ocfs2-check-dir-i_size-in-ocfs2_find_entry.patch
+revert-btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch
+btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch
+ext4-filesystems-without-casefold-feature-cannot-be-mounted-with-siphash.patch
+cachefiles-fix-null-pointer-dereference-in-object-file.patch
+mptcp-pm-only-set-fullmesh-for-subflow-endp.patch
+mptcp-prevent-excessive-coalescing-on-receive.patch
+selftests-mptcp-join-fix-af_inet6-variable.patch
+x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch
+tty-xilinx_uartps-split-sysrq-handling.patch
+kvm-x86-make-x2apic-id-100-readonly.patch
+kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2avic.patch
diff --git a/queue-6.6/tty-xilinx_uartps-split-sysrq-handling.patch b/queue-6.6/tty-xilinx_uartps-split-sysrq-handling.patch

new file mode 100644 (file)

index 0000000..00bbdd4
--- /dev/null
+++ b/queue-6.6/tty-xilinx_uartps-split-sysrq-handling.patch
@@ -0,0 +1,77 @@
+From b06f388994500297bb91be60ffaf6825ecfd2afe Mon Sep 17 00:00:00 2001
+From: Sean Anderson <sean.anderson@linux.dev>
+Date: Fri, 10 Jan 2025 16:38:22 -0500
+Subject: tty: xilinx_uartps: split sysrq handling
+
+From: Sean Anderson <sean.anderson@linux.dev>
+
+commit b06f388994500297bb91be60ffaf6825ecfd2afe upstream.
+
+lockdep detects the following circular locking dependency:
+
+CPU 0                      CPU 1
+========================== ============================
+cdns_uart_isr()            printk()
+  uart_port_lock(port)       console_lock()
+                            cdns_uart_console_write()
+                               if (!port->sysrq)
+                                 uart_port_lock(port)
+  uart_handle_break()
+    port->sysrq = ...
+  uart_handle_sysrq_char()
+    printk()
+      console_lock()
+
+The fixed commit attempts to avoid this situation by only taking the
+port lock in cdns_uart_console_write if port->sysrq is unset. However, if
+(as shown above) cdns_uart_console_write runs before port->sysrq is set,
+then it will try to take the port lock anyway. This may result in a
+deadlock.
+
+Fix this by splitting sysrq handling into two parts. We use the prepare
+helper under the port lock and defer handling until we release the lock.
+
+Fixes: 74ea66d4ca06 ("tty: xuartps: Improve sysrq handling")
+Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
+Cc: stable@vger.kernel.org # c980248179d: serial: xilinx_uartps: Use port lock wrappers
+Acked-by: John Ogness <john.ogness@linutronix.de>
+Link: https://lore.kernel.org/r/20250110213822.2107462-1-sean.anderson@linux.dev
+Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tty/serial/xilinx_uartps.c |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/drivers/tty/serial/xilinx_uartps.c
++++ b/drivers/tty/serial/xilinx_uartps.c
+@@ -268,7 +268,7 @@ static void cdns_uart_handle_rx(void *de
+                               continue;
+               }
+ 
+-              if (uart_handle_sysrq_char(port, data))
++              if (uart_prepare_sysrq_char(port, data))
+                       continue;
+ 
+               if (is_rxbs_support) {
+@@ -369,7 +369,7 @@ static irqreturn_t cdns_uart_isr(int irq
+           !(readl(port->membase + CDNS_UART_CR) & CDNS_UART_CR_RX_DIS))
+               cdns_uart_handle_rx(dev_id, isrstatus);
+ 
+-      spin_unlock(&port->lock);
++      uart_unlock_and_check_sysrq(port);
+       return IRQ_HANDLED;
+ }
+ 
+@@ -1229,10 +1229,8 @@ static void cdns_uart_console_write(stru
+       unsigned int imr, ctrl;
+       int locked = 1;
+ 
+-      if (port->sysrq)
+-              locked = 0;
+-      else if (oops_in_progress)
+-              locked = spin_trylock_irqsave(&port->lock, flags);
++      if (oops_in_progress)
++              locked = uart_port_trylock_irqsave(port, &flags);
+       else
+               spin_lock_irqsave(&port->lock, flags);
+ 
diff --git a/queue-6.6/x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch b/queue-6.6/x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch

new file mode 100644 (file)

index 0000000..5257172
--- /dev/null
+++ b/queue-6.6/x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch
@@ -0,0 +1,77 @@
+From cc31744a294584a36bf764a0ffa3255a8e69f036 Mon Sep 17 00:00:00 2001
+From: Steve Wahl <steve.wahl@hpe.com>
+Date: Wed, 17 Jul 2024 16:31:21 -0500
+Subject: x86/mm/ident_map: Use gbpages only where full GB page should be mapped.
+
+From: Steve Wahl <steve.wahl@hpe.com>
+
+commit cc31744a294584a36bf764a0ffa3255a8e69f036 upstream.
+
+When ident_pud_init() uses only GB pages to create identity maps, large
+ranges of addresses not actually requested can be included in the resulting
+table; a 4K request will map a full GB.  This can include a lot of extra
+address space past that requested, including areas marked reserved by the
+BIOS.  That allows processor speculation into reserved regions, that on UV
+systems can cause system halts.
+
+Only use GB pages when map creation requests include the full GB page of
+space.  Fall back to using smaller 2M pages when only portions of a GB page
+are included in the request.
+
+No attempt is made to coalesce mapping requests. If a request requires a
+map entry at the 2M (pmd) level, subsequent mapping requests within the
+same 1G region will also be at the pmd level, even if adjacent or
+overlapping such requests could have been combined to map a full GB page.
+Existing usage starts with larger regions and then adds smaller regions, so
+this should not have any great consequence.
+
+Signed-off-by: Steve Wahl <steve.wahl@hpe.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Pavin Joseph <me@pavinjoseph.com>
+Tested-by: Sarah Brofeldt <srhb@dbc.dk>
+Tested-by: Eric Hagberg <ehagberg@gmail.com>
+Link: https://lore.kernel.org/all/20240717213121.3064030-3-steve.wahl@hpe.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/ident_map.c |   23 ++++++++++++++++++-----
+ 1 file changed, 18 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/mm/ident_map.c
++++ b/arch/x86/mm/ident_map.c
+@@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_map
+       for (; addr < end; addr = next) {
+               pud_t *pud = pud_page + pud_index(addr);
+               pmd_t *pmd;
++              bool use_gbpage;
+ 
+               next = (addr & PUD_MASK) + PUD_SIZE;
+               if (next > end)
+                       next = end;
+ 
+-              if (info->direct_gbpages) {
+-                      pud_t pudval;
++              /* if this is already a gbpage, this portion is already mapped */
++              if (pud_leaf(*pud))
++                      continue;
++
++              /* Is using a gbpage allowed? */
++              use_gbpage = info->direct_gbpages;
+ 
+-                      if (pud_present(*pud))
+-                              continue;
++              /* Don't use gbpage if it maps more than the requested region. */
++              /* at the begining: */
++              use_gbpage &= ((addr & ~PUD_MASK) == 0);
++              /* ... or at the end: */
++              use_gbpage &= ((next & ~PUD_MASK) == 0);
++
++              /* Never overwrite existing mappings */
++              use_gbpage &= !pud_present(*pud);
++
++              if (use_gbpage) {
++                      pud_t pudval;
+ 
+-                      addr &= PUD_MASK;
+                       pudval = __pud((addr - info->offset) | info->page_flag);
+                       set_pud(pud, pudval);
+                       continue;
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 13 Feb 2025 13:13:47 +0000 (14:13 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 13 Feb 2025 13:13:47 +0000 (14:13 +0100)
queue-6.6/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/cachefiles-fix-null-pointer-dereference-in-object-file.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/ext4-filesystems-without-casefold-feature-cannot-be-mounted-with-siphash.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/kvm-x86-make-x2apic-id-100-readonly.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/kvm-x86-re-split-x2apic-icr-into-icr-icr2-for-amd-x2avic.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/mptcp-pm-only-set-fullmesh-for-subflow-endp.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/mptcp-prevent-excessive-coalescing-on-receive.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/ocfs2-check-dir-i_size-in-ocfs2_find_entry.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/revert-btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/selftests-mptcp-join-fix-af_inet6-variable.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/series		patch \| blob \| blame \| history
queue-6.6/tty-xilinx_uartps-split-sysrq-handling.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch	[new file with mode: 0644]	patch \| blob