]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 8 Sep 2024 12:24:11 +0000 (14:24 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 8 Sep 2024 12:24:11 +0000 (14:24 +0200)
added patches:
btrfs-qgroup-don-t-use-extent-changeset-when-not-needed.patch
btrfs-zoned-handle-broken-write-pointer-on-zones.patch
drm-xe-gsc-do-not-attempt-to-load-the-gsc-multiple-times.patch
eventfs-use-list_del_rcu-for-srcu-protected-list-variable.patch
ila-call-nf_unregister_net_hooks-sooner.patch
net-mana-fix-error-handling-in-mana_create_txq-rxq-s-napi-cleanup.patch
net-mctp-serial-fix-missing-escapes-on-transmit.patch
nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch
nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch
nilfs2-protect-references-to-superblock-parameters-exposed-in-sysfs.patch
revert-drm-amdgpu-align-pp_power_profile_mode-with-kernel-docs.patch
revert-wifi-ath11k-restore-country-code-during-resume.patch
revert-wifi-ath11k-support-hibernation.patch
sched-sch_cake-fix-bulk-flow-accounting-logic-for-host-fairness.patch
tcp_bpf-fix-return-value-of-tcp_bpf_sendmsg.patch
x86-apic-make-x2apic_disable-work-correctly.patch
x86-fpu-avoid-writing-lbr-bit-to-ia32_xss-unless-supported.patch

18 files changed:
queue-6.10/btrfs-qgroup-don-t-use-extent-changeset-when-not-needed.patch [new file with mode: 0644]
queue-6.10/btrfs-zoned-handle-broken-write-pointer-on-zones.patch [new file with mode: 0644]
queue-6.10/drm-xe-gsc-do-not-attempt-to-load-the-gsc-multiple-times.patch [new file with mode: 0644]
queue-6.10/eventfs-use-list_del_rcu-for-srcu-protected-list-variable.patch [new file with mode: 0644]
queue-6.10/ila-call-nf_unregister_net_hooks-sooner.patch [new file with mode: 0644]
queue-6.10/net-mana-fix-error-handling-in-mana_create_txq-rxq-s-napi-cleanup.patch [new file with mode: 0644]
queue-6.10/net-mctp-serial-fix-missing-escapes-on-transmit.patch [new file with mode: 0644]
queue-6.10/nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch [new file with mode: 0644]
queue-6.10/nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch [new file with mode: 0644]
queue-6.10/nilfs2-protect-references-to-superblock-parameters-exposed-in-sysfs.patch [new file with mode: 0644]
queue-6.10/revert-drm-amdgpu-align-pp_power_profile_mode-with-kernel-docs.patch [new file with mode: 0644]
queue-6.10/revert-wifi-ath11k-restore-country-code-during-resume.patch [new file with mode: 0644]
queue-6.10/revert-wifi-ath11k-support-hibernation.patch [new file with mode: 0644]
queue-6.10/sched-sch_cake-fix-bulk-flow-accounting-logic-for-host-fairness.patch [new file with mode: 0644]
queue-6.10/series
queue-6.10/tcp_bpf-fix-return-value-of-tcp_bpf_sendmsg.patch [new file with mode: 0644]
queue-6.10/x86-apic-make-x2apic_disable-work-correctly.patch [new file with mode: 0644]
queue-6.10/x86-fpu-avoid-writing-lbr-bit-to-ia32_xss-unless-supported.patch [new file with mode: 0644]

diff --git a/queue-6.10/btrfs-qgroup-don-t-use-extent-changeset-when-not-needed.patch b/queue-6.10/btrfs-qgroup-don-t-use-extent-changeset-when-not-needed.patch
new file mode 100644 (file)
index 0000000..1983962
--- /dev/null
@@ -0,0 +1,46 @@
+From c346c629765ab982967017e2ae859156d0e235cf Mon Sep 17 00:00:00 2001
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Wed, 28 Aug 2024 19:14:11 +0300
+Subject: btrfs: qgroup: don't use extent changeset when not needed
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+commit c346c629765ab982967017e2ae859156d0e235cf upstream.
+
+The local extent changeset is passed to clear_record_extent_bits() where
+it may have some additional memory dynamically allocated for ulist. When
+qgroup is disabled, the memory is leaked because in this case the
+changeset is not released upon __btrfs_qgroup_release_data() return.
+
+Since the recorded contents of the changeset are not used thereafter, just
+don't pass it.
+
+Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
+
+Reported-by: syzbot+81670362c283f3dd889c@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/lkml/000000000000aa8c0c060ade165e@google.com
+Fixes: af0e2aab3b70 ("btrfs: qgroup: flush reservations during quota disable")
+CC: stable@vger.kernel.org # 6.10+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/qgroup.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -4269,10 +4269,9 @@ static int __btrfs_qgroup_release_data(s
+       int ret;
+       if (btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) {
+-              extent_changeset_init(&changeset);
+               return clear_record_extent_bits(&inode->io_tree, start,
+                                               start + len - 1,
+-                                              EXTENT_QGROUP_RESERVED, &changeset);
++                                              EXTENT_QGROUP_RESERVED, NULL);
+       }
+       /* In release case, we shouldn't have @reserved */
diff --git a/queue-6.10/btrfs-zoned-handle-broken-write-pointer-on-zones.patch b/queue-6.10/btrfs-zoned-handle-broken-write-pointer-on-zones.patch
new file mode 100644 (file)
index 0000000..2d4feea
--- /dev/null
@@ -0,0 +1,118 @@
+From b1934cd6069538db2255dc94ba573771ecf3b560 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Sat, 31 Aug 2024 01:32:49 +0900
+Subject: btrfs: zoned: handle broken write pointer on zones
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit b1934cd6069538db2255dc94ba573771ecf3b560 upstream.
+
+Btrfs refuses to mount a FS if it finds a block group with a broken write
+pointer (e.g., unequal write pointers on two zones of a RAID1 block group).
+Since such a case can easily happen after a power loss or a system crash,
+we need to handle the case more gently.
+
+Handle such a block group by making it unallocatable, so that there will be
+no writes into it. That can be done by setting the allocation pointer at the
+end of the allocatable region (= block_group->zone_capacity). Then, the
+existing code handles zone_unusable properly.
+
+Having a proper zone_capacity is necessary for the change. So, set it as
+early as possible.
+
+We cannot handle the RAID0 and RAID10 cases like this. But they cannot be
+read anyway because of a missing stripe.
+
+Fixes: 265f7237dd25 ("btrfs: zoned: allow DUP on meta-data block groups")
+Fixes: 568220fa9657 ("btrfs: zoned: support RAID0/1/10 on top of raid stripe tree")
+CC: stable@vger.kernel.org # 6.1+
+Reported-by: HAN Yuwei <hrx@bupt.moe>
+Cc: Xuefer <xuefer@gmail.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/zoned.c |   30 +++++++++++++++++++++++++-----
+ 1 file changed, 25 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1408,6 +1408,8 @@ static int btrfs_load_block_group_dup(st
+               return -EINVAL;
+       }
++      bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
++
+       if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+               btrfs_err(bg->fs_info,
+                         "zoned: cannot recover write pointer for zone %llu",
+@@ -1434,7 +1436,6 @@ static int btrfs_load_block_group_dup(st
+       }
+       bg->alloc_offset = zone_info[0].alloc_offset;
+-      bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
+       return 0;
+ }
+@@ -1452,6 +1453,9 @@ static int btrfs_load_block_group_raid1(
+               return -EINVAL;
+       }
++      /* In case a device is missing we have a cap of 0, so don't use it. */
++      bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
++
+       for (i = 0; i < map->num_stripes; i++) {
+               if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
+                   zone_info[i].alloc_offset == WP_CONVENTIONAL)
+@@ -1473,9 +1477,6 @@ static int btrfs_load_block_group_raid1(
+                       if (test_bit(0, active))
+                               set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
+               }
+-              /* In case a device is missing we have a cap of 0, so don't use it. */
+-              bg->zone_capacity = min_not_zero(zone_info[0].capacity,
+-                                               zone_info[1].capacity);
+       }
+       if (zone_info[0].alloc_offset != WP_MISSING_DEV)
+@@ -1565,6 +1566,7 @@ int btrfs_load_block_group_zone_info(str
+       unsigned long *active = NULL;
+       u64 last_alloc = 0;
+       u32 num_sequential = 0, num_conventional = 0;
++      u64 profile;
+       if (!btrfs_is_zoned(fs_info))
+               return 0;
+@@ -1625,7 +1627,8 @@ int btrfs_load_block_group_zone_info(str
+               }
+       }
+-      switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
++      profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
++      switch (profile) {
+       case 0: /* single */
+               ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
+               break;
+@@ -1652,6 +1655,23 @@ int btrfs_load_block_group_zone_info(str
+               goto out;
+       }
++      if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 &&
++          profile != BTRFS_BLOCK_GROUP_RAID10) {
++              /*
++               * Detected broken write pointer.  Make this block group
++               * unallocatable by setting the allocation pointer at the end of
++               * allocatable region. Relocating this block group will fix the
++               * mismatch.
++               *
++               * Currently, we cannot handle RAID0 or RAID10 case like this
++               * because we don't have a proper zone_capacity value. But,
++               * reading from this block group won't work anyway by a missing
++               * stripe.
++               */
++              cache->alloc_offset = cache->zone_capacity;
++              ret = 0;
++      }
++
+ out:
+       /* Reject non SINGLE data profiles without RST */
+       if ((map->type & BTRFS_BLOCK_GROUP_DATA) &&
diff --git a/queue-6.10/drm-xe-gsc-do-not-attempt-to-load-the-gsc-multiple-times.patch b/queue-6.10/drm-xe-gsc-do-not-attempt-to-load-the-gsc-multiple-times.patch
new file mode 100644 (file)
index 0000000..e6adaf9
--- /dev/null
@@ -0,0 +1,91 @@
+From 529bf8d1118bbaa1aa835563a22b0b5c64ca9d68 Mon Sep 17 00:00:00 2001
+From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Date: Wed, 28 Aug 2024 14:51:54 -0700
+Subject: drm/xe/gsc: Do not attempt to load the GSC multiple times
+
+From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+
+commit 529bf8d1118bbaa1aa835563a22b0b5c64ca9d68 upstream.
+
+The GSC HW is only reset by driver FLR or D3cold entry. We don't support
+the former at runtime, while the latter is only supported on DGFX, for
+which we don't support GSC. Therefore, if GSC failed to load previously
+there is no need to try again because the HW is stuck in the error state.
+
+An assert has been added so that if we ever add DGFX support we'll know
+we need to handle the D3 case.
+
+v2: use "< 0" instead of "!= 0" in the FW state error check (Julia).
+
+Fixes: dd0e89e5edc2 ("drm/xe/gsc: GSC FW load")
+Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Cc: John Harrison <John.C.Harrison@Intel.com>
+Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
+Cc: <stable@vger.kernel.org> # v6.8+
+Reviewed-by: Julia Filipchuk <julia.filipchuk@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240828215158.2743994-2-daniele.ceraolospurio@intel.com
+(cherry picked from commit 2160f6f6e3cf6893a83357c3b82ff8589bdc0f08)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/xe/xe_gsc.c   |   12 ++++++++++++
+ drivers/gpu/drm/xe/xe_uc_fw.h |    9 +++++++--
+ 2 files changed, 19 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/xe/xe_gsc.c
++++ b/drivers/gpu/drm/xe/xe_gsc.c
+@@ -511,10 +511,22 @@ out_bo:
+ void xe_gsc_load_start(struct xe_gsc *gsc)
+ {
+       struct xe_gt *gt = gsc_to_gt(gsc);
++      struct xe_device *xe = gt_to_xe(gt);
+       if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
+               return;
++      /*
++       * The GSC HW is only reset by driver FLR or D3cold entry. We don't
++       * support the former at runtime, while the latter is only supported on
++       * DGFX, for which we don't support GSC. Therefore, if GSC failed to
++       * load previously there is no need to try again because the HW is
++       * stuck in the error state.
++       */
++      xe_assert(xe, !IS_DGFX(xe));
++      if (xe_uc_fw_is_in_error_state(&gsc->fw))
++              return;
++
+       /* GSC FW survives GT reset and D3Hot */
+       if (gsc_fw_is_loaded(gt)) {
+               xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+--- a/drivers/gpu/drm/xe/xe_uc_fw.h
++++ b/drivers/gpu/drm/xe/xe_uc_fw.h
+@@ -65,7 +65,7 @@ const char *xe_uc_fw_status_repr(enum xe
+       return "<invalid>";
+ }
+-static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status)
++static inline int xe_uc_fw_status_to_error(const enum xe_uc_fw_status status)
+ {
+       switch (status) {
+       case XE_UC_FIRMWARE_NOT_SUPPORTED:
+@@ -108,7 +108,7 @@ static inline const char *xe_uc_fw_type_
+ }
+ static inline enum xe_uc_fw_status
+-__xe_uc_fw_status(struct xe_uc_fw *uc_fw)
++__xe_uc_fw_status(const struct xe_uc_fw *uc_fw)
+ {
+       /* shouldn't call this before checking hw/blob availability */
+       XE_WARN_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED);
+@@ -156,6 +156,11 @@ static inline bool xe_uc_fw_is_overridde
+       return uc_fw->user_overridden;
+ }
++static inline bool xe_uc_fw_is_in_error_state(const struct xe_uc_fw *uc_fw)
++{
++      return xe_uc_fw_status_to_error(__xe_uc_fw_status(uc_fw)) < 0;
++}
++
+ static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw)
+ {
+       if (xe_uc_fw_is_loaded(uc_fw))
diff --git a/queue-6.10/eventfs-use-list_del_rcu-for-srcu-protected-list-variable.patch b/queue-6.10/eventfs-use-list_del_rcu-for-srcu-protected-list-variable.patch
new file mode 100644 (file)
index 0000000..0b933ef
--- /dev/null
@@ -0,0 +1,112 @@
+From d2603279c7d645bf0d11fa253b23f1ab48fc8d3c Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Wed, 4 Sep 2024 13:16:05 -0400
+Subject: eventfs: Use list_del_rcu() for SRCU protected list variable
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit d2603279c7d645bf0d11fa253b23f1ab48fc8d3c upstream.
+
+Chi Zhiling reported:
+
+  We found a null pointer accessing in tracefs[1], the reason is that the
+  variable 'ei_child' is set to LIST_POISON1, that means the list was
+  removed in eventfs_remove_rec. so when access the ei_child->is_freed, the
+  panic triggered.
+
+  by the way, the following script can reproduce this panic
+
+  loop1 (){
+      while true
+      do
+          echo "p:kp submit_bio" > /sys/kernel/debug/tracing/kprobe_events
+          echo "" > /sys/kernel/debug/tracing/kprobe_events
+      done
+  }
+  loop2 (){
+      while true
+      do
+          tree /sys/kernel/debug/tracing/events/kprobes/
+      done
+  }
+  loop1 &
+  loop2
+
+  [1]:
+  [ 1147.959632][T17331] Unable to handle kernel paging request at virtual address dead000000000150
+  [ 1147.968239][T17331] Mem abort info:
+  [ 1147.971739][T17331]   ESR = 0x0000000096000004
+  [ 1147.976172][T17331]   EC = 0x25: DABT (current EL), IL = 32 bits
+  [ 1147.982171][T17331]   SET = 0, FnV = 0
+  [ 1147.985906][T17331]   EA = 0, S1PTW = 0
+  [ 1147.989734][T17331]   FSC = 0x04: level 0 translation fault
+  [ 1147.995292][T17331] Data abort info:
+  [ 1147.998858][T17331]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
+  [ 1148.005023][T17331]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+  [ 1148.010759][T17331]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+  [ 1148.016752][T17331] [dead000000000150] address between user and kernel address ranges
+  [ 1148.024571][T17331] Internal error: Oops: 0000000096000004 [#1] SMP
+  [ 1148.030825][T17331] Modules linked in: team_mode_loadbalance team nlmon act_gact cls_flower sch_ingress bonding tls macvlan dummy ib_core bridge stp llc veth amdgpu amdxcp mfd_core gpu_sched drm_exec drm_buddy radeon crct10dif_ce video drm_suballoc_helper ghash_ce drm_ttm_helper sha2_ce ttm sha256_arm64 i2c_algo_bit sha1_ce sbsa_gwdt cp210x drm_display_helper cec sr_mod cdrom drm_kms_helper binfmt_misc sg loop fuse drm dm_mod nfnetlink ip_tables autofs4 [last unloaded: tls]
+  [ 1148.072808][T17331] CPU: 3 PID: 17331 Comm: ls Tainted: G        W         ------- ----  6.6.43 #2
+  [ 1148.081751][T17331] Source Version: 21b3b386e948bedd29369af66f3e98ab01b1c650
+  [ 1148.088783][T17331] Hardware name: Greatwall GW-001M1A-FTF/GW-001M1A-FTF, BIOS KunLun BIOS V4.0 07/16/2020
+  [ 1148.098419][T17331] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+  [ 1148.106060][T17331] pc : eventfs_iterate+0x2c0/0x398
+  [ 1148.111017][T17331] lr : eventfs_iterate+0x2fc/0x398
+  [ 1148.115969][T17331] sp : ffff80008d56bbd0
+  [ 1148.119964][T17331] x29: ffff80008d56bbf0 x28: ffff001ff5be2600 x27: 0000000000000000
+  [ 1148.127781][T17331] x26: ffff001ff52ca4e0 x25: 0000000000009977 x24: dead000000000100
+  [ 1148.135598][T17331] x23: 0000000000000000 x22: 000000000000000b x21: ffff800082645f10
+  [ 1148.143415][T17331] x20: ffff001fddf87c70 x19: ffff80008d56bc90 x18: 0000000000000000
+  [ 1148.151231][T17331] x17: 0000000000000000 x16: 0000000000000000 x15: ffff001ff52ca4e0
+  [ 1148.159048][T17331] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
+  [ 1148.166864][T17331] x11: 0000000000000000 x10: 0000000000000000 x9 : ffff8000804391d0
+  [ 1148.174680][T17331] x8 : 0000000180000000 x7 : 0000000000000018 x6 : 0000aaab04b92862
+  [ 1148.182498][T17331] x5 : 0000aaab04b92862 x4 : 0000000080000000 x3 : 0000000000000068
+  [ 1148.190314][T17331] x2 : 000000000000000f x1 : 0000000000007ea8 x0 : 0000000000000001
+  [ 1148.198131][T17331] Call trace:
+  [ 1148.201259][T17331]  eventfs_iterate+0x2c0/0x398
+  [ 1148.205864][T17331]  iterate_dir+0x98/0x188
+  [ 1148.210036][T17331]  __arm64_sys_getdents64+0x78/0x160
+  [ 1148.215161][T17331]  invoke_syscall+0x78/0x108
+  [ 1148.219593][T17331]  el0_svc_common.constprop.0+0x48/0xf0
+  [ 1148.224977][T17331]  do_el0_svc+0x24/0x38
+  [ 1148.228974][T17331]  el0_svc+0x40/0x168
+  [ 1148.232798][T17331]  el0t_64_sync_handler+0x120/0x130
+  [ 1148.237836][T17331]  el0t_64_sync+0x1a4/0x1a8
+  [ 1148.242182][T17331] Code: 54ffff6c f9400676 910006d6 f9000676 (b9405300)
+  [ 1148.248955][T17331] ---[ end trace 0000000000000000 ]---
+
+The issue is that list_del() is used on an SRCU protected list variable
+before the synchronization occurs. This can poison the list pointers while
+there is a reader iterating the list.
+
+This is simply fixed by using list_del_rcu(), which is specifically made for
+this purpose.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240829085025.3600021-1-chizhiling@163.com/
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Link: https://lore.kernel.org/20240904131605.640d42b1@gandalf.local.home
+Fixes: 43aa6f97c2d03 ("eventfs: Get rid of dentry pointers without refcounts")
+Reported-by: Chi Zhiling <chizhiling@kylinos.cn>
+Tested-by: Chi Zhiling <chizhiling@kylinos.cn>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/event_inode.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/tracefs/event_inode.c
++++ b/fs/tracefs/event_inode.c
+@@ -862,7 +862,7 @@ static void eventfs_remove_rec(struct ev
+       list_for_each_entry(ei_child, &ei->children, list)
+               eventfs_remove_rec(ei_child, level + 1);
+-      list_del(&ei->list);
++      list_del_rcu(&ei->list);
+       free_ei(ei);
+ }
diff --git a/queue-6.10/ila-call-nf_unregister_net_hooks-sooner.patch b/queue-6.10/ila-call-nf_unregister_net_hooks-sooner.patch
new file mode 100644 (file)
index 0000000..c34dc3b
--- /dev/null
@@ -0,0 +1,194 @@
+From 031ae72825cef43e4650140b800ad58bf7a6a466 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 4 Sep 2024 14:44:18 +0000
+Subject: ila: call nf_unregister_net_hooks() sooner
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 031ae72825cef43e4650140b800ad58bf7a6a466 upstream.
+
+syzbot found a use-after-free Read in ila_nf_input [1]
+
+The issue here is that ila_xlat_exit_net() frees the rhashtable,
+then calls nf_unregister_net_hooks().
+
+It should be done in the reverse order, with a synchronize_rcu().
+
+This is a good match for a pre_exit() method.
+
+[1]
+ BUG: KASAN: use-after-free in rht_key_hashfn include/linux/rhashtable.h:159 [inline]
+ BUG: KASAN: use-after-free in __rhashtable_lookup include/linux/rhashtable.h:604 [inline]
+ BUG: KASAN: use-after-free in rhashtable_lookup include/linux/rhashtable.h:646 [inline]
+ BUG: KASAN: use-after-free in rhashtable_lookup_fast+0x77a/0x9b0 include/linux/rhashtable.h:672
+Read of size 4 at addr ffff888064620008 by task ksoftirqd/0/16
+
+CPU: 0 UID: 0 PID: 16 Comm: ksoftirqd/0 Not tainted 6.11.0-rc4-syzkaller-00238-g2ad6d23f465a #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024
+Call Trace:
+ <TASK>
+  __dump_stack lib/dump_stack.c:93 [inline]
+  dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119
+  print_address_description mm/kasan/report.c:377 [inline]
+  print_report+0x169/0x550 mm/kasan/report.c:488
+  kasan_report+0x143/0x180 mm/kasan/report.c:601
+  rht_key_hashfn include/linux/rhashtable.h:159 [inline]
+  __rhashtable_lookup include/linux/rhashtable.h:604 [inline]
+  rhashtable_lookup include/linux/rhashtable.h:646 [inline]
+  rhashtable_lookup_fast+0x77a/0x9b0 include/linux/rhashtable.h:672
+  ila_lookup_wildcards net/ipv6/ila/ila_xlat.c:132 [inline]
+  ila_xlat_addr net/ipv6/ila/ila_xlat.c:652 [inline]
+  ila_nf_input+0x1fe/0x3c0 net/ipv6/ila/ila_xlat.c:190
+  nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline]
+  nf_hook_slow+0xc3/0x220 net/netfilter/core.c:626
+  nf_hook include/linux/netfilter.h:269 [inline]
+  NF_HOOK+0x29e/0x450 include/linux/netfilter.h:312
+  __netif_receive_skb_one_core net/core/dev.c:5661 [inline]
+  __netif_receive_skb+0x1ea/0x650 net/core/dev.c:5775
+  process_backlog+0x662/0x15b0 net/core/dev.c:6108
+  __napi_poll+0xcb/0x490 net/core/dev.c:6772
+  napi_poll net/core/dev.c:6841 [inline]
+  net_rx_action+0x89b/0x1240 net/core/dev.c:6963
+  handle_softirqs+0x2c4/0x970 kernel/softirq.c:554
+  run_ksoftirqd+0xca/0x130 kernel/softirq.c:928
+  smpboot_thread_fn+0x544/0xa30 kernel/smpboot.c:164
+  kthread+0x2f0/0x390 kernel/kthread.c:389
+  ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
+  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
+ </TASK>
+
+The buggy address belongs to the physical page:
+page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x64620
+flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff)
+page_type: 0xbfffffff(buddy)
+raw: 00fff00000000000 ffffea0000959608 ffffea00019d9408 0000000000000000
+raw: 0000000000000000 0000000000000003 00000000bfffffff 0000000000000000
+page dumped because: kasan: bad access detected
+page_owner tracks the page as freed
+page last allocated via order 3, migratetype Unmovable, gfp_mask 0x52dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_ZERO), pid 5242, tgid 5242 (syz-executor), ts 73611328570, free_ts 618981657187
+  set_page_owner include/linux/page_owner.h:32 [inline]
+  post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1493
+  prep_new_page mm/page_alloc.c:1501 [inline]
+  get_page_from_freelist+0x2e4c/0x2f10 mm/page_alloc.c:3439
+  __alloc_pages_noprof+0x256/0x6c0 mm/page_alloc.c:4695
+  __alloc_pages_node_noprof include/linux/gfp.h:269 [inline]
+  alloc_pages_node_noprof include/linux/gfp.h:296 [inline]
+  ___kmalloc_large_node+0x8b/0x1d0 mm/slub.c:4103
+  __kmalloc_large_node_noprof+0x1a/0x80 mm/slub.c:4130
+  __do_kmalloc_node mm/slub.c:4146 [inline]
+  __kmalloc_node_noprof+0x2d2/0x440 mm/slub.c:4164
+  __kvmalloc_node_noprof+0x72/0x190 mm/util.c:650
+  bucket_table_alloc lib/rhashtable.c:186 [inline]
+  rhashtable_init_noprof+0x534/0xa60 lib/rhashtable.c:1071
+  ila_xlat_init_net+0xa0/0x110 net/ipv6/ila/ila_xlat.c:613
+  ops_init+0x359/0x610 net/core/net_namespace.c:139
+  setup_net+0x515/0xca0 net/core/net_namespace.c:343
+  copy_net_ns+0x4e2/0x7b0 net/core/net_namespace.c:508
+  create_new_namespaces+0x425/0x7b0 kernel/nsproxy.c:110
+  unshare_nsproxy_namespaces+0x124/0x180 kernel/nsproxy.c:228
+  ksys_unshare+0x619/0xc10 kernel/fork.c:3328
+  __do_sys_unshare kernel/fork.c:3399 [inline]
+  __se_sys_unshare kernel/fork.c:3397 [inline]
+  __x64_sys_unshare+0x38/0x40 kernel/fork.c:3397
+page last free pid 11846 tgid 11846 stack trace:
+  reset_page_owner include/linux/page_owner.h:25 [inline]
+  free_pages_prepare mm/page_alloc.c:1094 [inline]
+  free_unref_page+0xd22/0xea0 mm/page_alloc.c:2612
+  __folio_put+0x2c8/0x440 mm/swap.c:128
+  folio_put include/linux/mm.h:1486 [inline]
+  free_large_kmalloc+0x105/0x1c0 mm/slub.c:4565
+  kfree+0x1c4/0x360 mm/slub.c:4588
+  rhashtable_free_and_destroy+0x7c6/0x920 lib/rhashtable.c:1169
+  ila_xlat_exit_net+0x55/0x110 net/ipv6/ila/ila_xlat.c:626
+  ops_exit_list net/core/net_namespace.c:173 [inline]
+  cleanup_net+0x802/0xcc0 net/core/net_namespace.c:640
+  process_one_work kernel/workqueue.c:3231 [inline]
+  process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312
+  worker_thread+0x86d/0xd40 kernel/workqueue.c:3390
+  kthread+0x2f0/0x390 kernel/kthread.c:389
+  ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
+  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
+
+Memory state around the buggy address:
+ ffff88806461ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff88806461ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+>ffff888064620000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+                      ^
+ ffff888064620080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+ ffff888064620100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+
+Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Tom Herbert <tom@herbertland.com>
+Reviewed-by: Florian Westphal <fw@strlen.de>
+Link: https://patch.msgid.link/20240904144418.1162839-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ila/ila.h      |    1 +
+ net/ipv6/ila/ila_main.c |    6 ++++++
+ net/ipv6/ila/ila_xlat.c |   13 +++++++++----
+ 3 files changed, 16 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/ila/ila.h
++++ b/net/ipv6/ila/ila.h
+@@ -108,6 +108,7 @@ int ila_lwt_init(void);
+ void ila_lwt_fini(void);
+ int ila_xlat_init_net(struct net *net);
++void ila_xlat_pre_exit_net(struct net *net);
+ void ila_xlat_exit_net(struct net *net);
+ int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info);
+--- a/net/ipv6/ila/ila_main.c
++++ b/net/ipv6/ila/ila_main.c
+@@ -71,6 +71,11 @@ ila_xlat_init_fail:
+       return err;
+ }
++static __net_exit void ila_pre_exit_net(struct net *net)
++{
++      ila_xlat_pre_exit_net(net);
++}
++
+ static __net_exit void ila_exit_net(struct net *net)
+ {
+       ila_xlat_exit_net(net);
+@@ -78,6 +83,7 @@ static __net_exit void ila_exit_net(stru
+ static struct pernet_operations ila_net_ops = {
+       .init = ila_init_net,
++      .pre_exit = ila_pre_exit_net,
+       .exit = ila_exit_net,
+       .id   = &ila_net_id,
+       .size = sizeof(struct ila_net),
+--- a/net/ipv6/ila/ila_xlat.c
++++ b/net/ipv6/ila/ila_xlat.c
+@@ -619,6 +619,15 @@ int ila_xlat_init_net(struct net *net)
+       return 0;
+ }
++void ila_xlat_pre_exit_net(struct net *net)
++{
++      struct ila_net *ilan = net_generic(net, ila_net_id);
++
++      if (ilan->xlat.hooks_registered)
++              nf_unregister_net_hooks(net, ila_nf_hook_ops,
++                                      ARRAY_SIZE(ila_nf_hook_ops));
++}
++
+ void ila_xlat_exit_net(struct net *net)
+ {
+       struct ila_net *ilan = net_generic(net, ila_net_id);
+@@ -626,10 +635,6 @@ void ila_xlat_exit_net(struct net *net)
+       rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL);
+       free_bucket_spinlocks(ilan->xlat.locks);
+-
+-      if (ilan->xlat.hooks_registered)
+-              nf_unregister_net_hooks(net, ila_nf_hook_ops,
+-                                      ARRAY_SIZE(ila_nf_hook_ops));
+ }
+ static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
diff --git a/queue-6.10/net-mana-fix-error-handling-in-mana_create_txq-rxq-s-napi-cleanup.patch b/queue-6.10/net-mana-fix-error-handling-in-mana_create_txq-rxq-s-napi-cleanup.patch
new file mode 100644 (file)
index 0000000..1f5daeb
--- /dev/null
@@ -0,0 +1,118 @@
+From b6ecc662037694488bfff7c9fd21c405df8411f2 Mon Sep 17 00:00:00 2001
+From: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
+Date: Mon, 2 Sep 2024 05:43:47 -0700
+Subject: net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup
+
+From: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
+
+commit b6ecc662037694488bfff7c9fd21c405df8411f2 upstream.
+
+Currently napi_disable() gets called during rxq and txq cleanup,
+even before NAPI is enabled and the hrtimer is initialized. This causes
+a kernel panic.
+
+? page_fault_oops+0x136/0x2b0
+  ? page_counter_cancel+0x2e/0x80
+  ? do_user_addr_fault+0x2f2/0x640
+  ? refill_obj_stock+0xc4/0x110
+  ? exc_page_fault+0x71/0x160
+  ? asm_exc_page_fault+0x27/0x30
+  ? __mmdrop+0x10/0x180
+  ? __mmdrop+0xec/0x180
+  ? hrtimer_active+0xd/0x50
+  hrtimer_try_to_cancel+0x2c/0xf0
+  hrtimer_cancel+0x15/0x30
+  napi_disable+0x65/0x90
+  mana_destroy_rxq+0x4c/0x2f0
+  mana_create_rxq.isra.0+0x56c/0x6d0
+  ? mana_uncfg_vport+0x50/0x50
+  mana_alloc_queues+0x21b/0x320
+  ? skb_dequeue+0x5f/0x80
+
+Cc: stable@vger.kernel.org
+Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
+Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/microsoft/mana/mana_en.c |   22 +++++++++++++---------
+ include/net/mana/mana.h                       |    2 ++
+ 2 files changed, 15 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
+@@ -1857,10 +1857,12 @@ static void mana_destroy_txq(struct mana
+       for (i = 0; i < apc->num_queues; i++) {
+               napi = &apc->tx_qp[i].tx_cq.napi;
+-              napi_synchronize(napi);
+-              napi_disable(napi);
+-              netif_napi_del(napi);
+-
++              if (apc->tx_qp[i].txq.napi_initialized) {
++                      napi_synchronize(napi);
++                      napi_disable(napi);
++                      netif_napi_del(napi);
++                      apc->tx_qp[i].txq.napi_initialized = false;
++              }
+               mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
+               mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
+@@ -1916,6 +1918,7 @@ static int mana_create_txq(struct mana_p
+               txq->ndev = net;
+               txq->net_txq = netdev_get_tx_queue(net, i);
+               txq->vp_offset = apc->tx_vp_offset;
++              txq->napi_initialized = false;
+               skb_queue_head_init(&txq->pending_skbs);
+               memset(&spec, 0, sizeof(spec));
+@@ -1982,6 +1985,7 @@ static int mana_create_txq(struct mana_p
+               netif_napi_add_tx(net, &cq->napi, mana_poll);
+               napi_enable(&cq->napi);
++              txq->napi_initialized = true;
+               mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
+       }
+@@ -1993,7 +1997,7 @@ out:
+ }
+ static void mana_destroy_rxq(struct mana_port_context *apc,
+-                           struct mana_rxq *rxq, bool validate_state)
++                           struct mana_rxq *rxq, bool napi_initialized)
+ {
+       struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
+@@ -2008,15 +2012,15 @@ static void mana_destroy_rxq(struct mana
+       napi = &rxq->rx_cq.napi;
+-      if (validate_state)
++      if (napi_initialized) {
+               napi_synchronize(napi);
+-      napi_disable(napi);
++              napi_disable(napi);
++              netif_napi_del(napi);
++      }
+       xdp_rxq_info_unreg(&rxq->xdp_rxq);
+-      netif_napi_del(napi);
+-
+       mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
+       mana_deinit_cq(apc, &rxq->rx_cq);
+--- a/include/net/mana/mana.h
++++ b/include/net/mana/mana.h
+@@ -97,6 +97,8 @@ struct mana_txq {
+       atomic_t pending_sends;
++      bool napi_initialized;
++
+       struct mana_stats_tx stats;
+ };
diff --git a/queue-6.10/net-mctp-serial-fix-missing-escapes-on-transmit.patch b/queue-6.10/net-mctp-serial-fix-missing-escapes-on-transmit.patch
new file mode 100644 (file)
index 0000000..b548932
--- /dev/null
@@ -0,0 +1,55 @@
+From f962e8361adfa84e8252d3fc3e5e6bb879f029b1 Mon Sep 17 00:00:00 2001
+From: Matt Johnston <matt@codeconstruct.com.au>
+Date: Thu, 29 Aug 2024 15:43:46 +0800
+Subject: net: mctp-serial: Fix missing escapes on transmit
+
+From: Matt Johnston <matt@codeconstruct.com.au>
+
+commit f962e8361adfa84e8252d3fc3e5e6bb879f029b1 upstream.
+
+0x7d and 0x7e bytes are meant to be escaped in the data portion of
+frames, but this didn't occur since next_chunk_len() had an off-by-one
+error. That also resulted in the final byte of a payload being written
+as a separate tty write op.
+
+The chunk prior to an escaped byte would be one byte short, and the
+next call would never test the txpos+1 case, which is where the escaped
+byte was located. That meant it never hit the escaping case in
+mctp_serial_tx_work().
+
+Example Input: 01 00 08 c8 7e 80 02
+
+Previous incorrect chunks from next_chunk_len():
+
+01 00 08
+c8 7e 80
+02
+
+With this fix:
+
+01 00 08 c8
+7e
+80 02
+
+Cc: stable@vger.kernel.org
+Fixes: a0c2ccd9b5ad ("mctp: Add MCTP-over-serial transport binding")
+Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/mctp/mctp-serial.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/mctp/mctp-serial.c
++++ b/drivers/net/mctp/mctp-serial.c
+@@ -91,8 +91,8 @@ static int next_chunk_len(struct mctp_se
+        * will be those non-escaped bytes, and does not include the escaped
+        * byte.
+        */
+-      for (i = 1; i + dev->txpos + 1 < dev->txlen; i++) {
+-              if (needs_escape(dev->txbuf[dev->txpos + i + 1]))
++      for (i = 1; i + dev->txpos < dev->txlen; i++) {
++              if (needs_escape(dev->txbuf[dev->txpos + i]))
+                       break;
+       }
diff --git a/queue-6.10/nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch b/queue-6.10/nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch
new file mode 100644 (file)
index 0000000..1f42fec
--- /dev/null
@@ -0,0 +1,90 @@
+From 5787fcaab9eb5930f5378d6a1dd03d916d146622 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Sat, 10 Aug 2024 15:52:42 +0900
+Subject: nilfs2: fix missing cleanup on rollforward recovery error
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 5787fcaab9eb5930f5378d6a1dd03d916d146622 upstream.
+
+In an error injection test of a routine for mount-time recovery, KASAN
+found a use-after-free bug.
+
+It turned out that if data recovery was performed using partial logs
+created by dsync writes, but an error occurred before starting the log
+writer to create a recovered checkpoint, the inodes whose data had been
+recovered were left in the ns_dirty_files list of the nilfs object and
+were not freed.
+
+Fix this issue by cleaning up inodes that have read the recovery data if
+the recovery routine fails midway before the log writer starts.
+
+Link: https://lkml.kernel.org/r/20240810065242.3701-1-konishi.ryusuke@gmail.com
+Fixes: 0f3e1c7f23f8 ("nilfs2: recovery functions")
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/recovery.c |   35 +++++++++++++++++++++++++++++++++--
+ 1 file changed, 33 insertions(+), 2 deletions(-)
+
+--- a/fs/nilfs2/recovery.c
++++ b/fs/nilfs2/recovery.c
+@@ -716,6 +716,33 @@ static void nilfs_finish_roll_forward(st
+ }
+ /**
++ * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery
++ * @nilfs: nilfs object
++ */
++static void nilfs_abort_roll_forward(struct the_nilfs *nilfs)
++{
++      struct nilfs_inode_info *ii, *n;
++      LIST_HEAD(head);
++
++      /* Abandon inodes that have read recovery data */
++      spin_lock(&nilfs->ns_inode_lock);
++      list_splice_init(&nilfs->ns_dirty_files, &head);
++      spin_unlock(&nilfs->ns_inode_lock);
++      if (list_empty(&head))
++              return;
++
++      set_nilfs_purging(nilfs);
++      list_for_each_entry_safe(ii, n, &head, i_dirty) {
++              spin_lock(&nilfs->ns_inode_lock);
++              list_del_init(&ii->i_dirty);
++              spin_unlock(&nilfs->ns_inode_lock);
++
++              iput(&ii->vfs_inode);
++      }
++      clear_nilfs_purging(nilfs);
++}
++
++/**
+  * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
+  * @nilfs: nilfs object
+  * @sb: super block instance
+@@ -773,15 +800,19 @@ int nilfs_salvage_orphan_logs(struct the
+               if (unlikely(err)) {
+                       nilfs_err(sb, "error %d writing segment for recovery",
+                                 err);
+-                      goto failed;
++                      goto put_root;
+               }
+               nilfs_finish_roll_forward(nilfs, ri);
+       }
+- failed:
++put_root:
+       nilfs_put_root(root);
+       return err;
++
++failed:
++      nilfs_abort_roll_forward(nilfs);
++      goto put_root;
+ }
+ /**
diff --git a/queue-6.10/nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch b/queue-6.10/nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch
new file mode 100644 (file)
index 0000000..c62377b
--- /dev/null
@@ -0,0 +1,81 @@
+From 6576dd6695f2afca3f4954029ac4a64f82ba60ab Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Wed, 14 Aug 2024 19:11:19 +0900
+Subject: nilfs2: fix state management in error path of log writing function
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 6576dd6695f2afca3f4954029ac4a64f82ba60ab upstream.
+
+After commit a694291a6211 ("nilfs2: separate wait function from
+nilfs_segctor_write") was applied, the log writing function
+nilfs_segctor_do_construct() was able to issue I/O requests continuously
+even if user data blocks were split into multiple logs across segments,
+but two potential flaws were introduced in its error handling.
+
+First, if nilfs_segctor_begin_construction() fails while creating the
+second or subsequent logs, the log writing function returns without
+calling nilfs_segctor_abort_construction(), so the writeback flag set on
+pages/folios will remain uncleared.  This causes page cache operations to
+hang waiting for the writeback flag.  For example,
+truncate_inode_pages_final(), which is called via nilfs_evict_inode() when
+an inode is evicted from memory, will hang.
+
+Second, the NILFS_I_COLLECTED flag set on normal inodes remains uncleared.
+As a result, if the next log write involves checkpoint creation, that's
+fine, but if a partial log write is performed that does not, inodes with
+NILFS_I_COLLECTED set are erroneously removed from the "sc_dirty_files"
+list, and their data and b-tree blocks may not be written to the device,
+corrupting the block mapping.
+
+Fix these issues by uniformly calling nilfs_segctor_abort_construction()
+on failure of each step in the loop in nilfs_segctor_do_construct(),
+having it clean up logs and segment usages according to progress, and
+correcting the conditions for calling nilfs_redirty_inodes() to ensure
+that the NILFS_I_COLLECTED flag is cleared.
+
+Link: https://lkml.kernel.org/r/20240814101119.4070-1-konishi.ryusuke@gmail.com
+Fixes: a694291a6211 ("nilfs2: separate wait function from nilfs_segctor_write")
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/segment.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/fs/nilfs2/segment.c
++++ b/fs/nilfs2/segment.c
+@@ -1788,6 +1788,9 @@ static void nilfs_segctor_abort_construc
+       nilfs_abort_logs(&logs, ret ? : err);
+       list_splice_tail_init(&sci->sc_segbufs, &logs);
++      if (list_empty(&logs))
++              return; /* if the first segment buffer preparation failed */
++
+       nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
+       nilfs_free_incomplete_logs(&logs, nilfs);
+@@ -2032,7 +2035,7 @@ static int nilfs_segctor_do_construct(st
+               err = nilfs_segctor_begin_construction(sci, nilfs);
+               if (unlikely(err))
+-                      goto out;
++                      goto failed;
+               /* Update time stamp */
+               sci->sc_seg_ctime = ktime_get_real_seconds();
+@@ -2099,10 +2102,9 @@ static int nilfs_segctor_do_construct(st
+       return err;
+  failed_to_write:
+-      if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
+-              nilfs_redirty_inodes(&sci->sc_dirty_files);
+-
+  failed:
++      if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE)
++              nilfs_redirty_inodes(&sci->sc_dirty_files);
+       if (nilfs_doing_gc())
+               nilfs_redirty_inodes(&sci->sc_gc_inodes);
+       nilfs_segctor_abort_construction(sci, nilfs, err);
diff --git a/queue-6.10/nilfs2-protect-references-to-superblock-parameters-exposed-in-sysfs.patch b/queue-6.10/nilfs2-protect-references-to-superblock-parameters-exposed-in-sysfs.patch
new file mode 100644 (file)
index 0000000..aaefbe9
--- /dev/null
@@ -0,0 +1,104 @@
+From 683408258917541bdb294cd717c210a04381931e Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Sun, 11 Aug 2024 19:03:20 +0900
+Subject: nilfs2: protect references to superblock parameters exposed in sysfs
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 683408258917541bdb294cd717c210a04381931e upstream.
+
+The superblock buffers of nilfs2 can not only be overwritten at runtime
+for modifications/repairs, but they are also regularly swapped, replaced
+during resizing, and even abandoned when degrading to one side due to
+backing device issues.  So, accessing them requires mutual exclusion using
+the reader/writer semaphore "nilfs->ns_sem".
+
+Some sysfs attribute show methods read this superblock buffer without the
+necessary mutual exclusion, which can cause problems with pointer
+dereferencing and memory access, so fix it.
+
+Link: https://lkml.kernel.org/r/20240811100320.9913-1-konishi.ryusuke@gmail.com
+Fixes: da7141fb78db ("nilfs2: add /sys/fs/nilfs2/<device> group")
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/sysfs.c |   43 +++++++++++++++++++++++++++++++++----------
+ 1 file changed, 33 insertions(+), 10 deletions(-)
+
+--- a/fs/nilfs2/sysfs.c
++++ b/fs/nilfs2/sysfs.c
+@@ -836,9 +836,15 @@ ssize_t nilfs_dev_revision_show(struct n
+                               struct the_nilfs *nilfs,
+                               char *buf)
+ {
+-      struct nilfs_super_block **sbp = nilfs->ns_sbp;
+-      u32 major = le32_to_cpu(sbp[0]->s_rev_level);
+-      u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
++      struct nilfs_super_block *raw_sb;
++      u32 major;
++      u16 minor;
++
++      down_read(&nilfs->ns_sem);
++      raw_sb = nilfs->ns_sbp[0];
++      major = le32_to_cpu(raw_sb->s_rev_level);
++      minor = le16_to_cpu(raw_sb->s_minor_rev_level);
++      up_read(&nilfs->ns_sem);
+       return sysfs_emit(buf, "%d.%d\n", major, minor);
+ }
+@@ -856,8 +862,13 @@ ssize_t nilfs_dev_device_size_show(struc
+                                   struct the_nilfs *nilfs,
+                                   char *buf)
+ {
+-      struct nilfs_super_block **sbp = nilfs->ns_sbp;
+-      u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
++      struct nilfs_super_block *raw_sb;
++      u64 dev_size;
++
++      down_read(&nilfs->ns_sem);
++      raw_sb = nilfs->ns_sbp[0];
++      dev_size = le64_to_cpu(raw_sb->s_dev_size);
++      up_read(&nilfs->ns_sem);
+       return sysfs_emit(buf, "%llu\n", dev_size);
+ }
+@@ -879,9 +890,15 @@ ssize_t nilfs_dev_uuid_show(struct nilfs
+                           struct the_nilfs *nilfs,
+                           char *buf)
+ {
+-      struct nilfs_super_block **sbp = nilfs->ns_sbp;
++      struct nilfs_super_block *raw_sb;
++      ssize_t len;
++
++      down_read(&nilfs->ns_sem);
++      raw_sb = nilfs->ns_sbp[0];
++      len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid);
++      up_read(&nilfs->ns_sem);
+-      return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid);
++      return len;
+ }
+ static
+@@ -889,10 +906,16 @@ ssize_t nilfs_dev_volume_name_show(struc
+                                   struct the_nilfs *nilfs,
+                                   char *buf)
+ {
+-      struct nilfs_super_block **sbp = nilfs->ns_sbp;
++      struct nilfs_super_block *raw_sb;
++      ssize_t len;
++
++      down_read(&nilfs->ns_sem);
++      raw_sb = nilfs->ns_sbp[0];
++      len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n",
++                      raw_sb->s_volume_name);
++      up_read(&nilfs->ns_sem);
+-      return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
+-                       sbp[0]->s_volume_name);
++      return len;
+ }
+ static const char dev_readme_str[] =
diff --git a/queue-6.10/revert-drm-amdgpu-align-pp_power_profile_mode-with-kernel-docs.patch b/queue-6.10/revert-drm-amdgpu-align-pp_power_profile_mode-with-kernel-docs.patch
new file mode 100644 (file)
index 0000000..3d13e93
--- /dev/null
@@ -0,0 +1,45 @@
+From 1a8d845470941f1b6de1b392227530c097dc5e0c Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Thu, 5 Sep 2024 14:24:38 -0400
+Subject: Revert "drm/amdgpu: align pp_power_profile_mode with kernel docs"
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 1a8d845470941f1b6de1b392227530c097dc5e0c upstream.
+
+This reverts commit 8f614469de248a4bc55fb07e55d5f4c340c75b11.
+
+This breaks some manual setting of the profile mode in
+certain cases.
+
+Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3600
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit 7a199557643e993d4e7357860624b8aa5d8f4340)
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+@@ -2257,7 +2257,8 @@ static int smu_adjust_power_state_dynami
+               smu_dpm_ctx->dpm_level = level;
+       }
+-      if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
++      if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
++              smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
+               index = fls(smu->workload_mask);
+               index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
+               workload[0] = smu->workload_setting[index];
+@@ -2336,7 +2337,8 @@ static int smu_switch_power_profile(void
+               workload[0] = smu->workload_setting[index];
+       }
+-      if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
++      if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
++              smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
+               smu_bump_power_profile_mode(smu, workload, 0);
+       return 0;
diff --git a/queue-6.10/revert-wifi-ath11k-restore-country-code-during-resume.patch b/queue-6.10/revert-wifi-ath11k-restore-country-code-during-resume.patch
new file mode 100644 (file)
index 0000000..8deff7b
--- /dev/null
@@ -0,0 +1,43 @@
+From d3e154d7776ba57ab679fb816fb87b627fba21c9 Mon Sep 17 00:00:00 2001
+From: Baochen Qiang <quic_bqiang@quicinc.com>
+Date: Fri, 30 Aug 2024 15:34:19 +0800
+Subject: Revert "wifi: ath11k: restore country code during resume"
+
+From: Baochen Qiang <quic_bqiang@quicinc.com>
+
+commit d3e154d7776ba57ab679fb816fb87b627fba21c9 upstream.
+
+This reverts commit 7f0343b7b8710436c1e6355c71782d32ada47e0c.
+
+We are going to revert commit 166a490f59ac ("wifi: ath11k: support hibernation"), on
+which this commit depends. With that commit reverted, this one is not needed any
+more, so revert this commit first.
+
+Signed-off-by: Baochen Qiang <quic_bqiang@quicinc.com>
+Acked-by: Jeff Johnson <quic_jjohnson@quicinc.com>
+Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
+Link: https://patch.msgid.link/20240830073420.5790-2-quic_bqiang@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireless/ath/ath11k/core.c |   10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/drivers/net/wireless/ath/ath11k/core.c
++++ b/drivers/net/wireless/ath/ath11k/core.c
+@@ -1009,16 +1009,6 @@ int ath11k_core_resume(struct ath11k_bas
+               return -ETIMEDOUT;
+       }
+-      if (ab->hw_params.current_cc_support &&
+-          ar->alpha2[0] != 0 && ar->alpha2[1] != 0) {
+-              ret = ath11k_reg_set_cc(ar);
+-              if (ret) {
+-                      ath11k_warn(ab, "failed to set country code during resume: %d\n",
+-                                  ret);
+-                      return ret;
+-              }
+-      }
+-
+       ret = ath11k_dp_rx_pktlog_start(ab);
+       if (ret)
+               ath11k_warn(ab, "failed to start rx pktlog during resume: %d\n",
diff --git a/queue-6.10/revert-wifi-ath11k-support-hibernation.patch b/queue-6.10/revert-wifi-ath11k-support-hibernation.patch
new file mode 100644 (file)
index 0000000..0f647d9
--- /dev/null
@@ -0,0 +1,420 @@
+From 2f833e8948d6c88a3a257d4e426c9897b4907d5a Mon Sep 17 00:00:00 2001
+From: Baochen Qiang <quic_bqiang@quicinc.com>
+Date: Fri, 30 Aug 2024 15:34:20 +0800
+Subject: Revert "wifi: ath11k: support hibernation"
+
+From: Baochen Qiang <quic_bqiang@quicinc.com>
+
+commit 2f833e8948d6c88a3a257d4e426c9897b4907d5a upstream.
+
+This reverts commit 166a490f59ac10340ee5330e51c15188ce2a7f8f.
+
+There are several reports that this commit breaks system suspend on some specific
+Lenovo platforms. Since there is no fix available, for now revert this commit
+to make suspend work again on those platforms.
+
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219196
+Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2301921
+Cc: <stable@vger.kernel.org> # 6.10.x: d3e154d7776b: Revert "wifi: ath11k: restore country code during resume"
+Cc: <stable@vger.kernel.org> # 6.10.x
+Signed-off-by: Baochen Qiang <quic_bqiang@quicinc.com>
+Acked-by: Jeff Johnson <quic_jjohnson@quicinc.com>
+Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
+Link: https://patch.msgid.link/20240830073420.5790-3-quic_bqiang@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireless/ath/ath11k/ahb.c  |    4 -
+ drivers/net/wireless/ath/ath11k/core.c |  107 +++++++++------------------------
+ drivers/net/wireless/ath/ath11k/core.h |    4 -
+ drivers/net/wireless/ath/ath11k/hif.h  |   12 ---
+ drivers/net/wireless/ath/ath11k/mhi.c  |   12 ---
+ drivers/net/wireless/ath/ath11k/mhi.h  |    3 
+ drivers/net/wireless/ath/ath11k/pci.c  |   44 ++-----------
+ drivers/net/wireless/ath/ath11k/qmi.c  |    2 
+ 8 files changed, 49 insertions(+), 139 deletions(-)
+
+--- a/drivers/net/wireless/ath/ath11k/ahb.c
++++ b/drivers/net/wireless/ath/ath11k/ahb.c
+@@ -413,7 +413,7 @@ static int ath11k_ahb_power_up(struct at
+       return ret;
+ }
+-static void ath11k_ahb_power_down(struct ath11k_base *ab, bool is_suspend)
++static void ath11k_ahb_power_down(struct ath11k_base *ab)
+ {
+       struct ath11k_ahb *ab_ahb = ath11k_ahb_priv(ab);
+@@ -1261,7 +1261,7 @@ static void ath11k_ahb_remove(struct pla
+       struct ath11k_base *ab = platform_get_drvdata(pdev);
+       if (test_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags)) {
+-              ath11k_ahb_power_down(ab, false);
++              ath11k_ahb_power_down(ab);
+               ath11k_debugfs_soc_destroy(ab);
+               ath11k_qmi_deinit_service(ab);
+               goto qmi_fail;
+--- a/drivers/net/wireless/ath/ath11k/core.c
++++ b/drivers/net/wireless/ath/ath11k/core.c
+@@ -906,6 +906,12 @@ int ath11k_core_suspend(struct ath11k_ba
+               return ret;
+       }
++      ret = ath11k_wow_enable(ab);
++      if (ret) {
++              ath11k_warn(ab, "failed to enable wow during suspend: %d\n", ret);
++              return ret;
++      }
++
+       ret = ath11k_dp_rx_pktlog_stop(ab, false);
+       if (ret) {
+               ath11k_warn(ab, "failed to stop dp rx pktlog during suspend: %d\n",
+@@ -916,85 +922,29 @@ int ath11k_core_suspend(struct ath11k_ba
+       ath11k_ce_stop_shadow_timers(ab);
+       ath11k_dp_stop_shadow_timers(ab);
+-      /* PM framework skips suspend_late/resume_early callbacks
+-       * if other devices report errors in their suspend callbacks.
+-       * However ath11k_core_resume() would still be called because
+-       * here we return success thus kernel put us on dpm_suspended_list.
+-       * Since we won't go through a power down/up cycle, there is
+-       * no chance to call complete(&ab->restart_completed) in
+-       * ath11k_core_restart(), making ath11k_core_resume() timeout.
+-       * So call it here to avoid this issue. This also works in case
+-       * no error happens thus suspend_late/resume_early get called,
+-       * because it will be reinitialized in ath11k_core_resume_early().
+-       */
+-      complete(&ab->restart_completed);
+-
+-      return 0;
+-}
+-EXPORT_SYMBOL(ath11k_core_suspend);
+-
+-int ath11k_core_suspend_late(struct ath11k_base *ab)
+-{
+-      struct ath11k_pdev *pdev;
+-      struct ath11k *ar;
+-
+-      if (!ab->hw_params.supports_suspend)
+-              return -EOPNOTSUPP;
+-
+-      /* so far single_pdev_only chips have supports_suspend as true
+-       * and only the first pdev is valid.
+-       */
+-      pdev = ath11k_core_get_single_pdev(ab);
+-      ar = pdev->ar;
+-      if (!ar || ar->state != ATH11K_STATE_OFF)
+-              return 0;
+-
+       ath11k_hif_irq_disable(ab);
+       ath11k_hif_ce_irq_disable(ab);
+-      ath11k_hif_power_down(ab, true);
++      ret = ath11k_hif_suspend(ab);
++      if (ret) {
++              ath11k_warn(ab, "failed to suspend hif: %d\n", ret);
++              return ret;
++      }
+       return 0;
+ }
+-EXPORT_SYMBOL(ath11k_core_suspend_late);
+-
+-int ath11k_core_resume_early(struct ath11k_base *ab)
+-{
+-      int ret;
+-      struct ath11k_pdev *pdev;
+-      struct ath11k *ar;
+-
+-      if (!ab->hw_params.supports_suspend)
+-              return -EOPNOTSUPP;
+-
+-      /* so far single_pdev_only chips have supports_suspend as true
+-       * and only the first pdev is valid.
+-       */
+-      pdev = ath11k_core_get_single_pdev(ab);
+-      ar = pdev->ar;
+-      if (!ar || ar->state != ATH11K_STATE_OFF)
+-              return 0;
+-
+-      reinit_completion(&ab->restart_completed);
+-      ret = ath11k_hif_power_up(ab);
+-      if (ret)
+-              ath11k_warn(ab, "failed to power up hif during resume: %d\n", ret);
+-
+-      return ret;
+-}
+-EXPORT_SYMBOL(ath11k_core_resume_early);
++EXPORT_SYMBOL(ath11k_core_suspend);
+ int ath11k_core_resume(struct ath11k_base *ab)
+ {
+       int ret;
+       struct ath11k_pdev *pdev;
+       struct ath11k *ar;
+-      long time_left;
+       if (!ab->hw_params.supports_suspend)
+               return -EOPNOTSUPP;
+-      /* so far single_pdev_only chips have supports_suspend as true
++      /* so far signle_pdev_only chips have supports_suspend as true
+        * and only the first pdev is valid.
+        */
+       pdev = ath11k_core_get_single_pdev(ab);
+@@ -1002,19 +952,29 @@ int ath11k_core_resume(struct ath11k_bas
+       if (!ar || ar->state != ATH11K_STATE_OFF)
+               return 0;
+-      time_left = wait_for_completion_timeout(&ab->restart_completed,
+-                                              ATH11K_RESET_TIMEOUT_HZ);
+-      if (time_left == 0) {
+-              ath11k_warn(ab, "timeout while waiting for restart complete");
+-              return -ETIMEDOUT;
++      ret = ath11k_hif_resume(ab);
++      if (ret) {
++              ath11k_warn(ab, "failed to resume hif during resume: %d\n", ret);
++              return ret;
+       }
++      ath11k_hif_ce_irq_enable(ab);
++      ath11k_hif_irq_enable(ab);
++
+       ret = ath11k_dp_rx_pktlog_start(ab);
+-      if (ret)
++      if (ret) {
+               ath11k_warn(ab, "failed to start rx pktlog during resume: %d\n",
+                           ret);
++              return ret;
++      }
+-      return ret;
++      ret = ath11k_wow_wakeup(ab);
++      if (ret) {
++              ath11k_warn(ab, "failed to wakeup wow during resume: %d\n", ret);
++              return ret;
++      }
++
++      return 0;
+ }
+ EXPORT_SYMBOL(ath11k_core_resume);
+@@ -2109,8 +2069,6 @@ static void ath11k_core_restart(struct w
+       if (!ab->is_reset)
+               ath11k_core_post_reconfigure_recovery(ab);
+-
+-      complete(&ab->restart_completed);
+ }
+ static void ath11k_core_reset(struct work_struct *work)
+@@ -2180,7 +2138,7 @@ static void ath11k_core_reset(struct wor
+       ath11k_hif_irq_disable(ab);
+       ath11k_hif_ce_irq_disable(ab);
+-      ath11k_hif_power_down(ab, false);
++      ath11k_hif_power_down(ab);
+       ath11k_hif_power_up(ab);
+       ath11k_dbg(ab, ATH11K_DBG_BOOT, "reset started\n");
+@@ -2253,7 +2211,7 @@ void ath11k_core_deinit(struct ath11k_ba
+       mutex_unlock(&ab->core_lock);
+-      ath11k_hif_power_down(ab, false);
++      ath11k_hif_power_down(ab);
+       ath11k_mac_destroy(ab);
+       ath11k_core_soc_destroy(ab);
+       ath11k_fw_destroy(ab);
+@@ -2306,7 +2264,6 @@ struct ath11k_base *ath11k_core_alloc(st
+       timer_setup(&ab->rx_replenish_retry, ath11k_ce_rx_replenish_retry, 0);
+       init_completion(&ab->htc_suspend);
+       init_completion(&ab->wow.wakeup_completed);
+-      init_completion(&ab->restart_completed);
+       ab->dev = dev;
+       ab->hif.bus = bus;
+--- a/drivers/net/wireless/ath/ath11k/core.h
++++ b/drivers/net/wireless/ath/ath11k/core.h
+@@ -1033,8 +1033,6 @@ struct ath11k_base {
+               DECLARE_BITMAP(fw_features, ATH11K_FW_FEATURE_COUNT);
+       } fw;
+-      struct completion restart_completed;
+-
+ #ifdef CONFIG_NL80211_TESTMODE
+       struct {
+               u32 data_pos;
+@@ -1234,10 +1232,8 @@ void ath11k_core_free_bdf(struct ath11k_
+ int ath11k_core_check_dt(struct ath11k_base *ath11k);
+ int ath11k_core_check_smbios(struct ath11k_base *ab);
+ void ath11k_core_halt(struct ath11k *ar);
+-int ath11k_core_resume_early(struct ath11k_base *ab);
+ int ath11k_core_resume(struct ath11k_base *ab);
+ int ath11k_core_suspend(struct ath11k_base *ab);
+-int ath11k_core_suspend_late(struct ath11k_base *ab);
+ void ath11k_core_pre_reconfigure_recovery(struct ath11k_base *ab);
+ bool ath11k_core_coldboot_cal_support(struct ath11k_base *ab);
+--- a/drivers/net/wireless/ath/ath11k/hif.h
++++ b/drivers/net/wireless/ath/ath11k/hif.h
+@@ -18,7 +18,7 @@ struct ath11k_hif_ops {
+       int (*start)(struct ath11k_base *ab);
+       void (*stop)(struct ath11k_base *ab);
+       int (*power_up)(struct ath11k_base *ab);
+-      void (*power_down)(struct ath11k_base *ab, bool is_suspend);
++      void (*power_down)(struct ath11k_base *ab);
+       int (*suspend)(struct ath11k_base *ab);
+       int (*resume)(struct ath11k_base *ab);
+       int (*map_service_to_pipe)(struct ath11k_base *ab, u16 service_id,
+@@ -67,18 +67,12 @@ static inline void ath11k_hif_irq_disabl
+ static inline int ath11k_hif_power_up(struct ath11k_base *ab)
+ {
+-      if (!ab->hif.ops->power_up)
+-              return -EOPNOTSUPP;
+-
+       return ab->hif.ops->power_up(ab);
+ }
+-static inline void ath11k_hif_power_down(struct ath11k_base *ab, bool is_suspend)
++static inline void ath11k_hif_power_down(struct ath11k_base *ab)
+ {
+-      if (!ab->hif.ops->power_down)
+-              return;
+-
+-      ab->hif.ops->power_down(ab, is_suspend);
++      ab->hif.ops->power_down(ab);
+ }
+ static inline int ath11k_hif_suspend(struct ath11k_base *ab)
+--- a/drivers/net/wireless/ath/ath11k/mhi.c
++++ b/drivers/net/wireless/ath/ath11k/mhi.c
+@@ -453,17 +453,9 @@ int ath11k_mhi_start(struct ath11k_pci *
+       return 0;
+ }
+-void ath11k_mhi_stop(struct ath11k_pci *ab_pci, bool is_suspend)
++void ath11k_mhi_stop(struct ath11k_pci *ab_pci)
+ {
+-      /* During suspend we need to use mhi_power_down_keep_dev()
+-       * workaround, otherwise ath11k_core_resume() will timeout
+-       * during resume.
+-       */
+-      if (is_suspend)
+-              mhi_power_down_keep_dev(ab_pci->mhi_ctrl, true);
+-      else
+-              mhi_power_down(ab_pci->mhi_ctrl, true);
+-
++      mhi_power_down(ab_pci->mhi_ctrl, true);
+       mhi_unprepare_after_power_down(ab_pci->mhi_ctrl);
+ }
+--- a/drivers/net/wireless/ath/ath11k/mhi.h
++++ b/drivers/net/wireless/ath/ath11k/mhi.h
+@@ -18,7 +18,7 @@
+ #define MHICTRL_RESET_MASK                    0x2
+ int ath11k_mhi_start(struct ath11k_pci *ar_pci);
+-void ath11k_mhi_stop(struct ath11k_pci *ar_pci, bool is_suspend);
++void ath11k_mhi_stop(struct ath11k_pci *ar_pci);
+ int ath11k_mhi_register(struct ath11k_pci *ar_pci);
+ void ath11k_mhi_unregister(struct ath11k_pci *ar_pci);
+ void ath11k_mhi_set_mhictrl_reset(struct ath11k_base *ab);
+@@ -26,4 +26,5 @@ void ath11k_mhi_clear_vector(struct ath1
+ int ath11k_mhi_suspend(struct ath11k_pci *ar_pci);
+ int ath11k_mhi_resume(struct ath11k_pci *ar_pci);
++
+ #endif
+--- a/drivers/net/wireless/ath/ath11k/pci.c
++++ b/drivers/net/wireless/ath/ath11k/pci.c
+@@ -638,7 +638,7 @@ static int ath11k_pci_power_up(struct at
+       return 0;
+ }
+-static void ath11k_pci_power_down(struct ath11k_base *ab, bool is_suspend)
++static void ath11k_pci_power_down(struct ath11k_base *ab)
+ {
+       struct ath11k_pci *ab_pci = ath11k_pci_priv(ab);
+@@ -649,7 +649,7 @@ static void ath11k_pci_power_down(struct
+       ath11k_pci_msi_disable(ab_pci);
+-      ath11k_mhi_stop(ab_pci, is_suspend);
++      ath11k_mhi_stop(ab_pci);
+       clear_bit(ATH11K_FLAG_DEVICE_INIT_DONE, &ab->dev_flags);
+       ath11k_pci_sw_reset(ab_pci->ab, false);
+ }
+@@ -970,7 +970,7 @@ static void ath11k_pci_remove(struct pci
+       ath11k_pci_set_irq_affinity_hint(ab_pci, NULL);
+       if (test_bit(ATH11K_FLAG_QMI_FAIL, &ab->dev_flags)) {
+-              ath11k_pci_power_down(ab, false);
++              ath11k_pci_power_down(ab);
+               ath11k_debugfs_soc_destroy(ab);
+               ath11k_qmi_deinit_service(ab);
+               goto qmi_fail;
+@@ -998,7 +998,7 @@ static void ath11k_pci_shutdown(struct p
+       struct ath11k_pci *ab_pci = ath11k_pci_priv(ab);
+       ath11k_pci_set_irq_affinity_hint(ab_pci, NULL);
+-      ath11k_pci_power_down(ab, false);
++      ath11k_pci_power_down(ab);
+ }
+ static __maybe_unused int ath11k_pci_pm_suspend(struct device *dev)
+@@ -1035,39 +1035,9 @@ static __maybe_unused int ath11k_pci_pm_
+       return ret;
+ }
+-static __maybe_unused int ath11k_pci_pm_suspend_late(struct device *dev)
+-{
+-      struct ath11k_base *ab = dev_get_drvdata(dev);
+-      int ret;
+-
+-      ret = ath11k_core_suspend_late(ab);
+-      if (ret)
+-              ath11k_warn(ab, "failed to late suspend core: %d\n", ret);
+-
+-      /* Similar to ath11k_pci_pm_suspend(), we return success here
+-       * even error happens, to allow system suspend/hibernation survive.
+-       */
+-      return 0;
+-}
+-
+-static __maybe_unused int ath11k_pci_pm_resume_early(struct device *dev)
+-{
+-      struct ath11k_base *ab = dev_get_drvdata(dev);
+-      int ret;
+-
+-      ret = ath11k_core_resume_early(ab);
+-      if (ret)
+-              ath11k_warn(ab, "failed to early resume core: %d\n", ret);
+-
+-      return ret;
+-}
+-
+-static const struct dev_pm_ops __maybe_unused ath11k_pci_pm_ops = {
+-      SET_SYSTEM_SLEEP_PM_OPS(ath11k_pci_pm_suspend,
+-                              ath11k_pci_pm_resume)
+-      SET_LATE_SYSTEM_SLEEP_PM_OPS(ath11k_pci_pm_suspend_late,
+-                                   ath11k_pci_pm_resume_early)
+-};
++static SIMPLE_DEV_PM_OPS(ath11k_pci_pm_ops,
++                       ath11k_pci_pm_suspend,
++                       ath11k_pci_pm_resume);
+ static struct pci_driver ath11k_pci_driver = {
+       .name = "ath11k_pci",
+--- a/drivers/net/wireless/ath/ath11k/qmi.c
++++ b/drivers/net/wireless/ath/ath11k/qmi.c
+@@ -2877,7 +2877,7 @@ int ath11k_qmi_fwreset_from_cold_boot(st
+       }
+       /* reset the firmware */
+-      ath11k_hif_power_down(ab, false);
++      ath11k_hif_power_down(ab);
+       ath11k_hif_power_up(ab);
+       ath11k_dbg(ab, ATH11K_DBG_QMI, "exit wait for cold boot done\n");
+       return 0;
diff --git a/queue-6.10/sched-sch_cake-fix-bulk-flow-accounting-logic-for-host-fairness.patch b/queue-6.10/sched-sch_cake-fix-bulk-flow-accounting-logic-for-host-fairness.patch
new file mode 100644 (file)
index 0000000..06b7c9b
--- /dev/null
@@ -0,0 +1,83 @@
+From 546ea84d07e3e324644025e2aae2d12ea4c5896e Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Tue, 3 Sep 2024 18:08:45 +0200
+Subject: sched: sch_cake: fix bulk flow accounting logic for host fairness
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Toke Høiland-Jørgensen <toke@redhat.com>
+
+commit 546ea84d07e3e324644025e2aae2d12ea4c5896e upstream.
+
+In sch_cake, we keep track of the count of active bulk flows per host,
+when running in dst/src host fairness mode, which is used as the
+round-robin weight when iterating through flows. The count of active
+bulk flows is updated whenever a flow changes state.
+
+This has a peculiar interaction with the hash collision handling: when a
+hash collision occurs (after the set-associative hashing), the state of
+the hash bucket is simply updated to match the new packet that collided,
+and if host fairness is enabled, that also means assigning new per-host
+state to the flow. For this reason, the bulk flow counters of the
+host(s) assigned to the flow are decremented, before new state is
+assigned (and the counters, which may not belong to the same host
+anymore, are incremented again).
+
+Back when this code was introduced, the host fairness mode was always
+enabled, so the decrement was unconditional. When the configuration
+flags were introduced the *increment* was made conditional, but
+the *decrement* was not. Which of course can lead to a spurious
+decrement (and associated wrap-around to U16_MAX).
+
+AFAICT, when host fairness is disabled, the decrement and wrap-around
+happen as soon as a hash collision occurs (which is not that common in
+itself, due to the set-associative hashing). However, in most cases this
+is harmless, as the value is only used when host fairness mode is
+enabled. So in order to trigger an array overflow, sch_cake has to first
+be configured with host fairness disabled, and while running in this
+mode, a hash collision has to occur to cause the overflow. Then, the
+qdisc has to be reconfigured to enable host fairness, which leads to the
+array out-of-bounds because the wrapped-around value is retained and
+used as an array index. It seems that syzbot managed to trigger this,
+which is quite impressive in its own right.
+
+This patch fixes the issue by introducing the same conditional check on
+decrement as is used on increment.
+
+The original bug predates the upstreaming of cake, but the commit listed
+in the Fixes tag touched that code, meaning that this patch won't apply
+before that.
+
+Fixes: 712639929912 ("sch_cake: Make the dual modes fairer")
+Reported-by: syzbot+7fe7b81d602cc1e6b94d@syzkaller.appspotmail.com
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://patch.msgid.link/20240903160846.20909-1-toke@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_cake.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -786,12 +786,15 @@ skip_hash:
+                * queue, accept the collision, update the host tags.
+                */
+               q->way_collisions++;
+-              if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
+-                      q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
+-                      q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
+-              }
+               allocate_src = cake_dsrc(flow_mode);
+               allocate_dst = cake_ddst(flow_mode);
++
++              if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
++                      if (allocate_src)
++                              q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
++                      if (allocate_dst)
++                              q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
++              }
+ found:
+               /* reserve queue for future packets in same flow */
+               reduced_hash = outer_hash + k;
index 99904d1e95ad9acd2c49ef3a247ffb23c49c857d..df03e84548ae19fb3e474919becf3b551136eb01 100644 (file)
@@ -66,3 +66,20 @@ tracing-timerlat-add-interface_lock-around-clearing-of-kthread-in-stop_kthread.p
 userfaultfd-don-t-bug_on-if-khugepaged-yanks-our-page-table.patch
 userfaultfd-fix-checks-for-huge-pmds.patch
 fscache-delete-fscache_cookie_lru_timer-when-fscache-exits-to-avoid-uaf.patch
+eventfs-use-list_del_rcu-for-srcu-protected-list-variable.patch
+net-mana-fix-error-handling-in-mana_create_txq-rxq-s-napi-cleanup.patch
+net-mctp-serial-fix-missing-escapes-on-transmit.patch
+x86-fpu-avoid-writing-lbr-bit-to-ia32_xss-unless-supported.patch
+x86-apic-make-x2apic_disable-work-correctly.patch
+revert-drm-amdgpu-align-pp_power_profile_mode-with-kernel-docs.patch
+revert-wifi-ath11k-restore-country-code-during-resume.patch
+revert-wifi-ath11k-support-hibernation.patch
+tcp_bpf-fix-return-value-of-tcp_bpf_sendmsg.patch
+ila-call-nf_unregister_net_hooks-sooner.patch
+sched-sch_cake-fix-bulk-flow-accounting-logic-for-host-fairness.patch
+nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch
+nilfs2-protect-references-to-superblock-parameters-exposed-in-sysfs.patch
+nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch
+btrfs-qgroup-don-t-use-extent-changeset-when-not-needed.patch
+btrfs-zoned-handle-broken-write-pointer-on-zones.patch
+drm-xe-gsc-do-not-attempt-to-load-the-gsc-multiple-times.patch
diff --git a/queue-6.10/tcp_bpf-fix-return-value-of-tcp_bpf_sendmsg.patch b/queue-6.10/tcp_bpf-fix-return-value-of-tcp_bpf_sendmsg.patch
new file mode 100644 (file)
index 0000000..f4e35d9
--- /dev/null
@@ -0,0 +1,91 @@
+From fe1910f9337bd46a9343967b547ccab26b4b2c6e Mon Sep 17 00:00:00 2001
+From: Cong Wang <cong.wang@bytedance.com>
+Date: Tue, 20 Aug 2024 20:07:44 -0700
+Subject: tcp_bpf: fix return value of tcp_bpf_sendmsg()
+
+From: Cong Wang <cong.wang@bytedance.com>
+
+commit fe1910f9337bd46a9343967b547ccab26b4b2c6e upstream.
+
+When we cork messages in psock->cork, the last message, which triggers
+the flush, results in sending a sk_msg larger than the current message
+size. In this case, in tcp_bpf_send_verdict(), 'copied' becomes
+negative at least in the following case:
+
+468         case __SK_DROP:
+469         default:
+470                 sk_msg_free_partial(sk, msg, tosend);
+471                 sk_msg_apply_bytes(psock, tosend);
+472                 *copied -= (tosend + delta); // <==== HERE
+473                 return -EACCES;
+
+Therefore, with a suitable negative value of 'copied', it could lead to
+the following BUG (thanks to syzbot). We should not use a negative
+'copied' as a return value here.
+
+  ------------[ cut here ]------------
+  kernel BUG at net/socket.c:733!
+  Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP
+  Modules linked in:
+  CPU: 0 UID: 0 PID: 3265 Comm: syz-executor510 Not tainted 6.11.0-rc3-syzkaller-00060-gd07b43284ab3 #0
+  Hardware name: linux,dummy-virt (DT)
+  pstate: 61400009 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
+  pc : sock_sendmsg_nosec net/socket.c:733 [inline]
+  pc : sock_sendmsg_nosec net/socket.c:728 [inline]
+  pc : __sock_sendmsg+0x5c/0x60 net/socket.c:745
+  lr : sock_sendmsg_nosec net/socket.c:730 [inline]
+  lr : __sock_sendmsg+0x54/0x60 net/socket.c:745
+  sp : ffff800088ea3b30
+  x29: ffff800088ea3b30 x28: fbf00000062bc900 x27: 0000000000000000
+  x26: ffff800088ea3bc0 x25: ffff800088ea3bc0 x24: 0000000000000000
+  x23: f9f00000048dc000 x22: 0000000000000000 x21: ffff800088ea3d90
+  x20: f9f00000048dc000 x19: ffff800088ea3d90 x18: 0000000000000001
+  x17: 0000000000000000 x16: 0000000000000000 x15: 000000002002ffaf
+  x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
+  x11: 0000000000000000 x10: ffff8000815849c0 x9 : ffff8000815b49c0
+  x8 : 0000000000000000 x7 : 000000000000003f x6 : 0000000000000000
+  x5 : 00000000000007e0 x4 : fff07ffffd239000 x3 : fbf00000062bc900
+  x2 : 0000000000000000 x1 : 0000000000000000 x0 : 00000000fffffdef
+  Call trace:
+   sock_sendmsg_nosec net/socket.c:733 [inline]
+   __sock_sendmsg+0x5c/0x60 net/socket.c:745
+   ____sys_sendmsg+0x274/0x2ac net/socket.c:2597
+   ___sys_sendmsg+0xac/0x100 net/socket.c:2651
+   __sys_sendmsg+0x84/0xe0 net/socket.c:2680
+   __do_sys_sendmsg net/socket.c:2689 [inline]
+   __se_sys_sendmsg net/socket.c:2687 [inline]
+   __arm64_sys_sendmsg+0x24/0x30 net/socket.c:2687
+   __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline]
+   invoke_syscall+0x48/0x110 arch/arm64/kernel/syscall.c:49
+   el0_svc_common.constprop.0+0x40/0xe0 arch/arm64/kernel/syscall.c:132
+   do_el0_svc+0x1c/0x28 arch/arm64/kernel/syscall.c:151
+   el0_svc+0x34/0xec arch/arm64/kernel/entry-common.c:712
+   el0t_64_sync_handler+0x100/0x12c arch/arm64/kernel/entry-common.c:730
+   el0t_64_sync+0x19c/0x1a0 arch/arm64/kernel/entry.S:598
+  Code: f9404463 d63f0060 3108441f 54fffe81 (d4210000)
+  ---[ end trace 0000000000000000 ]---
+
+Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data")
+Reported-by: syzbot+58c03971700330ce14d8@syzkaller.appspotmail.com
+Cc: Jakub Sitnicki <jakub@cloudflare.com>
+Signed-off-by: Cong Wang <cong.wang@bytedance.com>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
+Link: https://patch.msgid.link/20240821030744.320934-1-xiyou.wangcong@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bpf.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -577,7 +577,7 @@ out_err:
+               err = sk_stream_error(sk, msg->msg_flags, err);
+       release_sock(sk);
+       sk_psock_put(sk, psock);
+-      return copied ? copied : err;
++      return copied > 0 ? copied : err;
+ }
+ enum {
diff --git a/queue-6.10/x86-apic-make-x2apic_disable-work-correctly.patch b/queue-6.10/x86-apic-make-x2apic_disable-work-correctly.patch
new file mode 100644 (file)
index 0000000..52a1736
--- /dev/null
@@ -0,0 +1,60 @@
+From 0ecc5be200c84e67114f3640064ba2bae3ba2f5a Mon Sep 17 00:00:00 2001
+From: Yuntao Wang <yuntao.wang@linux.dev>
+Date: Tue, 13 Aug 2024 09:48:27 +0800
+Subject: x86/apic: Make x2apic_disable() work correctly
+
+From: Yuntao Wang <yuntao.wang@linux.dev>
+
+commit 0ecc5be200c84e67114f3640064ba2bae3ba2f5a upstream.
+
+x2apic_disable() clears x2apic_state and x2apic_mode unconditionally, even
+when the state is X2APIC_ON_LOCKED, which prevents the kernel from
+disabling it, thereby creating an inconsistent state.
+
+Due to the early state check for X2APIC_ON, the code path which warns about
+a locked X2APIC cannot be reached.
+
+Test for state < X2APIC_ON instead and move the clearing of the state and
+mode variables to the place which actually disables X2APIC.
+
+[ tglx: Massaged change log. Added Fixes tag. Moved clearing so it's at the
+       right place for back ports ]
+
+Fixes: a57e456a7b28 ("x86/apic: Fix fallout from x2apic cleanup")
+Signed-off-by: Yuntao Wang <yuntao.wang@linux.dev>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20240813014827.895381-1-yuntao.wang@linux.dev
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/apic/apic.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -1775,12 +1775,9 @@ static __init void apic_set_fixmap(bool
+ static __init void x2apic_disable(void)
+ {
+-      u32 x2apic_id, state = x2apic_state;
++      u32 x2apic_id;
+-      x2apic_mode = 0;
+-      x2apic_state = X2APIC_DISABLED;
+-
+-      if (state != X2APIC_ON)
++      if (x2apic_state < X2APIC_ON)
+               return;
+       x2apic_id = read_apic_id();
+@@ -1793,6 +1790,10 @@ static __init void x2apic_disable(void)
+       }
+       __x2apic_disable();
++
++      x2apic_mode = 0;
++      x2apic_state = X2APIC_DISABLED;
++
+       /*
+        * Don't reread the APIC ID as it was already done from
+        * check_x2apic() and the APIC driver still is a x2APIC variant,
diff --git a/queue-6.10/x86-fpu-avoid-writing-lbr-bit-to-ia32_xss-unless-supported.patch b/queue-6.10/x86-fpu-avoid-writing-lbr-bit-to-ia32_xss-unless-supported.patch
new file mode 100644 (file)
index 0000000..d15760c
--- /dev/null
@@ -0,0 +1,91 @@
+From 2848ff28d180bd63a95da8e5dcbcdd76c1beeb7b Mon Sep 17 00:00:00 2001
+From: Mitchell Levy <levymitchell0@gmail.com>
+Date: Mon, 12 Aug 2024 13:44:12 -0700
+Subject: x86/fpu: Avoid writing LBR bit to IA32_XSS unless supported
+
+From: Mitchell Levy <levymitchell0@gmail.com>
+
+commit 2848ff28d180bd63a95da8e5dcbcdd76c1beeb7b upstream.
+
+There are two distinct CPU features related to the use of XSAVES and LBR:
+whether LBR is itself supported and whether XSAVES supports LBR. The LBR
+subsystem correctly checks both in intel_pmu_arch_lbr_init(), but the
+XSTATE subsystem does not.
+
+The LBR bit is only removed from xfeatures_mask_independent when LBR is not
+supported by the CPU, but there is no validation of XSTATE support.
+
+If XSAVES does not support LBR, the write to IA32_XSS causes a #GP fault,
+leaving the state of IA32_XSS unchanged, i.e. zero. The fault is handled
+with a warning and the boot continues.
+
+Consequently the next XRSTORS which tries to restore supervisor state fails
+with #GP because the RFBM has zero for all supervisor features, which does
+not match the XCOMP_BV field.
+
+As XFEATURE_MASK_FPSTATE includes supervisor features, setting up the FPU
+causes a #GP, which ends up in fpu_reset_from_exception_fixup(). That fails
+due to the same problem, resulting in recursive #GPs until the kernel runs
+out of stack space and double faults.
+
+Prevent this by storing the supported independent features in
+fpu_kernel_cfg during XSTATE initialization and use that cached value for
+retrieving the independent feature bits to be written into IA32_XSS.
+
+[ tglx: Massaged change log ]
+
+Fixes: f0dccc9da4c0 ("x86/fpu/xstate: Support dynamic supervisor feature for LBR")
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20240812-xsave-lbr-fix-v3-1-95bac1bf62f4@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/fpu/types.h |    7 +++++++
+ arch/x86/kernel/fpu/xstate.c     |    3 +++
+ arch/x86/kernel/fpu/xstate.h     |    4 ++--
+ 3 files changed, 12 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/fpu/types.h
++++ b/arch/x86/include/asm/fpu/types.h
+@@ -591,6 +591,13 @@ struct fpu_state_config {
+        * even without XSAVE support, i.e. legacy features FP + SSE
+        */
+       u64 legacy_features;
++      /*
++       * @independent_features:
++       *
++       * Features that are supported by XSAVES, but not managed as part of
++       * the FPU core, such as LBR
++       */
++      u64 independent_features;
+ };
+ /* FPU state configuration information */
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -788,6 +788,9 @@ void __init fpu__init_system_xstate(unsi
+               goto out_disable;
+       }
++      fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
++                                            XFEATURE_MASK_INDEPENDENT;
++
+       /*
+        * Clear XSAVE features that are disabled in the normal CPUID.
+        */
+--- a/arch/x86/kernel/fpu/xstate.h
++++ b/arch/x86/kernel/fpu/xstate.h
+@@ -62,9 +62,9 @@ static inline u64 xfeatures_mask_supervi
+ static inline u64 xfeatures_mask_independent(void)
+ {
+       if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR))
+-              return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
++              return fpu_kernel_cfg.independent_features & ~XFEATURE_MASK_LBR;
+-      return XFEATURE_MASK_INDEPENDENT;
++      return fpu_kernel_cfg.independent_features;
+ }
+ /* XSAVE/XRSTOR wrapper functions */