]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.7-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 27 Jan 2024 22:16:48 +0000 (14:16 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 27 Jan 2024 22:16:48 +0000 (14:16 -0800)
added patches:
btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch
btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch
btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch
btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch
btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch
btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch
btrfs-zoned-fix-lock-ordering-in-btrfs_zone_activate.patch
cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch
drm-amdgpu-fix-the-null-pointer-when-load-rlc-firmware.patch
exec-fix-error-handling-in-begin_new_exec.patch
firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch
gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch
hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch
ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch
netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch
netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch
nfsd-fix-release_lockowner.patch
ovl-mark-xwhiteouts-directory-with-overlay.opaque-x.patch
platform-x86-intel-uncore-freq-fix-types-in-sysfs-callbacks.patch
platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch
rbd-don-t-move-requests-to-the-running-list-on-errors.patch
revert-drivers-firmware-move-sysfb_init-from-device_initcall-to-subsys_initcall_sync.patch
revert-drm-i915-dsi-do-display-on-sequence-later-on-icl.patch
revert-nouveau-push-event-block-allowing-out-of-the-fence-context.patch
wifi-iwlwifi-fix-a-memory-corruption.patch
xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch

27 files changed:
queue-6.7/btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch [new file with mode: 0644]
queue-6.7/btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch [new file with mode: 0644]
queue-6.7/btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch [new file with mode: 0644]
queue-6.7/btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch [new file with mode: 0644]
queue-6.7/btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch [new file with mode: 0644]
queue-6.7/btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch [new file with mode: 0644]
queue-6.7/btrfs-zoned-fix-lock-ordering-in-btrfs_zone_activate.patch [new file with mode: 0644]
queue-6.7/cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch [new file with mode: 0644]
queue-6.7/drm-amdgpu-fix-the-null-pointer-when-load-rlc-firmware.patch [new file with mode: 0644]
queue-6.7/exec-fix-error-handling-in-begin_new_exec.patch [new file with mode: 0644]
queue-6.7/firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch [new file with mode: 0644]
queue-6.7/gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch [new file with mode: 0644]
queue-6.7/hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch [new file with mode: 0644]
queue-6.7/ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch [new file with mode: 0644]
queue-6.7/netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch [new file with mode: 0644]
queue-6.7/netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch [new file with mode: 0644]
queue-6.7/nfsd-fix-release_lockowner.patch [new file with mode: 0644]
queue-6.7/ovl-mark-xwhiteouts-directory-with-overlay.opaque-x.patch [new file with mode: 0644]
queue-6.7/platform-x86-intel-uncore-freq-fix-types-in-sysfs-callbacks.patch [new file with mode: 0644]
queue-6.7/platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch [new file with mode: 0644]
queue-6.7/rbd-don-t-move-requests-to-the-running-list-on-errors.patch [new file with mode: 0644]
queue-6.7/revert-drivers-firmware-move-sysfb_init-from-device_initcall-to-subsys_initcall_sync.patch [new file with mode: 0644]
queue-6.7/revert-drm-i915-dsi-do-display-on-sequence-later-on-icl.patch [new file with mode: 0644]
queue-6.7/revert-nouveau-push-event-block-allowing-out-of-the-fence-context.patch [new file with mode: 0644]
queue-6.7/series
queue-6.7/wifi-iwlwifi-fix-a-memory-corruption.patch [new file with mode: 0644]
queue-6.7/xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch [new file with mode: 0644]

diff --git a/queue-6.7/btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch b/queue-6.7/btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch
new file mode 100644 (file)
index 0000000..99e3e2b
--- /dev/null
@@ -0,0 +1,116 @@
+From 3324d0547861b16cf436d54abba7052e0c8aa9de Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Thu, 4 Jan 2024 11:48:47 -0800
+Subject: btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted
+
+From: Omar Sandoval <osandov@fb.com>
+
+commit 3324d0547861b16cf436d54abba7052e0c8aa9de upstream.
+
+Sweet Tea spotted a race between subvolume deletion and snapshotting
+that can result in the root item for the snapshot having the
+BTRFS_ROOT_SUBVOL_DEAD flag set. The race is:
+
+Thread 1                                      | Thread 2
+----------------------------------------------|----------
+btrfs_delete_subvolume                        |
+  btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)|
+                                              |btrfs_mksubvol
+                                              |  down_read(subvol_sem)
+                                              |  create_snapshot
+                                              |    ...
+                                              |    create_pending_snapshot
+                                              |      copy root item from source
+  down_write(subvol_sem)                      |
+
+This flag is only checked in send and swap activate, which this would
+cause to fail mysteriously.
+
+create_snapshot() now checks the root refs to reject a deleted
+subvolume, so we can fix this by locking subvol_sem earlier so that the
+BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically.
+
+CC: stable@vger.kernel.org # 4.14+
+Reported-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c |   22 +++++++++++++---------
+ 1 file changed, 13 insertions(+), 9 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -4449,6 +4449,8 @@ int btrfs_delete_subvolume(struct btrfs_
+       u64 root_flags;
+       int ret;
++      down_write(&fs_info->subvol_sem);
++
+       /*
+        * Don't allow to delete a subvolume with send in progress. This is
+        * inside the inode lock so the error handling that has to drop the bit
+@@ -4460,25 +4462,25 @@ int btrfs_delete_subvolume(struct btrfs_
+               btrfs_warn(fs_info,
+                          "attempt to delete subvolume %llu during send",
+                          dest->root_key.objectid);
+-              return -EPERM;
++              ret = -EPERM;
++              goto out_up_write;
+       }
+       if (atomic_read(&dest->nr_swapfiles)) {
+               spin_unlock(&dest->root_item_lock);
+               btrfs_warn(fs_info,
+                          "attempt to delete subvolume %llu with active swapfile",
+                          root->root_key.objectid);
+-              return -EPERM;
++              ret = -EPERM;
++              goto out_up_write;
+       }
+       root_flags = btrfs_root_flags(&dest->root_item);
+       btrfs_set_root_flags(&dest->root_item,
+                            root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+       spin_unlock(&dest->root_item_lock);
+-      down_write(&fs_info->subvol_sem);
+-
+       ret = may_destroy_subvol(dest);
+       if (ret)
+-              goto out_up_write;
++              goto out_undead;
+       btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
+       /*
+@@ -4488,7 +4490,7 @@ int btrfs_delete_subvolume(struct btrfs_
+        */
+       ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
+       if (ret)
+-              goto out_up_write;
++              goto out_undead;
+       trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans)) {
+@@ -4554,15 +4556,17 @@ out_end_trans:
+       inode->i_flags |= S_DEAD;
+ out_release:
+       btrfs_subvolume_release_metadata(root, &block_rsv);
+-out_up_write:
+-      up_write(&fs_info->subvol_sem);
++out_undead:
+       if (ret) {
+               spin_lock(&dest->root_item_lock);
+               root_flags = btrfs_root_flags(&dest->root_item);
+               btrfs_set_root_flags(&dest->root_item,
+                               root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
+               spin_unlock(&dest->root_item_lock);
+-      } else {
++      }
++out_up_write:
++      up_write(&fs_info->subvol_sem);
++      if (!ret) {
+               d_invalidate(dentry);
+               btrfs_prune_dentries(dest);
+               ASSERT(dest->send_in_progress == 0);
diff --git a/queue-6.7/btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch b/queue-6.7/btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch
new file mode 100644 (file)
index 0000000..bd9c0e4
--- /dev/null
@@ -0,0 +1,54 @@
+From 173431b274a9a54fc10b273b46e67f46bcf62d2e Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 10 Jan 2024 08:58:26 +1030
+Subject: btrfs: defrag: reject unknown flags of btrfs_ioctl_defrag_range_args
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 173431b274a9a54fc10b273b46e67f46bcf62d2e upstream.
+
+Add extra sanity check for btrfs_ioctl_defrag_range_args::flags.
+
+This is not really to enhance fuzzing tests, but as a preparation for
+future expansion on btrfs_ioctl_defrag_range_args.
+
+In the future we're going to add new members, allowing more fine tuning
+for btrfs defrag.  Without the -ENONOTSUPP error, there would be no way
+to detect if the kernel supports those new defrag features.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c           |    4 ++++
+ include/uapi/linux/btrfs.h |    3 +++
+ 2 files changed, 7 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -2608,6 +2608,10 @@ static int btrfs_ioctl_defrag(struct fil
+                               ret = -EFAULT;
+                               goto out;
+                       }
++                      if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
++                              ret = -EOPNOTSUPP;
++                              goto out;
++                      }
+                       /* compression requires us to start the IO */
+                       if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
+                               range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
+--- a/include/uapi/linux/btrfs.h
++++ b/include/uapi/linux/btrfs.h
+@@ -614,6 +614,9 @@ struct btrfs_ioctl_clone_range_args {
+  */
+ #define BTRFS_DEFRAG_RANGE_COMPRESS 1
+ #define BTRFS_DEFRAG_RANGE_START_IO 2
++#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS |          \
++                                       BTRFS_DEFRAG_RANGE_START_IO)
++
+ struct btrfs_ioctl_defrag_range_args {
+       /* start of the defrag operation */
+       __u64 start;
diff --git a/queue-6.7/btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch b/queue-6.7/btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch
new file mode 100644 (file)
index 0000000..b3b2731
--- /dev/null
@@ -0,0 +1,97 @@
+From 7081929ab2572920e94d70be3d332e5c9f97095a Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Thu, 4 Jan 2024 11:48:46 -0800
+Subject: btrfs: don't abort filesystem when attempting to snapshot deleted subvolume
+
+From: Omar Sandoval <osandov@fb.com>
+
+commit 7081929ab2572920e94d70be3d332e5c9f97095a upstream.
+
+If the source file descriptor to the snapshot ioctl refers to a deleted
+subvolume, we get the following abort:
+
+  BTRFS: Transaction aborted (error -2)
+  WARNING: CPU: 0 PID: 833 at fs/btrfs/transaction.c:1875 create_pending_snapshot+0x1040/0x1190 [btrfs]
+  Modules linked in: pata_acpi btrfs ata_piix libata scsi_mod virtio_net blake2b_generic xor net_failover virtio_rng failover scsi_common rng_core raid6_pq libcrc32c
+  CPU: 0 PID: 833 Comm: t_snapshot_dele Not tainted 6.7.0-rc6 #2
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014
+  RIP: 0010:create_pending_snapshot+0x1040/0x1190 [btrfs]
+  RSP: 0018:ffffa09c01337af8 EFLAGS: 00010282
+  RAX: 0000000000000000 RBX: ffff9982053e7c78 RCX: 0000000000000027
+  RDX: ffff99827dc20848 RSI: 0000000000000001 RDI: ffff99827dc20840
+  RBP: ffffa09c01337c00 R08: 0000000000000000 R09: ffffa09c01337998
+  R10: 0000000000000003 R11: ffffffffb96da248 R12: fffffffffffffffe
+  R13: ffff99820535bb28 R14: ffff99820b7bd000 R15: ffff99820381ea80
+  FS:  00007fe20aadabc0(0000) GS:ffff99827dc00000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000559a120b502f CR3: 00000000055b6000 CR4: 00000000000006f0
+  Call Trace:
+   <TASK>
+   ? create_pending_snapshot+0x1040/0x1190 [btrfs]
+   ? __warn+0x81/0x130
+   ? create_pending_snapshot+0x1040/0x1190 [btrfs]
+   ? report_bug+0x171/0x1a0
+   ? handle_bug+0x3a/0x70
+   ? exc_invalid_op+0x17/0x70
+   ? asm_exc_invalid_op+0x1a/0x20
+   ? create_pending_snapshot+0x1040/0x1190 [btrfs]
+   ? create_pending_snapshot+0x1040/0x1190 [btrfs]
+   create_pending_snapshots+0x92/0xc0 [btrfs]
+   btrfs_commit_transaction+0x66b/0xf40 [btrfs]
+   btrfs_mksubvol+0x301/0x4d0 [btrfs]
+   btrfs_mksnapshot+0x80/0xb0 [btrfs]
+   __btrfs_ioctl_snap_create+0x1c2/0x1d0 [btrfs]
+   btrfs_ioctl_snap_create_v2+0xc4/0x150 [btrfs]
+   btrfs_ioctl+0x8a6/0x2650 [btrfs]
+   ? kmem_cache_free+0x22/0x340
+   ? do_sys_openat2+0x97/0xe0
+   __x64_sys_ioctl+0x97/0xd0
+   do_syscall_64+0x46/0xf0
+   entry_SYSCALL_64_after_hwframe+0x6e/0x76
+  RIP: 0033:0x7fe20abe83af
+  RSP: 002b:00007ffe6eff1360 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+  RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fe20abe83af
+  RDX: 00007ffe6eff23c0 RSI: 0000000050009417 RDI: 0000000000000003
+  RBP: 0000000000000003 R08: 0000000000000000 R09: 00007fe20ad16cd0
+  R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+  R13: 00007ffe6eff13c0 R14: 00007fe20ad45000 R15: 0000559a120b6d58
+   </TASK>
+  ---[ end trace 0000000000000000 ]---
+  BTRFS: error (device vdc: state A) in create_pending_snapshot:1875: errno=-2 No such entry
+  BTRFS info (device vdc: state EA): forced readonly
+  BTRFS warning (device vdc: state EA): Skipping commit of aborted transaction.
+  BTRFS: error (device vdc: state EA) in cleanup_transaction:2055: errno=-2 No such entry
+
+This happens because create_pending_snapshot() initializes the new root
+item as a copy of the source root item. This includes the refs field,
+which is 0 for a deleted subvolume. The call to btrfs_insert_root()
+therefore inserts a root with refs == 0. btrfs_get_new_fs_root() then
+finds the root and returns -ENOENT if refs == 0, which causes
+create_pending_snapshot() to abort.
+
+Fix it by checking the source root's refs before attempting the
+snapshot, but after locking subvol_sem to avoid racing with deletion.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -790,6 +790,9 @@ static int create_snapshot(struct btrfs_
+               return -EOPNOTSUPP;
+       }
++      if (btrfs_root_refs(&root->root_item) == 0)
++              return -ENOENT;
++
+       if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
+               return -EINVAL;
diff --git a/queue-6.7/btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch b/queue-6.7/btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch
new file mode 100644 (file)
index 0000000..2c1ee98
--- /dev/null
@@ -0,0 +1,37 @@
+From a208b3f132b48e1f94f620024e66fea635925877 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Mon, 15 Jan 2024 20:30:26 +0100
+Subject: btrfs: don't warn if discard range is not aligned to sector
+
+From: David Sterba <dsterba@suse.com>
+
+commit a208b3f132b48e1f94f620024e66fea635925877 upstream.
+
+There's a warning in btrfs_issue_discard() when the range is not aligned
+to 512 bytes, originally added in 4d89d377bbb0 ("btrfs:
+btrfs_issue_discard ensure offset/length are aligned to sector
+boundaries"). We can't do sub-sector writes anyway so the adjustment is
+the only thing that we can do and the warning is unnecessary.
+
+CC: stable@vger.kernel.org # 4.19+
+Reported-by: syzbot+4a4f1eba14eb5c3417d1@syzkaller.appspotmail.com
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -1260,7 +1260,8 @@ static int btrfs_issue_discard(struct bl
+       u64 bytes_left, end;
+       u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
+-      if (WARN_ON(start != aligned_start)) {
++      /* Adjust the range to be aligned to 512B sectors if necessary. */
++      if (start != aligned_start) {
+               len -= aligned_start - start;
+               len = round_down(len, 1 << SECTOR_SHIFT);
+               start = aligned_start;
diff --git a/queue-6.7/btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch b/queue-6.7/btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch
new file mode 100644 (file)
index 0000000..c42b72d
--- /dev/null
@@ -0,0 +1,57 @@
+From f03e274a8b29d1d1c1bbd7f764766cb5ca537ab7 Mon Sep 17 00:00:00 2001
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Wed, 3 Jan 2024 13:31:27 +0300
+Subject: btrfs: ref-verify: free ref cache before clearing mount opt
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+commit f03e274a8b29d1d1c1bbd7f764766cb5ca537ab7 upstream.
+
+As clearing REF_VERIFY mount option indicates there were some errors in a
+ref-verify process, a ref cache is not relevant anymore and should be
+freed.
+
+btrfs_free_ref_cache() requires REF_VERIFY option being set so call
+it just before clearing the mount option.
+
+Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
+
+Reported-by: syzbot+be14ed7728594dc8bd42@syzkaller.appspotmail.com
+Fixes: fd708b81d972 ("Btrfs: add a extent ref verify tool")
+CC: stable@vger.kernel.org # 5.4+
+Closes: https://lore.kernel.org/lkml/000000000000e5a65c05ee832054@google.com/
+Reported-by: syzbot+c563a3c79927971f950f@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/lkml/0000000000007fe09705fdc6086c@google.com/
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ref-verify.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/ref-verify.c
++++ b/fs/btrfs/ref-verify.c
+@@ -889,8 +889,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_i
+ out_unlock:
+       spin_unlock(&fs_info->ref_verify_lock);
+ out:
+-      if (ret)
++      if (ret) {
++              btrfs_free_ref_cache(fs_info);
+               btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
++      }
+       return ret;
+ }
+@@ -1021,8 +1023,8 @@ int btrfs_build_ref_tree(struct btrfs_fs
+               }
+       }
+       if (ret) {
+-              btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
+               btrfs_free_ref_cache(fs_info);
++              btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
+       }
+       btrfs_free_path(path);
+       return ret;
diff --git a/queue-6.7/btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch b/queue-6.7/btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch
new file mode 100644 (file)
index 0000000..1af82a6
--- /dev/null
@@ -0,0 +1,35 @@
+From f398e70dd69e6ceea71463a5380e6118f219197e Mon Sep 17 00:00:00 2001
+From: Chung-Chiang Cheng <cccheng@synology.com>
+Date: Fri, 12 Jan 2024 15:41:05 +0800
+Subject: btrfs: tree-checker: fix inline ref size in error messages
+
+From: Chung-Chiang Cheng <cccheng@synology.com>
+
+commit f398e70dd69e6ceea71463a5380e6118f219197e upstream.
+
+The error message should accurately reflect the size rather than the
+type.
+
+Fixes: f82d1c7ca8ae ("btrfs: tree-checker: Add EXTENT_ITEM and METADATA_ITEM check")
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Chung-Chiang Cheng <cccheng@synology.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -1436,7 +1436,7 @@ static int check_extent_item(struct exte
+               if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
+                       extent_err(leaf, slot,
+ "inline ref item overflows extent item, ptr %lu iref size %u end %lu",
+-                                 ptr, inline_type, end);
++                                 ptr, btrfs_extent_inline_ref_size(inline_type), end);
+                       return -EUCLEAN;
+               }
diff --git a/queue-6.7/btrfs-zoned-fix-lock-ordering-in-btrfs_zone_activate.patch b/queue-6.7/btrfs-zoned-fix-lock-ordering-in-btrfs_zone_activate.patch
new file mode 100644 (file)
index 0000000..ded22c8
--- /dev/null
@@ -0,0 +1,99 @@
+From b18f3b60b35a8c01c9a2a0f0d6424c6d73971dc3 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Fri, 22 Dec 2023 13:56:34 +0900
+Subject: btrfs: zoned: fix lock ordering in btrfs_zone_activate()
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit b18f3b60b35a8c01c9a2a0f0d6424c6d73971dc3 upstream.
+
+The btrfs CI reported a lockdep warning as follows by running generic
+generic/129.
+
+   WARNING: possible circular locking dependency detected
+   6.7.0-rc5+ #1 Not tainted
+   ------------------------------------------------------
+   kworker/u5:5/793427 is trying to acquire lock:
+   ffff88813256d028 (&cache->lock){+.+.}-{2:2}, at: btrfs_zone_finish_one_bg+0x5e/0x130
+   but task is already holding lock:
+   ffff88810a23a318 (&fs_info->zone_active_bgs_lock){+.+.}-{2:2}, at: btrfs_zone_finish_one_bg+0x34/0x130
+   which lock already depends on the new lock.
+
+   the existing dependency chain (in reverse order) is:
+   -> #1 (&fs_info->zone_active_bgs_lock){+.+.}-{2:2}:
+   ...
+   -> #0 (&cache->lock){+.+.}-{2:2}:
+   ...
+
+This is because we take fs_info->zone_active_bgs_lock after a block_group's
+lock in btrfs_zone_activate() while doing the opposite in other places.
+
+Fix the issue by expanding the fs_info->zone_active_bgs_lock's critical
+section and taking it before a block_group's lock.
+
+Fixes: a7e1ac7bdc5a ("btrfs: zoned: reserve zones for an active metadata/system block group")
+CC: stable@vger.kernel.org # 6.6
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/zoned.c |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2094,6 +2094,7 @@ bool btrfs_zone_activate(struct btrfs_bl
+       map = block_group->physical_map;
++      spin_lock(&fs_info->zone_active_bgs_lock);
+       spin_lock(&block_group->lock);
+       if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
+               ret = true;
+@@ -2106,7 +2107,6 @@ bool btrfs_zone_activate(struct btrfs_bl
+               goto out_unlock;
+       }
+-      spin_lock(&fs_info->zone_active_bgs_lock);
+       for (i = 0; i < map->num_stripes; i++) {
+               struct btrfs_zoned_device_info *zinfo;
+               int reserved = 0;
+@@ -2126,20 +2126,17 @@ bool btrfs_zone_activate(struct btrfs_bl
+                */
+               if (atomic_read(&zinfo->active_zones_left) <= reserved) {
+                       ret = false;
+-                      spin_unlock(&fs_info->zone_active_bgs_lock);
+                       goto out_unlock;
+               }
+               if (!btrfs_dev_set_active_zone(device, physical)) {
+                       /* Cannot activate the zone */
+                       ret = false;
+-                      spin_unlock(&fs_info->zone_active_bgs_lock);
+                       goto out_unlock;
+               }
+               if (!is_data)
+                       zinfo->reserved_active_zones--;
+       }
+-      spin_unlock(&fs_info->zone_active_bgs_lock);
+       /* Successfully activated all the zones */
+       set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
+@@ -2147,8 +2144,6 @@ bool btrfs_zone_activate(struct btrfs_bl
+       /* For the active block group list */
+       btrfs_get_block_group(block_group);
+-
+-      spin_lock(&fs_info->zone_active_bgs_lock);
+       list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
+       spin_unlock(&fs_info->zone_active_bgs_lock);
+@@ -2156,6 +2151,7 @@ bool btrfs_zone_activate(struct btrfs_bl
+ out_unlock:
+       spin_unlock(&block_group->lock);
++      spin_unlock(&fs_info->zone_active_bgs_lock);
+       return ret;
+ }
diff --git a/queue-6.7/cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch b/queue-6.7/cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch
new file mode 100644 (file)
index 0000000..b172728
--- /dev/null
@@ -0,0 +1,138 @@
+From 192cdb1c907fd8df2d764c5bb17496e415e59391 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Mon, 22 Jan 2024 15:18:11 +0100
+Subject: cpufreq: intel_pstate: Refine computation of P-state for given frequency
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit 192cdb1c907fd8df2d764c5bb17496e415e59391 upstream.
+
+On systems using HWP, if a given frequency is equal to the maximum turbo
+frequency or the maximum non-turbo frequency, the HWP performance level
+corresponding to it is already known and can be used directly without
+any computation.
+
+Accordingly, adjust the code to use the known HWP performance levels in
+the cases mentioned above.
+
+This also helps to avoid limiting CPU capacity artificially in some
+cases when the BIOS produces the HWP_CAP numbers using a different
+E-core-to-P-core performance scaling factor than expected by the kernel.
+
+Fixes: f5c8cf2a4992 ("cpufreq: intel_pstate: hybrid: Use known scaling factor for P-cores")
+Cc: 6.1+ <stable@vger.kernel.org> # 6.1+
+Tested-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpufreq/intel_pstate.c |   55 +++++++++++++++++++++++++----------------
+ 1 file changed, 34 insertions(+), 21 deletions(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -526,6 +526,30 @@ static int intel_pstate_cppc_get_scaling
+ }
+ #endif /* CONFIG_ACPI_CPPC_LIB */
++static int intel_pstate_freq_to_hwp_rel(struct cpudata *cpu, int freq,
++                                      unsigned int relation)
++{
++      if (freq == cpu->pstate.turbo_freq)
++              return cpu->pstate.turbo_pstate;
++
++      if (freq == cpu->pstate.max_freq)
++              return cpu->pstate.max_pstate;
++
++      switch (relation) {
++      case CPUFREQ_RELATION_H:
++              return freq / cpu->pstate.scaling;
++      case CPUFREQ_RELATION_C:
++              return DIV_ROUND_CLOSEST(freq, cpu->pstate.scaling);
++      }
++
++      return DIV_ROUND_UP(freq, cpu->pstate.scaling);
++}
++
++static int intel_pstate_freq_to_hwp(struct cpudata *cpu, int freq)
++{
++      return intel_pstate_freq_to_hwp_rel(cpu, freq, CPUFREQ_RELATION_L);
++}
++
+ /**
+  * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
+  * @cpu: Target CPU.
+@@ -543,6 +567,7 @@ static void intel_pstate_hybrid_hwp_adju
+       int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
+       int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu);
+       int scaling = cpu->pstate.scaling;
++      int freq;
+       pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
+       pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
+@@ -556,16 +581,16 @@ static void intel_pstate_hybrid_hwp_adju
+       cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
+                                        perf_ctl_scaling);
+-      cpu->pstate.max_pstate_physical =
+-                      DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
+-                                   scaling);
++      freq = perf_ctl_max_phys * perf_ctl_scaling;
++      cpu->pstate.max_pstate_physical = intel_pstate_freq_to_hwp(cpu, freq);
+-      cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
++      freq = cpu->pstate.min_pstate * perf_ctl_scaling;
++      cpu->pstate.min_freq = freq;
+       /*
+        * Cast the min P-state value retrieved via pstate_funcs.get_min() to
+        * the effective range of HWP performance levels.
+        */
+-      cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling);
++      cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq);
+ }
+ static inline void update_turbo_state(void)
+@@ -2524,13 +2549,12 @@ static void intel_pstate_update_perf_lim
+        * abstract values to represent performance rather than pure ratios.
+        */
+       if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) {
+-              int scaling = cpu->pstate.scaling;
+               int freq;
+               freq = max_policy_perf * perf_ctl_scaling;
+-              max_policy_perf = DIV_ROUND_UP(freq, scaling);
++              max_policy_perf = intel_pstate_freq_to_hwp(cpu, freq);
+               freq = min_policy_perf * perf_ctl_scaling;
+-              min_policy_perf = DIV_ROUND_UP(freq, scaling);
++              min_policy_perf = intel_pstate_freq_to_hwp(cpu, freq);
+       }
+       pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
+@@ -2904,18 +2928,7 @@ static int intel_cpufreq_target(struct c
+       cpufreq_freq_transition_begin(policy, &freqs);
+-      switch (relation) {
+-      case CPUFREQ_RELATION_L:
+-              target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
+-              break;
+-      case CPUFREQ_RELATION_H:
+-              target_pstate = freqs.new / cpu->pstate.scaling;
+-              break;
+-      default:
+-              target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
+-              break;
+-      }
+-
++      target_pstate = intel_pstate_freq_to_hwp_rel(cpu, freqs.new, relation);
+       target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false);
+       freqs.new = target_pstate * cpu->pstate.scaling;
+@@ -2933,7 +2946,7 @@ static unsigned int intel_cpufreq_fast_s
+       update_turbo_state();
+-      target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
++      target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq);
+       target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true);
diff --git a/queue-6.7/drm-amdgpu-fix-the-null-pointer-when-load-rlc-firmware.patch b/queue-6.7/drm-amdgpu-fix-the-null-pointer-when-load-rlc-firmware.patch
new file mode 100644 (file)
index 0000000..a39368d
--- /dev/null
@@ -0,0 +1,49 @@
+From bc03c02cc1991a066b23e69bbcc0f66e8f1f7453 Mon Sep 17 00:00:00 2001
+From: Ma Jun <Jun.Ma2@amd.com>
+Date: Fri, 12 Jan 2024 13:33:24 +0800
+Subject: drm/amdgpu: Fix the null pointer when load rlc firmware
+
+From: Ma Jun <Jun.Ma2@amd.com>
+
+commit bc03c02cc1991a066b23e69bbcc0f66e8f1f7453 upstream.
+
+If the RLC firmware is invalid because of wrong header size,
+the pointer to the rlc firmware is released in function
+amdgpu_ucode_request. There will be a null pointer error
+in subsequent use. So skip validation to fix it.
+
+Fixes: 3da9b71563cb ("drm/amd: Use `amdgpu_ucode_*` helpers for GFX10")
+Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c |   15 ++++++---------
+ 1 file changed, 6 insertions(+), 9 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+@@ -3996,16 +3996,13 @@ static int gfx_v10_0_init_microcode(stru
+       if (!amdgpu_sriov_vf(adev)) {
+               snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
+-              err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+-              /* don't check this.  There are apparently firmwares in the wild with
+-               * incorrect size in the header
+-               */
+-              if (err == -ENODEV)
+-                      goto out;
++              err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+               if (err)
+-                      dev_dbg(adev->dev,
+-                              "gfx10: amdgpu_ucode_request() failed \"%s\"\n",
+-                              fw_name);
++                      goto out;
++
++              /* don't validate this firmware. There are apparently firmwares
++               * in the wild with incorrect size in the header
++               */
+               rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+               version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+               version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
diff --git a/queue-6.7/exec-fix-error-handling-in-begin_new_exec.patch b/queue-6.7/exec-fix-error-handling-in-begin_new_exec.patch
new file mode 100644 (file)
index 0000000..bde8ac9
--- /dev/null
@@ -0,0 +1,37 @@
+From 84c39ec57d409e803a9bb6e4e85daf1243e0e80b Mon Sep 17 00:00:00 2001
+From: Bernd Edlinger <bernd.edlinger@hotmail.de>
+Date: Mon, 22 Jan 2024 19:34:21 +0100
+Subject: exec: Fix error handling in begin_new_exec()
+
+From: Bernd Edlinger <bernd.edlinger@hotmail.de>
+
+commit 84c39ec57d409e803a9bb6e4e85daf1243e0e80b upstream.
+
+If get_unused_fd_flags() fails, the error handling is incomplete because
+bprm->cred is already set to NULL, and therefore free_bprm will not
+unlock the cred_guard_mutex. Note there are two error conditions which
+end up here, one before and one after bprm->cred is cleared.
+
+Fixes: b8a61c9e7b4a ("exec: Generic execfd support")
+Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
+Acked-by: Eric W. Biederman <ebiederm@xmission.com>
+Link: https://lore.kernel.org/r/AS8P193MB128517ADB5EFF29E04389EDAE4752@AS8P193MB1285.EURP193.PROD.OUTLOOK.COM
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exec.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1408,6 +1408,9 @@ int begin_new_exec(struct linux_binprm *
+ out_unlock:
+       up_write(&me->signal->exec_update_lock);
++      if (!bprm->cred)
++              mutex_unlock(&me->signal->cred_guard_mutex);
++
+ out:
+       return retval;
+ }
diff --git a/queue-6.7/firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch b/queue-6.7/firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch
new file mode 100644 (file)
index 0000000..3e22944
--- /dev/null
@@ -0,0 +1,96 @@
+From 437a310b22244d4e0b78665c3042e5d1c0f45306 Mon Sep 17 00:00:00 2001
+From: Cristian Marussi <cristian.marussi@arm.com>
+Date: Wed, 20 Dec 2023 17:21:12 +0000
+Subject: firmware: arm_scmi: Check mailbox/SMT channel for consistency
+
+From: Cristian Marussi <cristian.marussi@arm.com>
+
+commit 437a310b22244d4e0b78665c3042e5d1c0f45306 upstream.
+
+On reception of a completion interrupt the shared memory area is accessed
+to retrieve the message header at first and then, if the message sequence
+number identifies a transaction which is still pending, the related
+payload is fetched too.
+
+When an SCMI command times out the channel ownership remains with the
+platform until eventually a late reply is received and, as a consequence,
+any further transmission attempt remains pending, waiting for the channel
+to be relinquished by the platform.
+
+Once that late reply is received the channel ownership is given back
+to the agent and any pending request is then allowed to proceed and
+overwrite the SMT area of the just delivered late reply; then the wait
+for the reply to the new request starts.
+
+It has been observed that the spurious IRQ related to the late reply can
+be wrongly associated with the freshly enqueued request: when that happens
+the SCMI stack in-flight lookup procedure is fooled by the fact that the
+message header now present in the SMT area is related to the new pending
+transaction, even though the real reply has still to arrive.
+
+This race-condition on the A2P channel can be detected by looking at the
+channel status bits: a genuine reply from the platform will have set the
+channel free bit before triggering the completion IRQ.
+
+Add a consistency check to validate such condition in the A2P ISR.
+
+Reported-by: Xinglong Yang <xinglong.yang@cixtech.com>
+Closes: https://lore.kernel.org/all/PUZPR06MB54981E6FA00D82BFDBB864FBF08DA@PUZPR06MB5498.apcprd06.prod.outlook.com/
+Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type")
+Cc: stable@vger.kernel.org # 5.15+
+Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
+Tested-by: Xinglong Yang <xinglong.yang@cixtech.com>
+Link: https://lore.kernel.org/r/20231220172112.763539-1-cristian.marussi@arm.com
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/arm_scmi/common.h  |    1 +
+ drivers/firmware/arm_scmi/mailbox.c |   14 ++++++++++++++
+ drivers/firmware/arm_scmi/shmem.c   |    6 ++++++
+ 3 files changed, 21 insertions(+)
+
+--- a/drivers/firmware/arm_scmi/common.h
++++ b/drivers/firmware/arm_scmi/common.h
+@@ -314,6 +314,7 @@ void shmem_fetch_notification(struct scm
+ void shmem_clear_channel(struct scmi_shared_mem __iomem *shmem);
+ bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem,
+                    struct scmi_xfer *xfer);
++bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem);
+ /* declarations for message passing transports */
+ struct scmi_msg_payld;
+--- a/drivers/firmware/arm_scmi/mailbox.c
++++ b/drivers/firmware/arm_scmi/mailbox.c
+@@ -45,6 +45,20 @@ static void rx_callback(struct mbox_clie
+ {
+       struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl);
++      /*
++       * An A2P IRQ is NOT valid when received while the platform still has
++       * the ownership of the channel, because the platform at first releases
++       * the SMT channel and then sends the completion interrupt.
++       *
++       * This addresses a possible race condition in which a spurious IRQ from
++       * a previous timed-out reply which arrived late could be wrongly
++       * associated with the next pending transaction.
++       */
++      if (cl->knows_txdone && !shmem_channel_free(smbox->shmem)) {
++              dev_warn(smbox->cinfo->dev, "Ignoring spurious A2P IRQ !\n");
++              return;
++      }
++
+       scmi_rx_callback(smbox->cinfo, shmem_read_header(smbox->shmem), NULL);
+ }
+--- a/drivers/firmware/arm_scmi/shmem.c
++++ b/drivers/firmware/arm_scmi/shmem.c
+@@ -122,3 +122,9 @@ bool shmem_poll_done(struct scmi_shared_
+               (SCMI_SHMEM_CHAN_STAT_CHANNEL_ERROR |
+                SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE);
+ }
++
++bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem)
++{
++      return (ioread32(&shmem->channel_status) &
++                      SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE);
++}
diff --git a/queue-6.7/gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch b/queue-6.7/gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch
new file mode 100644 (file)
index 0000000..d7a0e71
--- /dev/null
@@ -0,0 +1,46 @@
+From 805c74eac8cb306dc69b87b6b066ab4da77ceaf1 Mon Sep 17 00:00:00 2001
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Wed, 17 Jan 2024 08:29:42 -0600
+Subject: gpiolib: acpi: Ignore touchpad wakeup on GPD G1619-04
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+commit 805c74eac8cb306dc69b87b6b066ab4da77ceaf1 upstream.
+
+Spurious wakeups are reported on the GPD G1619-04 which
+can be absolved by programming the GPIO to ignore wakeups.
+
+Cc: stable@vger.kernel.org
+Reported-and-tested-by: George Melikov <mail@gmelikov.ru>
+Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3073
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpio/gpiolib-acpi.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/drivers/gpio/gpiolib-acpi.c
++++ b/drivers/gpio/gpiolib-acpi.c
+@@ -1651,6 +1651,20 @@ static const struct dmi_system_id gpioli
+                       .ignore_interrupt = "INT33FC:00@3",
+               },
+       },
++      {
++              /*
++               * Spurious wakeups from TP_ATTN# pin
++               * Found in BIOS 0.35
++               * https://gitlab.freedesktop.org/drm/amd/-/issues/3073
++               */
++              .matches = {
++                      DMI_MATCH(DMI_SYS_VENDOR, "GPD"),
++                      DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"),
++              },
++              .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
++                      .ignore_wake = "PNP0C50:00@8",
++              },
++      },
+       {} /* Terminating entry */
+ };
diff --git a/queue-6.7/hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch b/queue-6.7/hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch
new file mode 100644 (file)
index 0000000..b7a876c
--- /dev/null
@@ -0,0 +1,56 @@
+From 6941f67ad37d5465b75b9ffc498fcf6897a3c00e Mon Sep 17 00:00:00 2001
+From: Michael Kelley <mhklinux@outlook.com>
+Date: Mon, 22 Jan 2024 08:20:28 -0800
+Subject: hv_netvsc: Calculate correct ring size when PAGE_SIZE is not 4 Kbytes
+
+From: Michael Kelley <mhklinux@outlook.com>
+
+commit 6941f67ad37d5465b75b9ffc498fcf6897a3c00e upstream.
+
+Current code in netvsc_drv_init() incorrectly assumes that PAGE_SIZE
+is 4 Kbytes, which is wrong on ARM64 with 16K or 64K page size. As a
+result, the default VMBus ring buffer size on ARM64 with 64K page size
+is 8 Mbytes instead of the expected 512 Kbytes. While this doesn't break
+anything, a typical VM with 8 vCPUs and 8 netvsc channels wastes 120
+Mbytes (8 channels * 2 ring buffers/channel * 7.5 Mbytes/ring buffer).
+
+Unfortunately, the module parameter specifying the ring buffer size
+is in units of 4 Kbyte pages. Ideally, it should be in units that
+are independent of PAGE_SIZE, but backwards compatibility prevents
+changing that now.
+
+Fix this by having netvsc_drv_init() hardcode 4096 instead of using
+PAGE_SIZE when calculating the ring buffer size in bytes. Also
+use the VMBUS_RING_SIZE macro to ensure proper alignment when running
+with page size larger than 4K.
+
+Cc: <stable@vger.kernel.org> # 5.15.x
+Fixes: 7aff79e297ee ("Drivers: hv: Enable Hyper-V code to be built on ARM64")
+Signed-off-by: Michael Kelley <mhklinux@outlook.com>
+Link: https://lore.kernel.org/r/20240122162028.348885-1-mhklinux@outlook.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -44,7 +44,7 @@
+ static unsigned int ring_size __ro_after_init = 128;
+ module_param(ring_size, uint, 0444);
+-MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
++MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)");
+ unsigned int netvsc_ring_bytes __ro_after_init;
+ static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
+@@ -2805,7 +2805,7 @@ static int __init netvsc_drv_init(void)
+               pr_info("Increased ring_size to %u (min allowed)\n",
+                       ring_size);
+       }
+-      netvsc_ring_bytes = ring_size * PAGE_SIZE;
++      netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096);
+       register_netdevice_notifier(&netvsc_netdev_notifier);
diff --git a/queue-6.7/ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch b/queue-6.7/ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch
new file mode 100644 (file)
index 0000000..c67b18b
--- /dev/null
@@ -0,0 +1,126 @@
+From ebeae8adf89d9a82359f6659b1663d09beec2faa Mon Sep 17 00:00:00 2001
+From: Lin Ma <linma@zju.edu.cn>
+Date: Sun, 21 Jan 2024 15:35:06 +0800
+Subject: ksmbd: fix global oob in ksmbd_nl_policy
+
+From: Lin Ma <linma@zju.edu.cn>
+
+commit ebeae8adf89d9a82359f6659b1663d09beec2faa upstream.
+
+Similar to a reported issue (check the commit b33fb5b801c6 ("net:
+qualcomm: rmnet: fix global oob in rmnet_policy"), my local fuzzer finds
+another global out-of-bounds read for policy ksmbd_nl_policy. See bug
+trace below:
+
+==================================================================
+BUG: KASAN: global-out-of-bounds in validate_nla lib/nlattr.c:386 [inline]
+BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600
+Read of size 1 at addr ffffffff8f24b100 by task syz-executor.1/62810
+
+CPU: 0 PID: 62810 Comm: syz-executor.1 Tainted: G                 N 6.1.0 #3
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0x8b/0xb3 lib/dump_stack.c:106
+ print_address_description mm/kasan/report.c:284 [inline]
+ print_report+0x172/0x475 mm/kasan/report.c:395
+ kasan_report+0xbb/0x1c0 mm/kasan/report.c:495
+ validate_nla lib/nlattr.c:386 [inline]
+ __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600
+ __nla_parse+0x3e/0x50 lib/nlattr.c:697
+ __nlmsg_parse include/net/netlink.h:748 [inline]
+ genl_family_rcv_msg_attrs_parse.constprop.0+0x1b0/0x290 net/netlink/genetlink.c:565
+ genl_family_rcv_msg_doit+0xda/0x330 net/netlink/genetlink.c:734
+ genl_family_rcv_msg net/netlink/genetlink.c:833 [inline]
+ genl_rcv_msg+0x441/0x780 net/netlink/genetlink.c:850
+ netlink_rcv_skb+0x14f/0x410 net/netlink/af_netlink.c:2540
+ genl_rcv+0x24/0x40 net/netlink/genetlink.c:861
+ netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+ netlink_unicast+0x54e/0x800 net/netlink/af_netlink.c:1345
+ netlink_sendmsg+0x930/0xe50 net/netlink/af_netlink.c:1921
+ sock_sendmsg_nosec net/socket.c:714 [inline]
+ sock_sendmsg+0x154/0x190 net/socket.c:734
+ ____sys_sendmsg+0x6df/0x840 net/socket.c:2482
+ ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536
+ __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7fdd66a8f359
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007fdd65e00168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00007fdd66bbcf80 RCX: 00007fdd66a8f359
+RDX: 0000000000000000 RSI: 0000000020000500 RDI: 0000000000000003
+RBP: 00007fdd66ada493 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007ffc84b81aff R14: 00007fdd65e00300 R15: 0000000000022000
+ </TASK>
+
+The buggy address belongs to the variable:
+ ksmbd_nl_policy+0x100/0xa80
+
+The buggy address belongs to the physical page:
+page:0000000034f47940 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1ccc4b
+flags: 0x200000000001000(reserved|node=0|zone=2)
+raw: 0200000000001000 ffffea00073312c8 ffffea00073312c8 0000000000000000
+raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffffffff8f24b000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ffffffff8f24b080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+>ffffffff8f24b100: f9 f9 f9 f9 00 00 f9 f9 f9 f9 f9 f9 00 00 07 f9
+                   ^
+ ffffffff8f24b180: f9 f9 f9 f9 00 05 f9 f9 f9 f9 f9 f9 00 00 00 05
+ ffffffff8f24b200: f9 f9 f9 f9 00 00 03 f9 f9 f9 f9 f9 00 00 04 f9
+==================================================================
+
+To fix it, add a placeholder named __KSMBD_EVENT_MAX and let
+KSMBD_EVENT_MAX to be its original value - 1 according to what other
+netlink families do. Also change two sites that refer to the
+KSMBD_EVENT_MAX to be its original value - 1 according to what other
+
+Cc: stable@vger.kernel.org
+Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/server/ksmbd_netlink.h |    3 ++-
+ fs/smb/server/transport_ipc.c |    4 ++--
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/smb/server/ksmbd_netlink.h
++++ b/fs/smb/server/ksmbd_netlink.h
+@@ -304,7 +304,8 @@ enum ksmbd_event {
+       KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST,
+       KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE      = 15,
+-      KSMBD_EVENT_MAX
++      __KSMBD_EVENT_MAX,
++      KSMBD_EVENT_MAX = __KSMBD_EVENT_MAX - 1
+ };
+ /*
+--- a/fs/smb/server/transport_ipc.c
++++ b/fs/smb/server/transport_ipc.c
+@@ -74,7 +74,7 @@ static int handle_unsupported_event(stru
+ static int handle_generic_event(struct sk_buff *skb, struct genl_info *info);
+ static int ksmbd_ipc_heartbeat_request(void);
+-static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX] = {
++static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = {
+       [KSMBD_EVENT_UNSPEC] = {
+               .len = 0,
+       },
+@@ -403,7 +403,7 @@ static int handle_generic_event(struct s
+               return -EPERM;
+ #endif
+-      if (type >= KSMBD_EVENT_MAX) {
++      if (type > KSMBD_EVENT_MAX) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
diff --git a/queue-6.7/netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch b/queue-6.7/netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch
new file mode 100644 (file)
index 0000000..1005511
--- /dev/null
@@ -0,0 +1,69 @@
+From f342de4e2f33e0e39165d8639387aa6c19dff660 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Sat, 20 Jan 2024 22:50:04 +0100
+Subject: netfilter: nf_tables: reject QUEUE/DROP verdict parameters
+
+From: Florian Westphal <fw@strlen.de>
+
+commit f342de4e2f33e0e39165d8639387aa6c19dff660 upstream.
+
+This reverts commit e0abdadcc6e1.
+
+core.c:nf_hook_slow assumes that the upper 16 bits of NF_DROP
+verdicts contain a valid errno, i.e. -EPERM, -EHOSTUNREACH or similar,
+or 0.
+
+Due to the reverted commit, it's possible to provide a positive
+value, e.g. NF_ACCEPT (1), which results in use-after-free.
+
+It's not clear to me why this commit was made.
+
+NF_QUEUE is not used by nftables; "queue" rules in nftables
+will result in use of "nft_queue" expression.
+
+If we later need to allow specifying errno values from userspace
+(do not know why), this has to call NF_DROP_GETERR and check that
+"err <= 0" holds true.
+
+Fixes: e0abdadcc6e1 ("netfilter: nf_tables: accept QUEUE/DROP verdict parameters")
+Cc: stable@vger.kernel.org
+Reported-by: Notselwyn <notselwyn@pwning.tech>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |   16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -10909,16 +10909,10 @@ static int nft_verdict_init(const struct
+       data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+       switch (data->verdict.code) {
+-      default:
+-              switch (data->verdict.code & NF_VERDICT_MASK) {
+-              case NF_ACCEPT:
+-              case NF_DROP:
+-              case NF_QUEUE:
+-                      break;
+-              default:
+-                      return -EINVAL;
+-              }
+-              fallthrough;
++      case NF_ACCEPT:
++      case NF_DROP:
++      case NF_QUEUE:
++              break;
+       case NFT_CONTINUE:
+       case NFT_BREAK:
+       case NFT_RETURN:
+@@ -10953,6 +10947,8 @@ static int nft_verdict_init(const struct
+               data->verdict.chain = chain;
+               break;
++      default:
++              return -EINVAL;
+       }
+       desc->len = sizeof(data->verdict);
diff --git a/queue-6.7/netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch b/queue-6.7/netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch
new file mode 100644 (file)
index 0000000..ce0b468
--- /dev/null
@@ -0,0 +1,57 @@
+From 01acb2e8666a6529697141a6017edbf206921913 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 18 Jan 2024 10:56:26 +0100
+Subject: netfilter: nft_chain_filter: handle NETDEV_UNREGISTER for inet/ingress basechain
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 01acb2e8666a6529697141a6017edbf206921913 upstream.
+
+Remove netdevice from inet/ingress basechain in case NETDEV_UNREGISTER
+event is reported, otherwise a stale reference to netdevice remains in
+the hook list.
+
+Fixes: 60a3815da702 ("netfilter: add inet ingress support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_chain_filter.c |   11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/net/netfilter/nft_chain_filter.c
++++ b/net/netfilter/nft_chain_filter.c
+@@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct
+                                 unsigned long event, void *ptr)
+ {
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++      struct nft_base_chain *basechain;
+       struct nftables_pernet *nft_net;
+-      struct nft_table *table;
+       struct nft_chain *chain, *nr;
++      struct nft_table *table;
+       struct nft_ctx ctx = {
+               .net    = dev_net(dev),
+       };
+@@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct
+       nft_net = nft_pernet(ctx.net);
+       mutex_lock(&nft_net->commit_mutex);
+       list_for_each_entry(table, &nft_net->tables, list) {
+-              if (table->family != NFPROTO_NETDEV)
++              if (table->family != NFPROTO_NETDEV &&
++                  table->family != NFPROTO_INET)
+                       continue;
+               ctx.family = table->family;
+@@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct
+                       if (!nft_is_base_chain(chain))
+                               continue;
++                      basechain = nft_base_chain(chain);
++                      if (table->family == NFPROTO_INET &&
++                          basechain->ops.hooknum != NF_INET_INGRESS)
++                              continue;
++
+                       ctx.chain = chain;
+                       nft_netdev_event(event, dev, &ctx);
+               }
diff --git a/queue-6.7/nfsd-fix-release_lockowner.patch b/queue-6.7/nfsd-fix-release_lockowner.patch
new file mode 100644 (file)
index 0000000..2d6202a
--- /dev/null
@@ -0,0 +1,144 @@
+From edcf9725150e42beeca42d085149f4c88fa97afd Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 22 Jan 2024 14:58:16 +1100
+Subject: nfsd: fix RELEASE_LOCKOWNER
+
+From: NeilBrown <neilb@suse.de>
+
+commit edcf9725150e42beeca42d085149f4c88fa97afd upstream.
+
+The test on so_count in nfsd4_release_lockowner() is nonsense and
+harmful.  Revert to using check_for_locks(), changing that to not sleep.
+
+First: harmful.
+As is documented in the kdoc comment for nfsd4_release_lockowner(), the
+test on so_count can transiently return a false positive resulting in a
+return of NFS4ERR_LOCKS_HELD when in fact no locks are held.  This is
+clearly a protocol violation and with the Linux NFS client it can cause
+incorrect behaviour.
+
+If RELEASE_LOCKOWNER is sent while some other thread is still
+processing a LOCK request which failed because, at the time that request
+was received, the given owner held a conflicting lock, then the nfsd
+thread processing that LOCK request can hold a reference (conflock) to
+the lock owner that causes nfsd4_release_lockowner() to return an
+incorrect error.
+
+The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it
+never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so
+it knows that the error is impossible.  It assumes the lock owner was in
+fact released so it feels free to use the same lock owner identifier in
+some later locking request.
+
+When it does reuse a lock owner identifier for which a previous RELEASE
+failed, it will naturally use a lock_seqid of zero.  However the server,
+which didn't release the lock owner, will expect a larger lock_seqid and
+so will respond with NFS4ERR_BAD_SEQID.
+
+So clearly it is harmful to allow a false positive, which testing
+so_count allows.
+
+The test is nonsense because ... well... it doesn't mean anything.
+
+so_count is the sum of three different counts.
+1/ the set of states listed on so_stateids
+2/ the set of active vfs locks owned by any of those states
+3/ various transient counts such as for conflicting locks.
+
+When it is tested against '2' it is clear that one of these is the
+transient reference obtained by find_lockowner_str_locked().  It is not
+clear what the other one is expected to be.
+
+In practice, the count is often 2 because there is precisely one state
+on so_stateids.  If there were more, this would fail.
+
+In my testing I see two circumstances when RELEASE_LOCKOWNER is called.
+In one case, CLOSE is called before RELEASE_LOCKOWNER.  That results in
+all the lock states being removed, and so the lockowner being discarded
+(it is removed when there are no more references which usually happens
+when the lock state is discarded).  When nfsd4_release_lockowner() finds
+that the lock owner doesn't exist, it returns success.
+
+The other case shows an so_count of '2' and precisely one state listed
+in so_stateid.  It appears that the Linux client uses a separate lock
+owner for each file resulting in one lock state per lock owner, so this
+test on '2' is safe.  For another client it might not be safe.
+
+So this patch changes check_for_locks() to use the (newish)
+find_any_file_locked() so that it doesn't take a reference on the
+nfs4_file and so never calls nfsd_file_put(), and so never sleeps.  With
+this check is it safe to restore the use of check_for_locks() rather
+than testing so_count against the mysterious '2'.
+
+Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Cc: stable@vger.kernel.org # v6.2+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4state.c |   26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7911,14 +7911,16 @@ check_for_locks(struct nfs4_file *fp, st
+ {
+       struct file_lock *fl;
+       int status = false;
+-      struct nfsd_file *nf = find_any_file(fp);
++      struct nfsd_file *nf;
+       struct inode *inode;
+       struct file_lock_context *flctx;
++      spin_lock(&fp->fi_lock);
++      nf = find_any_file_locked(fp);
+       if (!nf) {
+               /* Any valid lock stateid should have some sort of access */
+               WARN_ON_ONCE(1);
+-              return status;
++              goto out;
+       }
+       inode = file_inode(nf->nf_file);
+@@ -7934,7 +7936,8 @@ check_for_locks(struct nfs4_file *fp, st
+               }
+               spin_unlock(&flctx->flc_lock);
+       }
+-      nfsd_file_put(nf);
++out:
++      spin_unlock(&fp->fi_lock);
+       return status;
+ }
+@@ -7944,10 +7947,8 @@ check_for_locks(struct nfs4_file *fp, st
+  * @cstate: NFSv4 COMPOUND state
+  * @u: RELEASE_LOCKOWNER arguments
+  *
+- * The lockowner's so_count is bumped when a lock record is added
+- * or when copying a conflicting lock. The latter case is brief,
+- * but can lead to fleeting false positives when looking for
+- * locks-in-use.
++ * Check if theree are any locks still held and if not - free the lockowner
++ * and any lock state that is owned.
+  *
+  * Return values:
+  *   %nfs_ok: lockowner released or not found
+@@ -7983,10 +7984,13 @@ nfsd4_release_lockowner(struct svc_rqst
+               spin_unlock(&clp->cl_lock);
+               return nfs_ok;
+       }
+-      if (atomic_read(&lo->lo_owner.so_count) != 2) {
+-              spin_unlock(&clp->cl_lock);
+-              nfs4_put_stateowner(&lo->lo_owner);
+-              return nfserr_locks_held;
++
++      list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
++              if (check_for_locks(stp->st_stid.sc_file, lo)) {
++                      spin_unlock(&clp->cl_lock);
++                      nfs4_put_stateowner(&lo->lo_owner);
++                      return nfserr_locks_held;
++              }
+       }
+       unhash_lockowner_locked(lo);
+       while (!list_empty(&lo->lo_owner.so_stateids)) {
diff --git a/queue-6.7/ovl-mark-xwhiteouts-directory-with-overlay.opaque-x.patch b/queue-6.7/ovl-mark-xwhiteouts-directory-with-overlay.opaque-x.patch
new file mode 100644 (file)
index 0000000..fe9ecf2
--- /dev/null
@@ -0,0 +1,459 @@
+From 420332b94119cdc7db4477cc88484691cb92ae71 Mon Sep 17 00:00:00 2001
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Sat, 20 Jan 2024 12:18:39 +0200
+Subject: ovl: mark xwhiteouts directory with overlay.opaque='x'
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 420332b94119cdc7db4477cc88484691cb92ae71 upstream.
+
+An opaque directory cannot have xwhiteouts, so instead of marking an
+xwhiteouts directory with a new xattr, overload overlay.opaque xattr
+for marking both opaque dir ('y') and xwhiteouts dir ('x').
+
+This is more efficient as the overlay.opaque xattr is checked during
+lookup of directory anyway.
+
+This also prevents unnecessary checking the xattr when reading a
+directory without xwhiteouts, i.e. most of the time.
+
+Note that the xwhiteouts marker is not checked on the upper layer and
+on the last layer in lowerstack, where xwhiteouts are not expected.
+
+Fixes: bc8df7a3dc03 ("ovl: Add an alternative type of whiteout")
+Cc: <stable@vger.kernel.org> # v6.7
+Reviewed-by: Alexander Larsson <alexl@redhat.com>
+Tested-by: Alexander Larsson <alexl@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/filesystems/overlayfs.rst |   16 +++++++--
+ fs/overlayfs/namei.c                    |   43 ++++++++++++++++---------
+ fs/overlayfs/overlayfs.h                |   23 ++++++++++---
+ fs/overlayfs/ovl_entry.h                |    4 +-
+ fs/overlayfs/readdir.c                  |    7 ++--
+ fs/overlayfs/super.c                    |   15 +++++++++
+ fs/overlayfs/util.c                     |   53 ++++++++++++++++++--------------
+ 7 files changed, 110 insertions(+), 51 deletions(-)
+
+--- a/Documentation/filesystems/overlayfs.rst
++++ b/Documentation/filesystems/overlayfs.rst
+@@ -145,7 +145,9 @@ filesystem, an overlay filesystem needs
+ that files have been removed.  This is done using whiteouts and opaque
+ directories (non-directories are always opaque).
+-A whiteout is created as a character device with 0/0 device number.
++A whiteout is created as a character device with 0/0 device number or
++as a zero-size regular file with the xattr "trusted.overlay.whiteout".
++
+ When a whiteout is found in the upper level of a merged directory, any
+ matching name in the lower level is ignored, and the whiteout itself
+ is also hidden.
+@@ -154,6 +156,13 @@ A directory is made opaque by setting th
+ to "y".  Where the upper filesystem contains an opaque directory, any
+ directory in the lower filesystem with the same name is ignored.
++An opaque directory should not conntain any whiteouts, because they do not
++serve any purpose.  A merge directory containing regular files with the xattr
++"trusted.overlay.whiteout", should be additionally marked by setting the xattr
++"trusted.overlay.opaque" to "x" on the merge directory itself.
++This is needed to avoid the overhead of checking the "trusted.overlay.whiteout"
++on all entries during readdir in the common case.
++
+ readdir
+ -------
+@@ -534,8 +543,9 @@ A lower dir with a regular whiteout will
+ mount, so to support storing an effective whiteout file in an overlayfs mount an
+ alternative form of whiteout is supported. This form is a regular, zero-size
+ file with the "overlay.whiteout" xattr set, inside a directory with the
+-"overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs,
+-but can be used by userspace tools (like containers) that generate lower layers.
++"overlay.opaque" xattr set to "x" (see `whiteouts and opaque directories`_).
++These alternative whiteouts are never created by overlayfs, but can be used by
++userspace tools (like containers) that generate lower layers.
+ These alternative whiteouts can be escaped using the standard xattr escape
+ mechanism in order to properly nest to any depth.
+--- a/fs/overlayfs/namei.c
++++ b/fs/overlayfs/namei.c
+@@ -18,10 +18,11 @@
+ struct ovl_lookup_data {
+       struct super_block *sb;
+-      struct vfsmount *mnt;
++      const struct ovl_layer *layer;
+       struct qstr name;
+       bool is_dir;
+       bool opaque;
++      bool xwhiteouts;
+       bool stop;
+       bool last;
+       char *redirect;
+@@ -201,17 +202,13 @@ struct dentry *ovl_decode_real_fh(struct
+       return real;
+ }
+-static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path)
+-{
+-      return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE);
+-}
+-
+ static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
+                                                  const char *name,
+                                                  struct dentry *base, int len,
+                                                  bool drop_negative)
+ {
+-      struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len);
++      struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), name,
++                                               base, len);
+       if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
+               if (drop_negative && ret->d_lockref.count == 1) {
+@@ -232,10 +229,13 @@ static int ovl_lookup_single(struct dent
+                            size_t prelen, const char *post,
+                            struct dentry **ret, bool drop_negative)
+ {
++      struct ovl_fs *ofs = OVL_FS(d->sb);
+       struct dentry *this;
+       struct path path;
+       int err;
+       bool last_element = !post[0];
++      bool is_upper = d->layer->idx == 0;
++      char val;
+       this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
+       if (IS_ERR(this)) {
+@@ -253,8 +253,8 @@ static int ovl_lookup_single(struct dent
+       }
+       path.dentry = this;
+-      path.mnt = d->mnt;
+-      if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) {
++      path.mnt = d->layer->mnt;
++      if (ovl_path_is_whiteout(ofs, &path)) {
+               d->stop = d->opaque = true;
+               goto put_and_out;
+       }
+@@ -272,7 +272,7 @@ static int ovl_lookup_single(struct dent
+                       d->stop = true;
+                       goto put_and_out;
+               }
+-              err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path, NULL);
++              err = ovl_check_metacopy_xattr(ofs, &path, NULL);
+               if (err < 0)
+                       goto out_err;
+@@ -292,7 +292,12 @@ static int ovl_lookup_single(struct dent
+               if (d->last)
+                       goto out;
+-              if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) {
++              /* overlay.opaque=x means xwhiteouts directory */
++              val = ovl_get_opaquedir_val(ofs, &path);
++              if (last_element && !is_upper && val == 'x') {
++                      d->xwhiteouts = true;
++                      ovl_layer_set_xwhiteouts(ofs, d->layer);
++              } else if (val == 'y') {
+                       d->stop = true;
+                       if (last_element)
+                               d->opaque = true;
+@@ -863,7 +868,8 @@ fail:
+  * Returns next layer in stack starting from top.
+  * Returns -1 if this is the last layer.
+  */
+-int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
++int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
++                const struct ovl_layer **layer)
+ {
+       struct ovl_entry *oe = OVL_E(dentry);
+       struct ovl_path *lowerstack = ovl_lowerstack(oe);
+@@ -871,13 +877,16 @@ int ovl_path_next(int idx, struct dentry
+       BUG_ON(idx < 0);
+       if (idx == 0) {
+               ovl_path_upper(dentry, path);
+-              if (path->dentry)
++              if (path->dentry) {
++                      *layer = &OVL_FS(dentry->d_sb)->layers[0];
+                       return ovl_numlower(oe) ? 1 : -1;
++              }
+               idx++;
+       }
+       BUG_ON(idx > ovl_numlower(oe));
+       path->dentry = lowerstack[idx - 1].dentry;
+-      path->mnt = lowerstack[idx - 1].layer->mnt;
++      *layer = lowerstack[idx - 1].layer;
++      path->mnt = (*layer)->mnt;
+       return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
+ }
+@@ -1055,7 +1064,7 @@ struct dentry *ovl_lookup(struct inode *
+       old_cred = ovl_override_creds(dentry->d_sb);
+       upperdir = ovl_dentry_upper(dentry->d_parent);
+       if (upperdir) {
+-              d.mnt = ovl_upper_mnt(ofs);
++              d.layer = &ofs->layers[0];
+               err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
+               if (err)
+                       goto out;
+@@ -1111,7 +1120,7 @@ struct dentry *ovl_lookup(struct inode *
+               else if (d.is_dir || !ofs->numdatalayer)
+                       d.last = lower.layer->idx == ovl_numlower(roe);
+-              d.mnt = lower.layer->mnt;
++              d.layer = lower.layer;
+               err = ovl_lookup_layer(lower.dentry, &d, &this, false);
+               if (err)
+                       goto out_put;
+@@ -1278,6 +1287,8 @@ struct dentry *ovl_lookup(struct inode *
+       if (upperopaque)
+               ovl_dentry_set_opaque(dentry);
++      if (d.xwhiteouts)
++              ovl_dentry_set_xwhiteouts(dentry);
+       if (upperdentry)
+               ovl_dentry_set_upper_alias(dentry);
+--- a/fs/overlayfs/overlayfs.h
++++ b/fs/overlayfs/overlayfs.h
+@@ -50,7 +50,6 @@ enum ovl_xattr {
+       OVL_XATTR_METACOPY,
+       OVL_XATTR_PROTATTR,
+       OVL_XATTR_XWHITEOUT,
+-      OVL_XATTR_XWHITEOUTS,
+ };
+ enum ovl_inode_flag {
+@@ -70,6 +69,8 @@ enum ovl_entry_flag {
+       OVL_E_UPPER_ALIAS,
+       OVL_E_OPAQUE,
+       OVL_E_CONNECTED,
++      /* Lower stack may contain xwhiteout entries */
++      OVL_E_XWHITEOUTS,
+ };
+ enum {
+@@ -471,6 +472,10 @@ bool ovl_dentry_test_flag(unsigned long
+ bool ovl_dentry_is_opaque(struct dentry *dentry);
+ bool ovl_dentry_is_whiteout(struct dentry *dentry);
+ void ovl_dentry_set_opaque(struct dentry *dentry);
++bool ovl_dentry_has_xwhiteouts(struct dentry *dentry);
++void ovl_dentry_set_xwhiteouts(struct dentry *dentry);
++void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs,
++                            const struct ovl_layer *layer);
+ bool ovl_dentry_has_upper_alias(struct dentry *dentry);
+ void ovl_dentry_set_upper_alias(struct dentry *dentry);
+ bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
+@@ -488,11 +493,10 @@ struct file *ovl_path_open(const struct
+ int ovl_copy_up_start(struct dentry *dentry, int flags);
+ void ovl_copy_up_end(struct dentry *dentry);
+ bool ovl_already_copied_up(struct dentry *dentry, int flags);
+-bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
+-                            enum ovl_xattr ox);
++char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
++                         enum ovl_xattr ox);
+ bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
+ bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path);
+-bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path);
+ bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
+                        const struct path *upperpath);
+@@ -567,7 +571,13 @@ static inline bool ovl_is_impuredir(stru
+               .mnt = ovl_upper_mnt(ofs),
+       };
+-      return ovl_path_check_dir_xattr(ofs, &upperpath, OVL_XATTR_IMPURE);
++      return ovl_get_dir_xattr_val(ofs, &upperpath, OVL_XATTR_IMPURE) == 'y';
++}
++
++static inline char ovl_get_opaquedir_val(struct ovl_fs *ofs,
++                                       const struct path *path)
++{
++      return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE);
+ }
+ static inline bool ovl_redirect_follow(struct ovl_fs *ofs)
+@@ -674,7 +684,8 @@ int ovl_get_index_name(struct ovl_fs *of
+ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
+ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
+                               struct dentry *origin, bool verify);
+-int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
++int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
++                const struct ovl_layer **layer);
+ int ovl_verify_lowerdata(struct dentry *dentry);
+ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+                         unsigned int flags);
+--- a/fs/overlayfs/ovl_entry.h
++++ b/fs/overlayfs/ovl_entry.h
+@@ -40,6 +40,8 @@ struct ovl_layer {
+       int idx;
+       /* One fsid per unique underlying sb (upper fsid == 0) */
+       int fsid;
++      /* xwhiteouts were found on this layer */
++      bool has_xwhiteouts;
+ };
+ struct ovl_path {
+@@ -59,7 +61,7 @@ struct ovl_fs {
+       unsigned int numfs;
+       /* Number of data-only lower layers */
+       unsigned int numdatalayer;
+-      const struct ovl_layer *layers;
++      struct ovl_layer *layers;
+       struct ovl_sb *fs;
+       /* workbasedir is the path at workdir= mount option */
+       struct dentry *workbasedir;
+--- a/fs/overlayfs/readdir.c
++++ b/fs/overlayfs/readdir.c
+@@ -305,8 +305,6 @@ static inline int ovl_dir_read(const str
+       if (IS_ERR(realfile))
+               return PTR_ERR(realfile);
+-      rdd->in_xwhiteouts_dir = rdd->dentry &&
+-              ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
+       rdd->first_maybe_whiteout = NULL;
+       rdd->ctx.pos = 0;
+       do {
+@@ -359,10 +357,13 @@ static int ovl_dir_read_merged(struct de
+               .is_lowest = false,
+       };
+       int idx, next;
++      const struct ovl_layer *layer;
+       for (idx = 0; idx != -1; idx = next) {
+-              next = ovl_path_next(idx, dentry, &realpath);
++              next = ovl_path_next(idx, dentry, &realpath, &layer);
+               rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
++              rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
++                                      ovl_dentry_has_xwhiteouts(dentry);
+               if (next != -1) {
+                       err = ovl_dir_read(&realpath, &rdd);
+--- a/fs/overlayfs/super.c
++++ b/fs/overlayfs/super.c
+@@ -1250,6 +1250,7 @@ static struct dentry *ovl_get_root(struc
+                                  struct ovl_entry *oe)
+ {
+       struct dentry *root;
++      struct ovl_fs *ofs = OVL_FS(sb);
+       struct ovl_path *lowerpath = ovl_lowerstack(oe);
+       unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
+       int fsid = lowerpath->layer->fsid;
+@@ -1271,6 +1272,20 @@ static struct dentry *ovl_get_root(struc
+                       ovl_set_flag(OVL_IMPURE, d_inode(root));
+       }
++      /* Look for xwhiteouts marker except in the lowermost layer */
++      for (int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) {
++              struct path path = {
++                      .mnt = lowerpath->layer->mnt,
++                      .dentry = lowerpath->dentry,
++              };
++
++              /* overlay.opaque=x means xwhiteouts directory */
++              if (ovl_get_opaquedir_val(ofs, &path) == 'x') {
++                      ovl_layer_set_xwhiteouts(ofs, lowerpath->layer);
++                      ovl_dentry_set_xwhiteouts(root);
++              }
++      }
++
+       /* Root is always merge -> can have whiteouts */
+       ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
+       ovl_dentry_set_flag(OVL_E_CONNECTED, root);
+--- a/fs/overlayfs/util.c
++++ b/fs/overlayfs/util.c
+@@ -461,6 +461,33 @@ void ovl_dentry_set_opaque(struct dentry
+       ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
+ }
++bool ovl_dentry_has_xwhiteouts(struct dentry *dentry)
++{
++      return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry);
++}
++
++void ovl_dentry_set_xwhiteouts(struct dentry *dentry)
++{
++      ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry);
++}
++
++/*
++ * ovl_layer_set_xwhiteouts() is called before adding the overlay dir
++ * dentry to dcache, while readdir of that same directory happens after
++ * the overlay dir dentry is in dcache, so if some cpu observes that
++ * ovl_dentry_is_xwhiteouts(), it will also observe layer->has_xwhiteouts
++ * for the layers where xwhiteouts marker was found in that merge dir.
++ */
++void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs,
++                            const struct ovl_layer *layer)
++{
++      if (layer->has_xwhiteouts)
++              return;
++
++      /* Write once to read-mostly layer properties */
++      ofs->layers[layer->idx].has_xwhiteouts = true;
++}
++
+ /*
+  * For hard links and decoded file handles, it's possible for ovl_dentry_upper()
+  * to return positive, while there's no actual upper alias for the inode.
+@@ -739,19 +766,6 @@ bool ovl_path_check_xwhiteout_xattr(stru
+       return res >= 0;
+ }
+-bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path)
+-{
+-      struct dentry *dentry = path->dentry;
+-      int res;
+-
+-      /* xattr.whiteouts must be a directory */
+-      if (!d_is_dir(dentry))
+-              return false;
+-
+-      res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUTS, NULL, 0);
+-      return res >= 0;
+-}
+-
+ /*
+  * Load persistent uuid from xattr into s_uuid if found, or store a new
+  * random generated value in s_uuid and in xattr.
+@@ -811,20 +825,17 @@ fail:
+       return false;
+ }
+-bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
+-                             enum ovl_xattr ox)
++char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
++                         enum ovl_xattr ox)
+ {
+       int res;
+       char val;
+       if (!d_is_dir(path->dentry))
+-              return false;
++              return 0;
+       res = ovl_path_getxattr(ofs, path, ox, &val, 1);
+-      if (res == 1 && val == 'y')
+-              return true;
+-
+-      return false;
++      return res == 1 ? val : 0;
+ }
+ #define OVL_XATTR_OPAQUE_POSTFIX      "opaque"
+@@ -837,7 +848,6 @@ bool ovl_path_check_dir_xattr(struct ovl
+ #define OVL_XATTR_METACOPY_POSTFIX    "metacopy"
+ #define OVL_XATTR_PROTATTR_POSTFIX    "protattr"
+ #define OVL_XATTR_XWHITEOUT_POSTFIX   "whiteout"
+-#define OVL_XATTR_XWHITEOUTS_POSTFIX  "whiteouts"
+ #define OVL_XATTR_TAB_ENTRY(x) \
+       [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
+@@ -854,7 +864,6 @@ const char *const ovl_xattr_table[][2] =
+       OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
+       OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
+       OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT),
+-      OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUTS),
+ };
+ int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
diff --git a/queue-6.7/platform-x86-intel-uncore-freq-fix-types-in-sysfs-callbacks.patch b/queue-6.7/platform-x86-intel-uncore-freq-fix-types-in-sysfs-callbacks.patch
new file mode 100644 (file)
index 0000000..0c1f52a
--- /dev/null
@@ -0,0 +1,266 @@
+From 416de0246f35f43d871a57939671fe814f4455ee Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Thu, 4 Jan 2024 15:59:03 -0700
+Subject: platform/x86: intel-uncore-freq: Fix types in sysfs callbacks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit 416de0246f35f43d871a57939671fe814f4455ee upstream.
+
+When booting a kernel with CONFIG_CFI_CLANG, there is a CFI failure when
+accessing any of the values under
+/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00:
+
+  $ cat /sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/max_freq_khz
+  fish: Job 1, 'cat /sys/devices/system/cpu/int…' terminated by signal SIGSEGV (Address boundary error)
+
+  $ sudo dmesg &| grep 'CFI failure'
+  [  170.953925] CFI failure at kobj_attr_show+0x19/0x30 (target: show_max_freq_khz+0x0/0xc0 [intel_uncore_frequency_common]; expected type: 0xd34078c5
+
+The sysfs callback functions such as show_domain_id() are written as if
+they are going to be called by dev_attr_show() but as the above message
+shows, they are instead called by kobj_attr_show(). kCFI checks that the
+destination of an indirect jump has the exact same type as the prototype
+of the function pointer it is called through and fails when they do not.
+
+These callbacks are called through kobj_attr_show() because
+uncore_root_kobj was initialized with kobject_create_and_add(), which
+means uncore_root_kobj has a ->sysfs_ops of kobj_sysfs_ops from
+kobject_create(), which uses kobj_attr_show() as its ->show() value.
+
+The only reason there has not been a more noticeable problem until this
+point is that 'struct kobj_attribute' and 'struct device_attribute' have
+the same layout, so getting the callback from container_of() works the
+same with either value.
+
+Change all the callbacks and their uses to be compatible with
+kobj_attr_show() and kobj_attr_store(), which resolves the kCFI failure
+and allows the sysfs files to work properly.
+
+Closes: https://github.com/ClangBuiltLinux/linux/issues/1974
+Fixes: ae7b2ce57851 ("platform/x86/intel/uncore-freq: Use sysfs API to create attributes")
+Cc: stable@vger.kernel.org
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
+Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Link: https://lore.kernel.org/r/20240104-intel-uncore-freq-kcfi-fix-v1-1-bf1e8939af40@kernel.org
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c |   82 +++++-----
+ drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h |   32 +--
+ 2 files changed, 57 insertions(+), 57 deletions(-)
+
+--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
++++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
+@@ -23,23 +23,23 @@ static int (*uncore_read)(struct uncore_
+ static int (*uncore_write)(struct uncore_data *data, unsigned int input, unsigned int min_max);
+ static int (*uncore_read_freq)(struct uncore_data *data, unsigned int *freq);
+-static ssize_t show_domain_id(struct device *dev, struct device_attribute *attr, char *buf)
++static ssize_t show_domain_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+ {
+-      struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_dev_attr);
++      struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_kobj_attr);
+       return sprintf(buf, "%u\n", data->domain_id);
+ }
+-static ssize_t show_fabric_cluster_id(struct device *dev, struct device_attribute *attr, char *buf)
++static ssize_t show_fabric_cluster_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+ {
+-      struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_dev_attr);
++      struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_kobj_attr);
+       return sprintf(buf, "%u\n", data->cluster_id);
+ }
+-static ssize_t show_package_id(struct device *dev, struct device_attribute *attr, char *buf)
++static ssize_t show_package_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+ {
+-      struct uncore_data *data = container_of(attr, struct uncore_data, package_id_dev_attr);
++      struct uncore_data *data = container_of(attr, struct uncore_data, package_id_kobj_attr);
+       return sprintf(buf, "%u\n", data->package_id);
+ }
+@@ -97,30 +97,30 @@ static ssize_t show_perf_status_freq_khz
+ }
+ #define store_uncore_min_max(name, min_max)                           \
+-      static ssize_t store_##name(struct device *dev,         \
+-                                   struct device_attribute *attr,     \
++      static ssize_t store_##name(struct kobject *kobj,               \
++                                   struct kobj_attribute *attr,       \
+                                    const char *buf, size_t count)     \
+       {                                                               \
+-              struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\
++              struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\
+                                                                       \
+               return store_min_max_freq_khz(data, buf, count, \
+                                             min_max);         \
+       }
+ #define show_uncore_min_max(name, min_max)                            \
+-      static ssize_t show_##name(struct device *dev,          \
+-                                  struct device_attribute *attr, char *buf)\
++      static ssize_t show_##name(struct kobject *kobj,                \
++                                  struct kobj_attribute *attr, char *buf)\
+       {                                                               \
+-              struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\
++              struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\
+                                                                       \
+               return show_min_max_freq_khz(data, buf, min_max);       \
+       }
+ #define show_uncore_perf_status(name)                                 \
+-      static ssize_t show_##name(struct device *dev,          \
+-                                 struct device_attribute *attr, char *buf)\
++      static ssize_t show_##name(struct kobject *kobj,                \
++                                 struct kobj_attribute *attr, char *buf)\
+       {                                                               \
+-              struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\
++              struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\
+                                                                       \
+               return show_perf_status_freq_khz(data, buf); \
+       }
+@@ -134,11 +134,11 @@ show_uncore_min_max(max_freq_khz, 1);
+ show_uncore_perf_status(current_freq_khz);
+ #define show_uncore_data(member_name)                                 \
+-      static ssize_t show_##member_name(struct device *dev,   \
+-                                         struct device_attribute *attr, char *buf)\
++      static ssize_t show_##member_name(struct kobject *kobj, \
++                                         struct kobj_attribute *attr, char *buf)\
+       {                                                               \
+               struct uncore_data *data = container_of(attr, struct uncore_data,\
+-                                                        member_name##_dev_attr);\
++                                                        member_name##_kobj_attr);\
+                                                                       \
+               return sysfs_emit(buf, "%u\n",                          \
+                                data->member_name);                    \
+@@ -149,29 +149,29 @@ show_uncore_data(initial_max_freq_khz);
+ #define init_attribute_rw(_name)                                      \
+       do {                                                            \
+-              sysfs_attr_init(&data->_name##_dev_attr.attr);  \
+-              data->_name##_dev_attr.show = show_##_name;             \
+-              data->_name##_dev_attr.store = store_##_name;           \
+-              data->_name##_dev_attr.attr.name = #_name;              \
+-              data->_name##_dev_attr.attr.mode = 0644;                \
++              sysfs_attr_init(&data->_name##_kobj_attr.attr); \
++              data->_name##_kobj_attr.show = show_##_name;            \
++              data->_name##_kobj_attr.store = store_##_name;          \
++              data->_name##_kobj_attr.attr.name = #_name;             \
++              data->_name##_kobj_attr.attr.mode = 0644;               \
+       } while (0)
+ #define init_attribute_ro(_name)                                      \
+       do {                                                            \
+-              sysfs_attr_init(&data->_name##_dev_attr.attr);  \
+-              data->_name##_dev_attr.show = show_##_name;             \
+-              data->_name##_dev_attr.store = NULL;                    \
+-              data->_name##_dev_attr.attr.name = #_name;              \
+-              data->_name##_dev_attr.attr.mode = 0444;                \
++              sysfs_attr_init(&data->_name##_kobj_attr.attr); \
++              data->_name##_kobj_attr.show = show_##_name;            \
++              data->_name##_kobj_attr.store = NULL;                   \
++              data->_name##_kobj_attr.attr.name = #_name;             \
++              data->_name##_kobj_attr.attr.mode = 0444;               \
+       } while (0)
+ #define init_attribute_root_ro(_name)                                 \
+       do {                                                            \
+-              sysfs_attr_init(&data->_name##_dev_attr.attr);  \
+-              data->_name##_dev_attr.show = show_##_name;             \
+-              data->_name##_dev_attr.store = NULL;                    \
+-              data->_name##_dev_attr.attr.name = #_name;              \
+-              data->_name##_dev_attr.attr.mode = 0400;                \
++              sysfs_attr_init(&data->_name##_kobj_attr.attr); \
++              data->_name##_kobj_attr.show = show_##_name;            \
++              data->_name##_kobj_attr.store = NULL;                   \
++              data->_name##_kobj_attr.attr.name = #_name;             \
++              data->_name##_kobj_attr.attr.mode = 0400;               \
+       } while (0)
+ static int create_attr_group(struct uncore_data *data, char *name)
+@@ -186,21 +186,21 @@ static int create_attr_group(struct unco
+       if (data->domain_id != UNCORE_DOMAIN_ID_INVALID) {
+               init_attribute_root_ro(domain_id);
+-              data->uncore_attrs[index++] = &data->domain_id_dev_attr.attr;
++              data->uncore_attrs[index++] = &data->domain_id_kobj_attr.attr;
+               init_attribute_root_ro(fabric_cluster_id);
+-              data->uncore_attrs[index++] = &data->fabric_cluster_id_dev_attr.attr;
++              data->uncore_attrs[index++] = &data->fabric_cluster_id_kobj_attr.attr;
+               init_attribute_root_ro(package_id);
+-              data->uncore_attrs[index++] = &data->package_id_dev_attr.attr;
++              data->uncore_attrs[index++] = &data->package_id_kobj_attr.attr;
+       }
+-      data->uncore_attrs[index++] = &data->max_freq_khz_dev_attr.attr;
+-      data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr;
+-      data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr;
+-      data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr;
++      data->uncore_attrs[index++] = &data->max_freq_khz_kobj_attr.attr;
++      data->uncore_attrs[index++] = &data->min_freq_khz_kobj_attr.attr;
++      data->uncore_attrs[index++] = &data->initial_min_freq_khz_kobj_attr.attr;
++      data->uncore_attrs[index++] = &data->initial_max_freq_khz_kobj_attr.attr;
+       ret = uncore_read_freq(data, &freq);
+       if (!ret)
+-              data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr;
++              data->uncore_attrs[index++] = &data->current_freq_khz_kobj_attr.attr;
+       data->uncore_attrs[index] = NULL;
+--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
++++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
+@@ -26,14 +26,14 @@
+  * @instance_id:      Unique instance id to append to directory name
+  * @name:             Sysfs entry name for this instance
+  * @uncore_attr_group:        Attribute group storage
+- * @max_freq_khz_dev_attr: Storage for device attribute max_freq_khz
+- * @mix_freq_khz_dev_attr: Storage for device attribute min_freq_khz
+- * @initial_max_freq_khz_dev_attr: Storage for device attribute initial_max_freq_khz
+- * @initial_min_freq_khz_dev_attr: Storage for device attribute initial_min_freq_khz
+- * @current_freq_khz_dev_attr: Storage for device attribute current_freq_khz
+- * @domain_id_dev_attr: Storage for device attribute domain_id
+- * @fabric_cluster_id_dev_attr: Storage for device attribute fabric_cluster_id
+- * @package_id_dev_attr: Storage for device attribute package_id
++ * @max_freq_khz_kobj_attr: Storage for kobject attribute max_freq_khz
++ * @mix_freq_khz_kobj_attr: Storage for kobject attribute min_freq_khz
++ * @initial_max_freq_khz_kobj_attr: Storage for kobject attribute initial_max_freq_khz
++ * @initial_min_freq_khz_kobj_attr: Storage for kobject attribute initial_min_freq_khz
++ * @current_freq_khz_kobj_attr: Storage for kobject attribute current_freq_khz
++ * @domain_id_kobj_attr: Storage for kobject attribute domain_id
++ * @fabric_cluster_id_kobj_attr: Storage for kobject attribute fabric_cluster_id
++ * @package_id_kobj_attr: Storage for kobject attribute package_id
+  * @uncore_attrs:     Attribute storage for group creation
+  *
+  * This structure is used to encapsulate all data related to uncore sysfs
+@@ -53,14 +53,14 @@ struct uncore_data {
+       char name[32];
+       struct attribute_group uncore_attr_group;
+-      struct device_attribute max_freq_khz_dev_attr;
+-      struct device_attribute min_freq_khz_dev_attr;
+-      struct device_attribute initial_max_freq_khz_dev_attr;
+-      struct device_attribute initial_min_freq_khz_dev_attr;
+-      struct device_attribute current_freq_khz_dev_attr;
+-      struct device_attribute domain_id_dev_attr;
+-      struct device_attribute fabric_cluster_id_dev_attr;
+-      struct device_attribute package_id_dev_attr;
++      struct kobj_attribute max_freq_khz_kobj_attr;
++      struct kobj_attribute min_freq_khz_kobj_attr;
++      struct kobj_attribute initial_max_freq_khz_kobj_attr;
++      struct kobj_attribute initial_min_freq_khz_kobj_attr;
++      struct kobj_attribute current_freq_khz_kobj_attr;
++      struct kobj_attribute domain_id_kobj_attr;
++      struct kobj_attribute fabric_cluster_id_kobj_attr;
++      struct kobj_attribute package_id_kobj_attr;
+       struct attribute *uncore_attrs[9];
+ };
diff --git a/queue-6.7/platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch b/queue-6.7/platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch
new file mode 100644 (file)
index 0000000..bedf4ef
--- /dev/null
@@ -0,0 +1,293 @@
+From 5913320eb0b3ec88158cfcb0fa5e996bf4ef681b Mon Sep 17 00:00:00 2001
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Date: Mon, 8 Jan 2024 15:20:58 +0900
+Subject: platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+commit 5913320eb0b3ec88158cfcb0fa5e996bf4ef681b upstream.
+
+p2sb_bar() unhides P2SB device to get resources from the device. It
+guards the operation by locking pci_rescan_remove_lock so that parallel
+rescans do not find the P2SB device. However, this lock causes deadlock
+when PCI bus rescan is triggered by /sys/bus/pci/rescan. The rescan
+locks pci_rescan_remove_lock and probes PCI devices. When PCI devices
+call p2sb_bar() during probe, it locks pci_rescan_remove_lock again.
+Hence the deadlock.
+
+To avoid the deadlock, do not lock pci_rescan_remove_lock in p2sb_bar().
+Instead, do the lock at fs_initcall. Introduce p2sb_cache_resources()
+for fs_initcall which gets and caches the P2SB resources. At p2sb_bar(),
+refer the cache and return to the caller.
+
+Before operating the device at P2SB DEVFN for resource cache, check
+that its device class is PCI_CLASS_MEMORY_OTHER 0x0580 that PCH
+specifications define. This avoids unexpected operation to other devices
+at the same DEVFN.
+
+Link: https://lore.kernel.org/linux-pci/6xb24fjmptxxn5js2fjrrddjae6twex5bjaftwqsuawuqqqydx@7cl3uik5ef6j/
+Fixes: 9745fb07474f ("platform/x86/intel: Add Primary to Sideband (P2SB) bridge support")
+Cc: stable@vger.kernel.org
+Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Link: https://lore.kernel.org/r/20240108062059.3583028-2-shinichiro.kawasaki@wdc.com
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Tested-by Klara Modin <klarasmodin@gmail.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/p2sb.c |  180 +++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 139 insertions(+), 41 deletions(-)
+
+--- a/drivers/platform/x86/p2sb.c
++++ b/drivers/platform/x86/p2sb.c
+@@ -26,6 +26,21 @@ static const struct x86_cpu_id p2sb_cpu_
+       {}
+ };
++/*
++ * Cache BAR0 of P2SB device functions 0 to 7.
++ * TODO: The constant 8 is the number of functions that PCI specification
++ *       defines. Same definitions exist tree-wide. Unify this definition and
++ *       the other definitions then move to include/uapi/linux/pci.h.
++ */
++#define NR_P2SB_RES_CACHE 8
++
++struct p2sb_res_cache {
++      u32 bus_dev_id;
++      struct resource res;
++};
++
++static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE];
++
+ static int p2sb_get_devfn(unsigned int *devfn)
+ {
+       unsigned int fn = P2SB_DEVFN_DEFAULT;
+@@ -39,8 +54,16 @@ static int p2sb_get_devfn(unsigned int *
+       return 0;
+ }
++static bool p2sb_valid_resource(struct resource *res)
++{
++      if (res->flags)
++              return true;
++
++      return false;
++}
++
+ /* Copy resource from the first BAR of the device in question */
+-static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem)
++static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem)
+ {
+       struct resource *bar0 = &pdev->resource[0];
+@@ -56,49 +79,66 @@ static int p2sb_read_bar0(struct pci_dev
+       mem->end = bar0->end;
+       mem->flags = bar0->flags;
+       mem->desc = bar0->desc;
+-
+-      return 0;
+ }
+-static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
++static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn)
+ {
++      struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)];
+       struct pci_dev *pdev;
+-      int ret;
+       pdev = pci_scan_single_device(bus, devfn);
+       if (!pdev)
+-              return -ENODEV;
++              return;
+-      ret = p2sb_read_bar0(pdev, mem);
++      p2sb_read_bar0(pdev, &cache->res);
++      cache->bus_dev_id = bus->dev.id;
+       pci_stop_and_remove_bus_device(pdev);
+-      return ret;
+ }
+-/**
+- * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR
+- * @bus: PCI bus to communicate with
+- * @devfn: PCI slot and function to communicate with
+- * @mem: memory resource to be filled in
+- *
+- * The BIOS prevents the P2SB device from being enumerated by the PCI
+- * subsystem, so we need to unhide and hide it back to lookup the BAR.
+- *
+- * if @bus is NULL, the bus 0 in domain 0 will be used.
+- * If @devfn is 0, it will be replaced by devfn of the P2SB device.
+- *
+- * Caller must provide a valid pointer to @mem.
+- *
+- * Locking is handled by pci_rescan_remove_lock mutex.
+- *
+- * Return:
+- * 0 on success or appropriate errno value on error.
+- */
+-int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
++static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
++{
++      unsigned int slot, fn;
++
++      if (PCI_FUNC(devfn) == 0) {
++              /*
++               * When function number of the P2SB device is zero, scan it and
++               * other function numbers, and if devices are available, cache
++               * their BAR0s.
++               */
++              slot = PCI_SLOT(devfn);
++              for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++)
++                      p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn));
++      } else {
++              /* Scan the P2SB device and cache its BAR0 */
++              p2sb_scan_and_cache_devfn(bus, devfn);
++      }
++
++      if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res))
++              return -ENOENT;
++
++      return 0;
++}
++
++static struct pci_bus *p2sb_get_bus(struct pci_bus *bus)
++{
++      static struct pci_bus *p2sb_bus;
++
++      bus = bus ?: p2sb_bus;
++      if (bus)
++              return bus;
++
++      /* Assume P2SB is on the bus 0 in domain 0 */
++      p2sb_bus = pci_find_bus(0, 0);
++      return p2sb_bus;
++}
++
++static int p2sb_cache_resources(void)
+ {
+-      struct pci_dev *pdev_p2sb;
+       unsigned int devfn_p2sb;
+       u32 value = P2SBC_HIDE;
++      struct pci_bus *bus;
++      u16 class;
+       int ret;
+       /* Get devfn for P2SB device itself */
+@@ -106,8 +146,17 @@ int p2sb_bar(struct pci_bus *bus, unsign
+       if (ret)
+               return ret;
+-      /* if @bus is NULL, use bus 0 in domain 0 */
+-      bus = bus ?: pci_find_bus(0, 0);
++      bus = p2sb_get_bus(NULL);
++      if (!bus)
++              return -ENODEV;
++
++      /*
++       * When a device with same devfn exists and its device class is not
++       * PCI_CLASS_MEMORY_OTHER for P2SB, do not touch it.
++       */
++      pci_bus_read_config_word(bus, devfn_p2sb, PCI_CLASS_DEVICE, &class);
++      if (!PCI_POSSIBLE_ERROR(class) && class != PCI_CLASS_MEMORY_OTHER)
++              return -ENODEV;
+       /*
+        * Prevent concurrent PCI bus scan from seeing the P2SB device and
+@@ -115,17 +164,16 @@ int p2sb_bar(struct pci_bus *bus, unsign
+        */
+       pci_lock_rescan_remove();
+-      /* Unhide the P2SB device, if needed */
++      /*
++       * The BIOS prevents the P2SB device from being enumerated by the PCI
++       * subsystem, so we need to unhide and hide it back to lookup the BAR.
++       * Unhide the P2SB device here, if needed.
++       */
+       pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
+       if (value & P2SBC_HIDE)
+               pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0);
+-      pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb);
+-      if (devfn)
+-              ret = p2sb_scan_and_read(bus, devfn, mem);
+-      else
+-              ret = p2sb_read_bar0(pdev_p2sb, mem);
+-      pci_stop_and_remove_bus_device(pdev_p2sb);
++      ret = p2sb_scan_and_cache(bus, devfn_p2sb);
+       /* Hide the P2SB device, if it was hidden */
+       if (value & P2SBC_HIDE)
+@@ -133,12 +181,62 @@ int p2sb_bar(struct pci_bus *bus, unsign
+       pci_unlock_rescan_remove();
+-      if (ret)
+-              return ret;
++      return ret;
++}
+-      if (mem->flags == 0)
++/**
++ * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR
++ * @bus: PCI bus to communicate with
++ * @devfn: PCI slot and function to communicate with
++ * @mem: memory resource to be filled in
++ *
++ * If @bus is NULL, the bus 0 in domain 0 will be used.
++ * If @devfn is 0, it will be replaced by devfn of the P2SB device.
++ *
++ * Caller must provide a valid pointer to @mem.
++ *
++ * Return:
++ * 0 on success or appropriate errno value on error.
++ */
++int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
++{
++      struct p2sb_res_cache *cache;
++      int ret;
++
++      bus = p2sb_get_bus(bus);
++      if (!bus)
+               return -ENODEV;
++      if (!devfn) {
++              ret = p2sb_get_devfn(&devfn);
++              if (ret)
++                      return ret;
++      }
++
++      cache = &p2sb_resources[PCI_FUNC(devfn)];
++      if (cache->bus_dev_id != bus->dev.id)
++              return -ENODEV;
++
++      if (!p2sb_valid_resource(&cache->res))
++              return -ENOENT;
++
++      memcpy(mem, &cache->res, sizeof(*mem));
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(p2sb_bar);
++
++static int __init p2sb_fs_init(void)
++{
++      p2sb_cache_resources();
++      return 0;
++}
++
++/*
++ * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can
++ * not be locked in sysfs pci bus rescan path because of deadlock. To
++ * avoid the deadlock, access to P2SB devices with the lock at an early
++ * step in kernel initialization and cache required resources. This
++ * should happen after subsys_initcall which initializes PCI subsystem
++ * and before device_initcall which requires P2SB resources.
++ */
++fs_initcall(p2sb_fs_init);
diff --git a/queue-6.7/rbd-don-t-move-requests-to-the-running-list-on-errors.patch b/queue-6.7/rbd-don-t-move-requests-to-the-running-list-on-errors.patch
new file mode 100644 (file)
index 0000000..18f034e
--- /dev/null
@@ -0,0 +1,77 @@
+From ded080c86b3f99683774af0441a58fc2e3d60cae Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Wed, 17 Jan 2024 18:59:44 +0100
+Subject: rbd: don't move requests to the running list on errors
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit ded080c86b3f99683774af0441a58fc2e3d60cae upstream.
+
+The running list is supposed to contain requests that are pinning the
+exclusive lock, i.e. those that must be flushed before exclusive lock
+is released.  When wake_lock_waiters() is called to handle an error,
+requests on the acquiring list are failed with that error and no
+flushing takes place.  Briefly moving them to the running list is not
+only pointless but also harmful: if exclusive lock gets acquired
+before all of their state machines are scheduled and go through
+rbd_lock_del_request(), we trigger
+
+    rbd_assert(list_empty(&rbd_dev->running_list));
+
+in rbd_try_acquire_lock().
+
+Cc: stable@vger.kernel.org
+Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/rbd.c |   22 ++++++++++++++--------
+ 1 file changed, 14 insertions(+), 8 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -3452,14 +3452,15 @@ static bool rbd_lock_add_request(struct
+ static void rbd_lock_del_request(struct rbd_img_request *img_req)
+ {
+       struct rbd_device *rbd_dev = img_req->rbd_dev;
+-      bool need_wakeup;
++      bool need_wakeup = false;
+       lockdep_assert_held(&rbd_dev->lock_rwsem);
+       spin_lock(&rbd_dev->lock_lists_lock);
+-      rbd_assert(!list_empty(&img_req->lock_item));
+-      list_del_init(&img_req->lock_item);
+-      need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
+-                     list_empty(&rbd_dev->running_list));
++      if (!list_empty(&img_req->lock_item)) {
++              list_del_init(&img_req->lock_item);
++              need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
++                             list_empty(&rbd_dev->running_list));
++      }
+       spin_unlock(&rbd_dev->lock_lists_lock);
+       if (need_wakeup)
+               complete(&rbd_dev->releasing_wait);
+@@ -3842,14 +3843,19 @@ static void wake_lock_waiters(struct rbd
+               return;
+       }
+-      list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) {
++      while (!list_empty(&rbd_dev->acquiring_list)) {
++              img_req = list_first_entry(&rbd_dev->acquiring_list,
++                                         struct rbd_img_request, lock_item);
+               mutex_lock(&img_req->state_mutex);
+               rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
++              if (!result)
++                      list_move_tail(&img_req->lock_item,
++                                     &rbd_dev->running_list);
++              else
++                      list_del_init(&img_req->lock_item);
+               rbd_img_schedule(img_req, result);
+               mutex_unlock(&img_req->state_mutex);
+       }
+-
+-      list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
+ }
+ static bool locker_equal(const struct ceph_locker *lhs,
diff --git a/queue-6.7/revert-drivers-firmware-move-sysfb_init-from-device_initcall-to-subsys_initcall_sync.patch b/queue-6.7/revert-drivers-firmware-move-sysfb_init-from-device_initcall-to-subsys_initcall_sync.patch
new file mode 100644 (file)
index 0000000..cb35276
--- /dev/null
@@ -0,0 +1,47 @@
+From d1b163aa0749706379055e40a52cf7a851abf9dc Mon Sep 17 00:00:00 2001
+From: Thomas Zimmermann <tzimmermann@suse.de>
+Date: Tue, 23 Jan 2024 13:09:26 +0100
+Subject: Revert "drivers/firmware: Move sysfb_init() from device_initcall to subsys_initcall_sync"
+
+From: Thomas Zimmermann <tzimmermann@suse.de>
+
+commit d1b163aa0749706379055e40a52cf7a851abf9dc upstream.
+
+This reverts commit 60aebc9559492cea6a9625f514a8041717e3a2e4.
+
+Commit 60aebc9559492cea ("drivers/firmware: Move sysfb_init() from
+device_initcall to subsys_initcall_sync") messes up initialization order
+of the graphics drivers and leads to blank displays on some systems. So
+revert the commit.
+
+To make the display drivers fully independent from initialization
+order requires to track framebuffer memory by device and independently
+from the loaded drivers. The kernel currently lacks the infrastructure
+to do so.
+
+Reported-by: Jaak Ristioja <jaak@ristioja.ee>
+Closes: https://lore.kernel.org/dri-devel/ZUnNi3q3yB3zZfTl@P70.localdomain/T/#t
+Reported-by: Huacai Chen <chenhuacai@loongson.cn>
+Closes: https://lore.kernel.org/dri-devel/20231108024613.2898921-1-chenhuacai@loongson.cn/
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10133
+Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
+Cc: Javier Martinez Canillas <javierm@redhat.com>
+Cc: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Jani Nikula <jani.nikula@linux.intel.com>
+Cc: stable@vger.kernel.org # v6.5+
+Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
+Acked-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240123120937.27736-1-tzimmermann@suse.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/sysfb.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/firmware/sysfb.c
++++ b/drivers/firmware/sysfb.c
+@@ -128,4 +128,4 @@ unlock_mutex:
+ }
+ /* must execute after PCI subsystem for EFI quirks */
+-subsys_initcall_sync(sysfb_init);
++device_initcall(sysfb_init);
diff --git a/queue-6.7/revert-drm-i915-dsi-do-display-on-sequence-later-on-icl.patch b/queue-6.7/revert-drm-i915-dsi-do-display-on-sequence-later-on-icl.patch
new file mode 100644 (file)
index 0000000..d30df0d
--- /dev/null
@@ -0,0 +1,63 @@
+From 6992eb815d087858f8d7e4020529c2fe800456b3 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
+Date: Tue, 16 Jan 2024 23:08:21 +0200
+Subject: Revert "drm/i915/dsi: Do display on sequence later on icl+"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ville Syrjälä <ville.syrjala@linux.intel.com>
+
+commit 6992eb815d087858f8d7e4020529c2fe800456b3 upstream.
+
+This reverts commit 88b065943cb583e890324d618e8d4b23460d51a3.
+
+Lenovo 82TQ is unhappy if we do the display on sequence this
+late. The display output shows severe corruption.
+
+It's unclear if this is a failure on our part (perhaps
+something to do with sending commands in LP mode after HS
+/video mode transmission has been started? Though the backlight
+on command at least seems to work) or simply that there are
+some commands in the sequence that are needed to be done
+earlier (eg. could be some DSC init stuff?). If the latter
+then I don't think the current Windows code would work
+either, but maybe this was originally tested with an older
+driver, who knows.
+
+Root causing this fully would likely require a lot of
+experimentation which isn't really feasible without direct
+access to the machine, so let's just accept failure and
+go back to the original sequence.
+
+Cc: stable@vger.kernel.org
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10071
+Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240116210821.30194-1-ville.syrjala@linux.intel.com
+Acked-by: Jani Nikula <jani.nikula@intel.com>
+(cherry picked from commit dc524d05974f615b145404191fcf91b478950499)
+Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/icl_dsi.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/i915/display/icl_dsi.c
++++ b/drivers/gpu/drm/i915/display/icl_dsi.c
+@@ -1155,6 +1155,7 @@ static void gen11_dsi_powerup_panel(stru
+       }
+       intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP);
++      intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
+       /* ensure all panel commands dispatched before enabling transcoder */
+       wait_for_cmds_dispatched_to_panel(encoder);
+@@ -1255,8 +1256,6 @@ static void gen11_dsi_enable(struct inte
+       /* step6d: enable dsi transcoder */
+       gen11_dsi_enable_transcoder(encoder);
+-      intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
+-
+       /* step7: enable backlight */
+       intel_backlight_enable(crtc_state, conn_state);
+       intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
diff --git a/queue-6.7/revert-nouveau-push-event-block-allowing-out-of-the-fence-context.patch b/queue-6.7/revert-nouveau-push-event-block-allowing-out-of-the-fence-context.patch
new file mode 100644 (file)
index 0000000..09738a2
--- /dev/null
@@ -0,0 +1,116 @@
+From 4d7acc8f48bcf27d0dc068f02e55c77e840b9110 Mon Sep 17 00:00:00 2001
+From: Dave Airlie <airlied@redhat.com>
+Date: Sat, 27 Jan 2024 04:04:34 +1000
+Subject: Revert "nouveau: push event block/allowing out of the fence context"
+
+From: Dave Airlie <airlied@redhat.com>
+
+commit 4d7acc8f48bcf27d0dc068f02e55c77e840b9110 upstream.
+
+This reverts commit eacabb5462717a52fccbbbba458365a4f5e61f35.
+
+This commit causes some regressions in desktop usage, this will
+reintroduce the original deadlock in DRI_PRIME situations, I've
+got an idea to fix it by offloading to a workqueue in a different
+spot, however this code has a race condition where we sometimes
+miss interrupts so I'd like to fix that as well.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_fence.c |   28 +++++-----------------------
+ drivers/gpu/drm/nouveau/nouveau_fence.h |    5 +----
+ 2 files changed, 6 insertions(+), 27 deletions(-)
+
+--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
++++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
+@@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fenc
+       if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
+               struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+-              if (atomic_dec_and_test(&fctx->notify_ref))
++              if (!--fctx->notify_ref)
+                       drop = 1;
+       }
+@@ -103,7 +103,6 @@ nouveau_fence_context_kill(struct nouvea
+ void
+ nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
+ {
+-      cancel_work_sync(&fctx->allow_block_work);
+       nouveau_fence_context_kill(fctx, 0);
+       nvif_event_dtor(&fctx->event);
+       fctx->dead = 1;
+@@ -168,18 +167,6 @@ nouveau_fence_wait_uevent_handler(struct
+       return ret;
+ }
+-static void
+-nouveau_fence_work_allow_block(struct work_struct *work)
+-{
+-      struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
+-                                                     allow_block_work);
+-
+-      if (atomic_read(&fctx->notify_ref) == 0)
+-              nvif_event_block(&fctx->event);
+-      else
+-              nvif_event_allow(&fctx->event);
+-}
+-
+ void
+ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
+ {
+@@ -191,7 +178,6 @@ nouveau_fence_context_new(struct nouveau
+       } args;
+       int ret;
+-      INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block);
+       INIT_LIST_HEAD(&fctx->flip);
+       INIT_LIST_HEAD(&fctx->pending);
+       spin_lock_init(&fctx->lock);
+@@ -535,19 +521,15 @@ static bool nouveau_fence_enable_signali
+       struct nouveau_fence *fence = from_fence(f);
+       struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+       bool ret;
+-      bool do_work;
+-      if (atomic_inc_return(&fctx->notify_ref) == 0)
+-              do_work = true;
++      if (!fctx->notify_ref++)
++              nvif_event_allow(&fctx->event);
+       ret = nouveau_fence_no_signaling(f);
+       if (ret)
+               set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags);
+-      else if (atomic_dec_and_test(&fctx->notify_ref))
+-              do_work = true;
+-
+-      if (do_work)
+-              schedule_work(&fctx->allow_block_work);
++      else if (!--fctx->notify_ref)
++              nvif_event_block(&fctx->event);
+       return ret;
+ }
+--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
++++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
+@@ -3,7 +3,6 @@
+ #define __NOUVEAU_FENCE_H__
+ #include <linux/dma-fence.h>
+-#include <linux/workqueue.h>
+ #include <nvif/event.h>
+ struct nouveau_drm;
+@@ -46,9 +45,7 @@ struct nouveau_fence_chan {
+       char name[32];
+       struct nvif_event event;
+-      struct work_struct allow_block_work;
+-      atomic_t notify_ref;
+-      int dead, killed;
++      int notify_ref, dead, killed;
+ };
+ struct nouveau_fence_priv {
index e3a392fc5e03055a5ef730efd5adcd2d3d3a1228..a4805be1f1b7895de13929c40d7aca66d239dc46 100644 (file)
@@ -219,3 +219,29 @@ selftests-bonding-do-not-test-arp-ns-target-with-mod.patch
 net-fec-fix-the-unhandled-context-fault-from-smmu.patch
 tsnep-remove-fcs-for-xdp-data-path.patch
 tsnep-fix-xdp_ring_need_wakeup-for-empty-fill-ring.patch
+btrfs-zoned-fix-lock-ordering-in-btrfs_zone_activate.patch
+btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch
+btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch
+btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch
+btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch
+btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch
+btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch
+rbd-don-t-move-requests-to-the-running-list-on-errors.patch
+exec-fix-error-handling-in-begin_new_exec.patch
+wifi-iwlwifi-fix-a-memory-corruption.patch
+nfsd-fix-release_lockowner.patch
+ovl-mark-xwhiteouts-directory-with-overlay.opaque-x.patch
+hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch
+netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch
+netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch
+platform-x86-intel-uncore-freq-fix-types-in-sysfs-callbacks.patch
+platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch
+ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch
+firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch
+revert-drivers-firmware-move-sysfb_init-from-device_initcall-to-subsys_initcall_sync.patch
+drm-amdgpu-fix-the-null-pointer-when-load-rlc-firmware.patch
+xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch
+gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch
+cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch
+revert-nouveau-push-event-block-allowing-out-of-the-fence-context.patch
+revert-drm-i915-dsi-do-display-on-sequence-later-on-icl.patch
diff --git a/queue-6.7/wifi-iwlwifi-fix-a-memory-corruption.patch b/queue-6.7/wifi-iwlwifi-fix-a-memory-corruption.patch
new file mode 100644 (file)
index 0000000..32bd69a
--- /dev/null
@@ -0,0 +1,44 @@
+From cf4a0d840ecc72fcf16198d5e9c505ab7d5a5e4d Mon Sep 17 00:00:00 2001
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Date: Thu, 11 Jan 2024 15:07:25 +0200
+Subject: wifi: iwlwifi: fix a memory corruption
+
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+
+commit cf4a0d840ecc72fcf16198d5e9c505ab7d5a5e4d upstream.
+
+iwl_fw_ini_trigger_tlv::data is a pointer to a __le32, which means that
+if we copy to iwl_fw_ini_trigger_tlv::data + offset while offset is in
+bytes, we'll write past the buffer.
+
+Cc: stable@vger.kernel.org
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218233
+Fixes: cf29c5b66b9f ("iwlwifi: dbg_ini: implement time point handling")
+Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Link: https://msgid.link/20240111150610.2d2b8b870194.I14ed76505a5cf87304e0c9cc05cc0ae85ed3bf91@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+ /*
+- * Copyright (C) 2018-2023 Intel Corporation
++ * Copyright (C) 2018-2024 Intel Corporation
+  */
+ #include <linux/firmware.h>
+ #include "iwl-drv.h"
+@@ -1096,7 +1096,7 @@ static int iwl_dbg_tlv_override_trig_nod
+               node_trig = (void *)node_tlv->data;
+       }
+-      memcpy(node_trig->data + offset, trig->data, trig_data_len);
++      memcpy((u8 *)node_trig->data + offset, trig->data, trig_data_len);
+       node_tlv->length = cpu_to_le32(size);
+       if (policy & IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG) {
diff --git a/queue-6.7/xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch b/queue-6.7/xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch
new file mode 100644 (file)
index 0000000..5900e62
--- /dev/null
@@ -0,0 +1,127 @@
+From d8d222e09dab84a17bb65dda4b94d01c565f5327 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Tue, 16 Jan 2024 15:33:07 +1100
+Subject: xfs: read only mounts with fsopen mount API are busted
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit d8d222e09dab84a17bb65dda4b94d01c565f5327 upstream.
+
+Recently xfs/513 started failing on my test machines testing "-o
+ro,norecovery" mount options. This was being emitted in dmesg:
+
+[ 9906.932724] XFS (pmem0): no-recovery mounts must be read-only.
+
+Turns out, readonly mounts with the fsopen()/fsconfig() mount API
+have been busted since day zero. It's only taken 5 years for debian
+unstable to start using this "new" mount API, and shortly after this
+I noticed xfs/513 had started to fail as per above.
+
+The syscall trace is:
+
+fsopen("xfs", FSOPEN_CLOEXEC)           = 3
+mount_setattr(-1, NULL, 0, NULL, 0)     = -1 EINVAL (Invalid argument)
+.....
+fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/pmem0", 0) = 0
+fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0
+fsconfig(3, FSCONFIG_SET_FLAG, "norecovery", NULL, 0) = 0
+fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = -1 EINVAL (Invalid argument)
+close(3)                                = 0
+
+Showing that the actual mount instantiation (FSCONFIG_CMD_CREATE) is
+what threw out the error.
+
+During mount instantiation, we call xfs_fs_validate_params() which
+does:
+
+        /* No recovery flag requires a read-only mount */
+        if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
+                xfs_warn(mp, "no-recovery mounts must be read-only.");
+                return -EINVAL;
+        }
+
+and xfs_is_readonly() checks internal mount flags for read only
+state. This state is set in xfs_init_fs_context() from the
+context superblock flag state:
+
+        /*
+         * Copy binary VFS mount flags we are interested in.
+         */
+        if (fc->sb_flags & SB_RDONLY)
+                set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+
+With the old mount API, all of the VFS specific superblock flags
+had already been parsed and set before xfs_init_fs_context() is
+called, so this all works fine.
+
+However, in the brave new fsopen/fsconfig world,
+xfs_init_fs_context() is called from fsopen() context, before any
+VFS superblock have been set or parsed. Hence if we use fsopen(),
+the internal XFS readonly state is *never set*. Hence anything that
+depends on xfs_is_readonly() actually returning true for read only
+mounts is broken if fsopen() has been used to mount the filesystem.
+
+Fix this by moving this internal state initialisation to
+xfs_fs_fill_super() before we attempt to validate the parameters
+that have been set prior to the FSCONFIG_CMD_CREATE call being made.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Fixes: 73e5fff98b64 ("xfs: switch to use the new mount-api")
+cc: stable@vger.kernel.org
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_super.c |   27 +++++++++++++++++----------
+ 1 file changed, 17 insertions(+), 10 deletions(-)
+
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -1510,6 +1510,18 @@ xfs_fs_fill_super(
+       mp->m_super = sb;
++      /*
++       * Copy VFS mount flags from the context now that all parameter parsing
++       * is guaranteed to have been completed by either the old mount API or
++       * the newer fsopen/fsconfig API.
++       */
++      if (fc->sb_flags & SB_RDONLY)
++              set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
++      if (fc->sb_flags & SB_DIRSYNC)
++              mp->m_features |= XFS_FEAT_DIRSYNC;
++      if (fc->sb_flags & SB_SYNCHRONOUS)
++              mp->m_features |= XFS_FEAT_WSYNC;
++
+       error = xfs_fs_validate_params(mp);
+       if (error)
+               return error;
+@@ -1979,6 +1991,11 @@ static const struct fs_context_operation
+       .free        = xfs_fs_free,
+ };
++/*
++ * WARNING: do not initialise any parameters in this function that depend on
++ * mount option parsing having already been performed as this can be called from
++ * fsopen() before any parameters have been set.
++ */
+ static int xfs_init_fs_context(
+       struct fs_context       *fc)
+ {
+@@ -2010,16 +2027,6 @@ static int xfs_init_fs_context(
+       mp->m_logbsize = -1;
+       mp->m_allocsize_log = 16; /* 64k */
+-      /*
+-       * Copy binary VFS mount flags we are interested in.
+-       */
+-      if (fc->sb_flags & SB_RDONLY)
+-              set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+-      if (fc->sb_flags & SB_DIRSYNC)
+-              mp->m_features |= XFS_FEAT_DIRSYNC;
+-      if (fc->sb_flags & SB_SYNCHRONOUS)
+-              mp->m_features |= XFS_FEAT_WSYNC;
+-
+       fc->s_fs_info = mp;
+       fc->ops = &xfs_context_ops;