From: Greg Kroah-Hartman Date: Sat, 27 Jan 2024 22:16:48 +0000 (-0800) Subject: 6.7-stable patches X-Git-Tag: v6.1.76~55 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f38fea462273ab2b7a0c269a30b2d55555820917;p=thirdparty%2Fkernel%2Fstable-queue.git 6.7-stable patches added patches: btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch btrfs-zoned-fix-lock-ordering-in-btrfs_zone_activate.patch cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch drm-amdgpu-fix-the-null-pointer-when-load-rlc-firmware.patch exec-fix-error-handling-in-begin_new_exec.patch firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch nfsd-fix-release_lockowner.patch ovl-mark-xwhiteouts-directory-with-overlay.opaque-x.patch platform-x86-intel-uncore-freq-fix-types-in-sysfs-callbacks.patch platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch rbd-don-t-move-requests-to-the-running-list-on-errors.patch revert-drivers-firmware-move-sysfb_init-from-device_initcall-to-subsys_initcall_sync.patch revert-drm-i915-dsi-do-display-on-sequence-later-on-icl.patch revert-nouveau-push-event-block-allowing-out-of-the-fence-context.patch wifi-iwlwifi-fix-a-memory-corruption.patch 
xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch --- diff --git a/queue-6.7/btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch b/queue-6.7/btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch new file mode 100644 index 00000000000..99e3e2bfc4e --- /dev/null +++ b/queue-6.7/btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch @@ -0,0 +1,116 @@ +From 3324d0547861b16cf436d54abba7052e0c8aa9de Mon Sep 17 00:00:00 2001 +From: Omar Sandoval +Date: Thu, 4 Jan 2024 11:48:47 -0800 +Subject: btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted + +From: Omar Sandoval + +commit 3324d0547861b16cf436d54abba7052e0c8aa9de upstream. + +Sweet Tea spotted a race between subvolume deletion and snapshotting +that can result in the root item for the snapshot having the +BTRFS_ROOT_SUBVOL_DEAD flag set. The race is: + +Thread 1 | Thread 2 +----------------------------------------------|---------- +btrfs_delete_subvolume | + btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)| + |btrfs_mksubvol + | down_read(subvol_sem) + | create_snapshot + | ... + | create_pending_snapshot + | copy root item from source + down_write(subvol_sem) | + +This flag is only checked in send and swap activate, which this would +cause to fail mysteriously. + +create_snapshot() now checks the root refs to reject a deleted +subvolume, so we can fix this by locking subvol_sem earlier so that the +BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically. 
+ +CC: stable@vger.kernel.org # 4.14+ +Reported-by: Sweet Tea Dorminy +Reviewed-by: Sweet Tea Dorminy +Reviewed-by: Anand Jain +Signed-off-by: Omar Sandoval +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 22 +++++++++++++--------- + 1 file changed, 13 insertions(+), 9 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -4449,6 +4449,8 @@ int btrfs_delete_subvolume(struct btrfs_ + u64 root_flags; + int ret; + ++ down_write(&fs_info->subvol_sem); ++ + /* + * Don't allow to delete a subvolume with send in progress. This is + * inside the inode lock so the error handling that has to drop the bit +@@ -4460,25 +4462,25 @@ int btrfs_delete_subvolume(struct btrfs_ + btrfs_warn(fs_info, + "attempt to delete subvolume %llu during send", + dest->root_key.objectid); +- return -EPERM; ++ ret = -EPERM; ++ goto out_up_write; + } + if (atomic_read(&dest->nr_swapfiles)) { + spin_unlock(&dest->root_item_lock); + btrfs_warn(fs_info, + "attempt to delete subvolume %llu with active swapfile", + root->root_key.objectid); +- return -EPERM; ++ ret = -EPERM; ++ goto out_up_write; + } + root_flags = btrfs_root_flags(&dest->root_item); + btrfs_set_root_flags(&dest->root_item, + root_flags | BTRFS_ROOT_SUBVOL_DEAD); + spin_unlock(&dest->root_item_lock); + +- down_write(&fs_info->subvol_sem); +- + ret = may_destroy_subvol(dest); + if (ret) +- goto out_up_write; ++ goto out_undead; + + btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); + /* +@@ -4488,7 +4490,7 @@ int btrfs_delete_subvolume(struct btrfs_ + */ + ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); + if (ret) +- goto out_up_write; ++ goto out_undead; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { +@@ -4554,15 +4556,17 @@ out_end_trans: + inode->i_flags |= S_DEAD; + out_release: + btrfs_subvolume_release_metadata(root, &block_rsv); +-out_up_write: +- up_write(&fs_info->subvol_sem); ++out_undead: + if (ret) { 
+ spin_lock(&dest->root_item_lock); + root_flags = btrfs_root_flags(&dest->root_item); + btrfs_set_root_flags(&dest->root_item, + root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); + spin_unlock(&dest->root_item_lock); +- } else { ++ } ++out_up_write: ++ up_write(&fs_info->subvol_sem); ++ if (!ret) { + d_invalidate(dentry); + btrfs_prune_dentries(dest); + ASSERT(dest->send_in_progress == 0); diff --git a/queue-6.7/btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch b/queue-6.7/btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch new file mode 100644 index 00000000000..bd9c0e4c745 --- /dev/null +++ b/queue-6.7/btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch @@ -0,0 +1,54 @@ +From 173431b274a9a54fc10b273b46e67f46bcf62d2e Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Wed, 10 Jan 2024 08:58:26 +1030 +Subject: btrfs: defrag: reject unknown flags of btrfs_ioctl_defrag_range_args + +From: Qu Wenruo + +commit 173431b274a9a54fc10b273b46e67f46bcf62d2e upstream. + +Add extra sanity check for btrfs_ioctl_defrag_range_args::flags. + +This is not really to enhance fuzzing tests, but as a preparation for +future expansion on btrfs_ioctl_defrag_range_args. + +In the future we're going to add new members, allowing more fine tuning +for btrfs defrag. Without the -EOPNOTSUPP error, there would be no way +to detect if the kernel supports those new defrag features. 
+ +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 4 ++++ + include/uapi/linux/btrfs.h | 3 +++ + 2 files changed, 7 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -2608,6 +2608,10 @@ static int btrfs_ioctl_defrag(struct fil + ret = -EFAULT; + goto out; + } ++ if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } + /* compression requires us to start the IO */ + if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { + range.flags |= BTRFS_DEFRAG_RANGE_START_IO; +--- a/include/uapi/linux/btrfs.h ++++ b/include/uapi/linux/btrfs.h +@@ -614,6 +614,9 @@ struct btrfs_ioctl_clone_range_args { + */ + #define BTRFS_DEFRAG_RANGE_COMPRESS 1 + #define BTRFS_DEFRAG_RANGE_START_IO 2 ++#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \ ++ BTRFS_DEFRAG_RANGE_START_IO) ++ + struct btrfs_ioctl_defrag_range_args { + /* start of the defrag operation */ + __u64 start; diff --git a/queue-6.7/btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch b/queue-6.7/btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch new file mode 100644 index 00000000000..b3b2731114e --- /dev/null +++ b/queue-6.7/btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch @@ -0,0 +1,97 @@ +From 7081929ab2572920e94d70be3d332e5c9f97095a Mon Sep 17 00:00:00 2001 +From: Omar Sandoval +Date: Thu, 4 Jan 2024 11:48:46 -0800 +Subject: btrfs: don't abort filesystem when attempting to snapshot deleted subvolume + +From: Omar Sandoval + +commit 7081929ab2572920e94d70be3d332e5c9f97095a upstream. 
+ +If the source file descriptor to the snapshot ioctl refers to a deleted +subvolume, we get the following abort: + + BTRFS: Transaction aborted (error -2) + WARNING: CPU: 0 PID: 833 at fs/btrfs/transaction.c:1875 create_pending_snapshot+0x1040/0x1190 [btrfs] + Modules linked in: pata_acpi btrfs ata_piix libata scsi_mod virtio_net blake2b_generic xor net_failover virtio_rng failover scsi_common rng_core raid6_pq libcrc32c + CPU: 0 PID: 833 Comm: t_snapshot_dele Not tainted 6.7.0-rc6 #2 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014 + RIP: 0010:create_pending_snapshot+0x1040/0x1190 [btrfs] + RSP: 0018:ffffa09c01337af8 EFLAGS: 00010282 + RAX: 0000000000000000 RBX: ffff9982053e7c78 RCX: 0000000000000027 + RDX: ffff99827dc20848 RSI: 0000000000000001 RDI: ffff99827dc20840 + RBP: ffffa09c01337c00 R08: 0000000000000000 R09: ffffa09c01337998 + R10: 0000000000000003 R11: ffffffffb96da248 R12: fffffffffffffffe + R13: ffff99820535bb28 R14: ffff99820b7bd000 R15: ffff99820381ea80 + FS: 00007fe20aadabc0(0000) GS:ffff99827dc00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000559a120b502f CR3: 00000000055b6000 CR4: 00000000000006f0 + Call Trace: + + ? create_pending_snapshot+0x1040/0x1190 [btrfs] + ? __warn+0x81/0x130 + ? create_pending_snapshot+0x1040/0x1190 [btrfs] + ? report_bug+0x171/0x1a0 + ? handle_bug+0x3a/0x70 + ? exc_invalid_op+0x17/0x70 + ? asm_exc_invalid_op+0x1a/0x20 + ? create_pending_snapshot+0x1040/0x1190 [btrfs] + ? create_pending_snapshot+0x1040/0x1190 [btrfs] + create_pending_snapshots+0x92/0xc0 [btrfs] + btrfs_commit_transaction+0x66b/0xf40 [btrfs] + btrfs_mksubvol+0x301/0x4d0 [btrfs] + btrfs_mksnapshot+0x80/0xb0 [btrfs] + __btrfs_ioctl_snap_create+0x1c2/0x1d0 [btrfs] + btrfs_ioctl_snap_create_v2+0xc4/0x150 [btrfs] + btrfs_ioctl+0x8a6/0x2650 [btrfs] + ? kmem_cache_free+0x22/0x340 + ? 
do_sys_openat2+0x97/0xe0 + __x64_sys_ioctl+0x97/0xd0 + do_syscall_64+0x46/0xf0 + entry_SYSCALL_64_after_hwframe+0x6e/0x76 + RIP: 0033:0x7fe20abe83af + RSP: 002b:00007ffe6eff1360 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 + RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fe20abe83af + RDX: 00007ffe6eff23c0 RSI: 0000000050009417 RDI: 0000000000000003 + RBP: 0000000000000003 R08: 0000000000000000 R09: 00007fe20ad16cd0 + R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 + R13: 00007ffe6eff13c0 R14: 00007fe20ad45000 R15: 0000559a120b6d58 + + ---[ end trace 0000000000000000 ]--- + BTRFS: error (device vdc: state A) in create_pending_snapshot:1875: errno=-2 No such entry + BTRFS info (device vdc: state EA): forced readonly + BTRFS warning (device vdc: state EA): Skipping commit of aborted transaction. + BTRFS: error (device vdc: state EA) in cleanup_transaction:2055: errno=-2 No such entry + +This happens because create_pending_snapshot() initializes the new root +item as a copy of the source root item. This includes the refs field, +which is 0 for a deleted subvolume. The call to btrfs_insert_root() +therefore inserts a root with refs == 0. btrfs_get_new_fs_root() then +finds the root and returns -ENOENT if refs == 0, which causes +create_pending_snapshot() to abort. + +Fix it by checking the source root's refs before attempting the +snapshot, but after locking subvol_sem to avoid racing with deletion. 
+ +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Sweet Tea Dorminy +Reviewed-by: Anand Jain +Signed-off-by: Omar Sandoval +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -790,6 +790,9 @@ static int create_snapshot(struct btrfs_ + return -EOPNOTSUPP; + } + ++ if (btrfs_root_refs(&root->root_item) == 0) ++ return -ENOENT; ++ + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) + return -EINVAL; + diff --git a/queue-6.7/btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch b/queue-6.7/btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch new file mode 100644 index 00000000000..2c1ee9868d4 --- /dev/null +++ b/queue-6.7/btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch @@ -0,0 +1,37 @@ +From a208b3f132b48e1f94f620024e66fea635925877 Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Mon, 15 Jan 2024 20:30:26 +0100 +Subject: btrfs: don't warn if discard range is not aligned to sector + +From: David Sterba + +commit a208b3f132b48e1f94f620024e66fea635925877 upstream. + +There's a warning in btrfs_issue_discard() when the range is not aligned +to 512 bytes, originally added in 4d89d377bbb0 ("btrfs: +btrfs_issue_discard ensure offset/length are aligned to sector +boundaries"). We can't do sub-sector writes anyway so the adjustment is +the only thing that we can do and the warning is unnecessary. 
+ +CC: stable@vger.kernel.org # 4.19+ +Reported-by: syzbot+4a4f1eba14eb5c3417d1@syzkaller.appspotmail.com +Reviewed-by: Johannes Thumshirn +Reviewed-by: Anand Jain +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent-tree.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -1260,7 +1260,8 @@ static int btrfs_issue_discard(struct bl + u64 bytes_left, end; + u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT); + +- if (WARN_ON(start != aligned_start)) { ++ /* Adjust the range to be aligned to 512B sectors if necessary. */ ++ if (start != aligned_start) { + len -= aligned_start - start; + len = round_down(len, 1 << SECTOR_SHIFT); + start = aligned_start; diff --git a/queue-6.7/btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch b/queue-6.7/btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch new file mode 100644 index 00000000000..c42b72d76c1 --- /dev/null +++ b/queue-6.7/btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch @@ -0,0 +1,57 @@ +From f03e274a8b29d1d1c1bbd7f764766cb5ca537ab7 Mon Sep 17 00:00:00 2001 +From: Fedor Pchelkin +Date: Wed, 3 Jan 2024 13:31:27 +0300 +Subject: btrfs: ref-verify: free ref cache before clearing mount opt + +From: Fedor Pchelkin + +commit f03e274a8b29d1d1c1bbd7f764766cb5ca537ab7 upstream. + +As clearing REF_VERIFY mount option indicates there were some errors in a +ref-verify process, a ref cache is not relevant anymore and should be +freed. + +btrfs_free_ref_cache() requires REF_VERIFY option being set so call +it just before clearing the mount option. + +Found by Linux Verification Center (linuxtesting.org) with Syzkaller. 
+ +Reported-by: syzbot+be14ed7728594dc8bd42@syzkaller.appspotmail.com +Fixes: fd708b81d972 ("Btrfs: add a extent ref verify tool") +CC: stable@vger.kernel.org # 5.4+ +Closes: https://lore.kernel.org/lkml/000000000000e5a65c05ee832054@google.com/ +Reported-by: syzbot+c563a3c79927971f950f@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/lkml/0000000000007fe09705fdc6086c@google.com/ +Reviewed-by: Anand Jain +Signed-off-by: Fedor Pchelkin +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ref-verify.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/ref-verify.c ++++ b/fs/btrfs/ref-verify.c +@@ -889,8 +889,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_i + out_unlock: + spin_unlock(&fs_info->ref_verify_lock); + out: +- if (ret) ++ if (ret) { ++ btrfs_free_ref_cache(fs_info); + btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); ++ } + return ret; + } + +@@ -1021,8 +1023,8 @@ int btrfs_build_ref_tree(struct btrfs_fs + } + } + if (ret) { +- btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); + btrfs_free_ref_cache(fs_info); ++ btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); + } + btrfs_free_path(path); + return ret; diff --git a/queue-6.7/btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch b/queue-6.7/btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch new file mode 100644 index 00000000000..1af82a6bb0a --- /dev/null +++ b/queue-6.7/btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch @@ -0,0 +1,35 @@ +From f398e70dd69e6ceea71463a5380e6118f219197e Mon Sep 17 00:00:00 2001 +From: Chung-Chiang Cheng +Date: Fri, 12 Jan 2024 15:41:05 +0800 +Subject: btrfs: tree-checker: fix inline ref size in error messages + +From: Chung-Chiang Cheng + +commit f398e70dd69e6ceea71463a5380e6118f219197e upstream. + +The error message should accurately reflect the size rather than the +type. 
+ +Fixes: f82d1c7ca8ae ("btrfs: tree-checker: Add EXTENT_ITEM and METADATA_ITEM check") +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Filipe Manana +Reviewed-by: Qu Wenruo +Signed-off-by: Chung-Chiang Cheng +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-checker.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -1436,7 +1436,7 @@ static int check_extent_item(struct exte + if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) { + extent_err(leaf, slot, + "inline ref item overflows extent item, ptr %lu iref size %u end %lu", +- ptr, inline_type, end); ++ ptr, btrfs_extent_inline_ref_size(inline_type), end); + return -EUCLEAN; + } + diff --git a/queue-6.7/btrfs-zoned-fix-lock-ordering-in-btrfs_zone_activate.patch b/queue-6.7/btrfs-zoned-fix-lock-ordering-in-btrfs_zone_activate.patch new file mode 100644 index 00000000000..ded22c82cb9 --- /dev/null +++ b/queue-6.7/btrfs-zoned-fix-lock-ordering-in-btrfs_zone_activate.patch @@ -0,0 +1,99 @@ +From b18f3b60b35a8c01c9a2a0f0d6424c6d73971dc3 Mon Sep 17 00:00:00 2001 +From: Naohiro Aota +Date: Fri, 22 Dec 2023 13:56:34 +0900 +Subject: btrfs: zoned: fix lock ordering in btrfs_zone_activate() + +From: Naohiro Aota + +commit b18f3b60b35a8c01c9a2a0f0d6424c6d73971dc3 upstream. + +The btrfs CI reported a lockdep warning as follows by running +generic/129. + + WARNING: possible circular locking dependency detected + 6.7.0-rc5+ #1 Not tainted + ------------------------------------------------------ + kworker/u5:5/793427 is trying to acquire lock: + ffff88813256d028 (&cache->lock){+.+.}-{2:2}, at: btrfs_zone_finish_one_bg+0x5e/0x130 + but task is already holding lock: + ffff88810a23a318 (&fs_info->zone_active_bgs_lock){+.+.}-{2:2}, at: btrfs_zone_finish_one_bg+0x34/0x130 + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + -> #1 (&fs_info->zone_active_bgs_lock){+.+.}-{2:2}: + ... + -> #0 (&cache->lock){+.+.}-{2:2}: + ... + +This is because we take fs_info->zone_active_bgs_lock after a block_group's +lock in btrfs_zone_activate() while doing the opposite in other places. + +Fix the issue by expanding the fs_info->zone_active_bgs_lock's critical +section and taking it before a block_group's lock. + +Fixes: a7e1ac7bdc5a ("btrfs: zoned: reserve zones for an active metadata/system block group") +CC: stable@vger.kernel.org # 6.6 +Signed-off-by: Naohiro Aota +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/zoned.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -2094,6 +2094,7 @@ bool btrfs_zone_activate(struct btrfs_bl + + map = block_group->physical_map; + ++ spin_lock(&fs_info->zone_active_bgs_lock); + spin_lock(&block_group->lock); + if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { + ret = true; +@@ -2106,7 +2107,6 @@ bool btrfs_zone_activate(struct btrfs_bl + goto out_unlock; + } + +- spin_lock(&fs_info->zone_active_bgs_lock); + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_zoned_device_info *zinfo; + int reserved = 0; +@@ -2126,20 +2126,17 @@ bool btrfs_zone_activate(struct btrfs_bl + */ + if (atomic_read(&zinfo->active_zones_left) <= reserved) { + ret = false; +- spin_unlock(&fs_info->zone_active_bgs_lock); + goto out_unlock; + } + + if (!btrfs_dev_set_active_zone(device, physical)) { + /* Cannot activate the zone */ + ret = false; +- spin_unlock(&fs_info->zone_active_bgs_lock); + goto out_unlock; + } + if (!is_data) + zinfo->reserved_active_zones--; + } +- spin_unlock(&fs_info->zone_active_bgs_lock); + + /* Successfully activated all the zones */ + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); +@@ -2147,8 +2144,6 @@ bool 
btrfs_zone_activate(struct btrfs_bl + + /* For the active block group list */ + btrfs_get_block_group(block_group); +- +- spin_lock(&fs_info->zone_active_bgs_lock); + list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); + spin_unlock(&fs_info->zone_active_bgs_lock); + +@@ -2156,6 +2151,7 @@ bool btrfs_zone_activate(struct btrfs_bl + + out_unlock: + spin_unlock(&block_group->lock); ++ spin_unlock(&fs_info->zone_active_bgs_lock); + return ret; + } + diff --git a/queue-6.7/cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch b/queue-6.7/cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch new file mode 100644 index 00000000000..b1727282b01 --- /dev/null +++ b/queue-6.7/cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch @@ -0,0 +1,138 @@ +From 192cdb1c907fd8df2d764c5bb17496e415e59391 Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Mon, 22 Jan 2024 15:18:11 +0100 +Subject: cpufreq: intel_pstate: Refine computation of P-state for given frequency + +From: Rafael J. Wysocki + +commit 192cdb1c907fd8df2d764c5bb17496e415e59391 upstream. + +On systems using HWP, if a given frequency is equal to the maximum turbo +frequency or the maximum non-turbo frequency, the HWP performance level +corresponding to it is already known and can be used directly without +any computation. + +Accordingly, adjust the code to use the known HWP performance levels in +the cases mentioned above. + +This also helps to avoid limiting CPU capacity artificially in some +cases when the BIOS produces the HWP_CAP numbers using a different +E-core-to-P-core performance scaling factor than expected by the kernel. + +Fixes: f5c8cf2a4992 ("cpufreq: intel_pstate: hybrid: Use known scaling factor for P-cores") +Cc: 6.1+ # 6.1+ +Tested-by: Srinivas Pandruvada +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/intel_pstate.c | 55 +++++++++++++++++++++++++---------------- + 1 file changed, 34 insertions(+), 21 deletions(-) + +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -526,6 +526,30 @@ static int intel_pstate_cppc_get_scaling + } + #endif /* CONFIG_ACPI_CPPC_LIB */ + ++static int intel_pstate_freq_to_hwp_rel(struct cpudata *cpu, int freq, ++ unsigned int relation) ++{ ++ if (freq == cpu->pstate.turbo_freq) ++ return cpu->pstate.turbo_pstate; ++ ++ if (freq == cpu->pstate.max_freq) ++ return cpu->pstate.max_pstate; ++ ++ switch (relation) { ++ case CPUFREQ_RELATION_H: ++ return freq / cpu->pstate.scaling; ++ case CPUFREQ_RELATION_C: ++ return DIV_ROUND_CLOSEST(freq, cpu->pstate.scaling); ++ } ++ ++ return DIV_ROUND_UP(freq, cpu->pstate.scaling); ++} ++ ++static int intel_pstate_freq_to_hwp(struct cpudata *cpu, int freq) ++{ ++ return intel_pstate_freq_to_hwp_rel(cpu, freq, CPUFREQ_RELATION_L); ++} ++ + /** + * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels. + * @cpu: Target CPU. 
+@@ -543,6 +567,7 @@ static void intel_pstate_hybrid_hwp_adju + int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; + int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu); + int scaling = cpu->pstate.scaling; ++ int freq; + + pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); + pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); +@@ -556,16 +581,16 @@ static void intel_pstate_hybrid_hwp_adju + cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, + perf_ctl_scaling); + +- cpu->pstate.max_pstate_physical = +- DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling, +- scaling); ++ freq = perf_ctl_max_phys * perf_ctl_scaling; ++ cpu->pstate.max_pstate_physical = intel_pstate_freq_to_hwp(cpu, freq); + +- cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling; ++ freq = cpu->pstate.min_pstate * perf_ctl_scaling; ++ cpu->pstate.min_freq = freq; + /* + * Cast the min P-state value retrieved via pstate_funcs.get_min() to + * the effective range of HWP performance levels. + */ +- cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling); ++ cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq); + } + + static inline void update_turbo_state(void) +@@ -2524,13 +2549,12 @@ static void intel_pstate_update_perf_lim + * abstract values to represent performance rather than pure ratios. 
+ */ + if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) { +- int scaling = cpu->pstate.scaling; + int freq; + + freq = max_policy_perf * perf_ctl_scaling; +- max_policy_perf = DIV_ROUND_UP(freq, scaling); ++ max_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); + freq = min_policy_perf * perf_ctl_scaling; +- min_policy_perf = DIV_ROUND_UP(freq, scaling); ++ min_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); + } + + pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n", +@@ -2904,18 +2928,7 @@ static int intel_cpufreq_target(struct c + + cpufreq_freq_transition_begin(policy, &freqs); + +- switch (relation) { +- case CPUFREQ_RELATION_L: +- target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling); +- break; +- case CPUFREQ_RELATION_H: +- target_pstate = freqs.new / cpu->pstate.scaling; +- break; +- default: +- target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling); +- break; +- } +- ++ target_pstate = intel_pstate_freq_to_hwp_rel(cpu, freqs.new, relation); + target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false); + + freqs.new = target_pstate * cpu->pstate.scaling; +@@ -2933,7 +2946,7 @@ static unsigned int intel_cpufreq_fast_s + + update_turbo_state(); + +- target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); ++ target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq); + + target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); + diff --git a/queue-6.7/drm-amdgpu-fix-the-null-pointer-when-load-rlc-firmware.patch b/queue-6.7/drm-amdgpu-fix-the-null-pointer-when-load-rlc-firmware.patch new file mode 100644 index 00000000000..a39368dc69a --- /dev/null +++ b/queue-6.7/drm-amdgpu-fix-the-null-pointer-when-load-rlc-firmware.patch @@ -0,0 +1,49 @@ +From bc03c02cc1991a066b23e69bbcc0f66e8f1f7453 Mon Sep 17 00:00:00 2001 +From: Ma Jun +Date: Fri, 12 Jan 2024 13:33:24 +0800 +Subject: drm/amdgpu: Fix the null pointer when load rlc firmware + +From: Ma Jun + +commit 
bc03c02cc1991a066b23e69bbcc0f66e8f1f7453 upstream. + +If the RLC firmware is invalid because of wrong header size, +the pointer to the rlc firmware is released in function +amdgpu_ucode_request. There will be a null pointer error +in subsequent use. So skip validation to fix it. + +Fixes: 3da9b71563cb ("drm/amd: Use `amdgpu_ucode_*` helpers for GFX10") +Signed-off-by: Ma Jun +Acked-by: Alex Deucher +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +@@ -3996,16 +3996,13 @@ static int gfx_v10_0_init_microcode(stru + + if (!amdgpu_sriov_vf(adev)) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); +- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); +- /* don't check this. There are apparently firmwares in the wild with +- * incorrect size in the header +- */ +- if (err == -ENODEV) +- goto out; ++ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); + if (err) +- dev_dbg(adev->dev, +- "gfx10: amdgpu_ucode_request() failed \"%s\"\n", +- fw_name); ++ goto out; ++ ++ /* don't validate this firmware. 
There are apparently firmwares ++ * in the wild with incorrect size in the header ++ */ + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); diff --git a/queue-6.7/exec-fix-error-handling-in-begin_new_exec.patch b/queue-6.7/exec-fix-error-handling-in-begin_new_exec.patch new file mode 100644 index 00000000000..bde8ac97011 --- /dev/null +++ b/queue-6.7/exec-fix-error-handling-in-begin_new_exec.patch @@ -0,0 +1,37 @@ +From 84c39ec57d409e803a9bb6e4e85daf1243e0e80b Mon Sep 17 00:00:00 2001 +From: Bernd Edlinger +Date: Mon, 22 Jan 2024 19:34:21 +0100 +Subject: exec: Fix error handling in begin_new_exec() + +From: Bernd Edlinger + +commit 84c39ec57d409e803a9bb6e4e85daf1243e0e80b upstream. + +If get_unused_fd_flags() fails, the error handling is incomplete because +bprm->cred is already set to NULL, and therefore free_bprm will not +unlock the cred_guard_mutex. Note there are two error conditions which +end up here, one before and one after bprm->cred is cleared. + +Fixes: b8a61c9e7b4a ("exec: Generic execfd support") +Signed-off-by: Bernd Edlinger +Acked-by: Eric W. 
Biederman +Link: https://lore.kernel.org/r/AS8P193MB128517ADB5EFF29E04389EDAE4752@AS8P193MB1285.EURP193.PROD.OUTLOOK.COM +Cc: stable@vger.kernel.org +Signed-off-by: Kees Cook +Signed-off-by: Greg Kroah-Hartman +--- + fs/exec.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1408,6 +1408,9 @@ int begin_new_exec(struct linux_binprm * + + out_unlock: + up_write(&me->signal->exec_update_lock); ++ if (!bprm->cred) ++ mutex_unlock(&me->signal->cred_guard_mutex); ++ + out: + return retval; + } diff --git a/queue-6.7/firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch b/queue-6.7/firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch new file mode 100644 index 00000000000..3e22944b14a --- /dev/null +++ b/queue-6.7/firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch @@ -0,0 +1,96 @@ +From 437a310b22244d4e0b78665c3042e5d1c0f45306 Mon Sep 17 00:00:00 2001 +From: Cristian Marussi +Date: Wed, 20 Dec 2023 17:21:12 +0000 +Subject: firmware: arm_scmi: Check mailbox/SMT channel for consistency + +From: Cristian Marussi + +commit 437a310b22244d4e0b78665c3042e5d1c0f45306 upstream. + +On reception of a completion interrupt the shared memory area is accessed +to retrieve the message header at first and then, if the message sequence +number identifies a transaction which is still pending, the related +payload is fetched too. + +When an SCMI command times out the channel ownership remains with the +platform until eventually a late reply is received and, as a consequence, +any further transmission attempt remains pending, waiting for the channel +to be relinquished by the platform. + +Once that late reply is received the channel ownership is given back +to the agent and any pending request is then allowed to proceed and +overwrite the SMT area of the just delivered late reply; then the wait +for the reply to the new request starts. 
+ +It has been observed that the spurious IRQ related to the late reply can +be wrongly associated with the freshly enqueued request: when that happens +the SCMI stack in-flight lookup procedure is fooled by the fact that the +message header now present in the SMT area is related to the new pending +transaction, even though the real reply has still to arrive. + +This race-condition on the A2P channel can be detected by looking at the +channel status bits: a genuine reply from the platform will have set the +channel free bit before triggering the completion IRQ. + +Add a consistency check to validate such condition in the A2P ISR. + +Reported-by: Xinglong Yang +Closes: https://lore.kernel.org/all/PUZPR06MB54981E6FA00D82BFDBB864FBF08DA@PUZPR06MB5498.apcprd06.prod.outlook.com/ +Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") +Cc: stable@vger.kernel.org # 5.15+ +Signed-off-by: Cristian Marussi +Tested-by: Xinglong Yang +Link: https://lore.kernel.org/r/20231220172112.763539-1-cristian.marussi@arm.com +Signed-off-by: Sudeep Holla +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/arm_scmi/common.h | 1 + + drivers/firmware/arm_scmi/mailbox.c | 14 ++++++++++++++ + drivers/firmware/arm_scmi/shmem.c | 6 ++++++ + 3 files changed, 21 insertions(+) + +--- a/drivers/firmware/arm_scmi/common.h ++++ b/drivers/firmware/arm_scmi/common.h +@@ -314,6 +314,7 @@ void shmem_fetch_notification(struct scm + void shmem_clear_channel(struct scmi_shared_mem __iomem *shmem); + bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, + struct scmi_xfer *xfer); ++bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem); + + /* declarations for message passing transports */ + struct scmi_msg_payld; +--- a/drivers/firmware/arm_scmi/mailbox.c ++++ b/drivers/firmware/arm_scmi/mailbox.c +@@ -45,6 +45,20 @@ static void rx_callback(struct mbox_clie + { + struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl); + ++ /* ++ * An A2P IRQ is NOT 
valid when received while the platform still has ++ * the ownership of the channel, because the platform at first releases ++ * the SMT channel and then sends the completion interrupt. ++ * ++ * This addresses a possible race condition in which a spurious IRQ from ++ * a previous timed-out reply which arrived late could be wrongly ++ * associated with the next pending transaction. ++ */ ++ if (cl->knows_txdone && !shmem_channel_free(smbox->shmem)) { ++ dev_warn(smbox->cinfo->dev, "Ignoring spurious A2P IRQ !\n"); ++ return; ++ } ++ + scmi_rx_callback(smbox->cinfo, shmem_read_header(smbox->shmem), NULL); + } + +--- a/drivers/firmware/arm_scmi/shmem.c ++++ b/drivers/firmware/arm_scmi/shmem.c +@@ -122,3 +122,9 @@ bool shmem_poll_done(struct scmi_shared_ + (SCMI_SHMEM_CHAN_STAT_CHANNEL_ERROR | + SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); + } ++ ++bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem) ++{ ++ return (ioread32(&shmem->channel_status) & ++ SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); ++} diff --git a/queue-6.7/gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch b/queue-6.7/gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch new file mode 100644 index 00000000000..d7a0e717ed2 --- /dev/null +++ b/queue-6.7/gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch @@ -0,0 +1,46 @@ +From 805c74eac8cb306dc69b87b6b066ab4da77ceaf1 Mon Sep 17 00:00:00 2001 +From: Mario Limonciello +Date: Wed, 17 Jan 2024 08:29:42 -0600 +Subject: gpiolib: acpi: Ignore touchpad wakeup on GPD G1619-04 + +From: Mario Limonciello + +commit 805c74eac8cb306dc69b87b6b066ab4da77ceaf1 upstream. + +Spurious wakeups are reported on the GPD G1619-04 which +can be absolved by programming the GPIO to ignore wakeups. 
+ +Cc: stable@vger.kernel.org +Reported-and-tested-by: George Melikov +Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3073 +Signed-off-by: Mario Limonciello +Reviewed-by: Andy Shevchenko +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpio/gpiolib-acpi.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/drivers/gpio/gpiolib-acpi.c ++++ b/drivers/gpio/gpiolib-acpi.c +@@ -1651,6 +1651,20 @@ static const struct dmi_system_id gpioli + .ignore_interrupt = "INT33FC:00@3", + }, + }, ++ { ++ /* ++ * Spurious wakeups from TP_ATTN# pin ++ * Found in BIOS 0.35 ++ * https://gitlab.freedesktop.org/drm/amd/-/issues/3073 ++ */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "GPD"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"), ++ }, ++ .driver_data = &(struct acpi_gpiolib_dmi_quirk) { ++ .ignore_wake = "PNP0C50:00@8", ++ }, ++ }, + {} /* Terminating entry */ + }; + diff --git a/queue-6.7/hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch b/queue-6.7/hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch new file mode 100644 index 00000000000..b7a876cf5c9 --- /dev/null +++ b/queue-6.7/hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch @@ -0,0 +1,56 @@ +From 6941f67ad37d5465b75b9ffc498fcf6897a3c00e Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Mon, 22 Jan 2024 08:20:28 -0800 +Subject: hv_netvsc: Calculate correct ring size when PAGE_SIZE is not 4 Kbytes + +From: Michael Kelley + +commit 6941f67ad37d5465b75b9ffc498fcf6897a3c00e upstream. + +Current code in netvsc_drv_init() incorrectly assumes that PAGE_SIZE +is 4 Kbytes, which is wrong on ARM64 with 16K or 64K page size. As a +result, the default VMBus ring buffer size on ARM64 with 64K page size +is 8 Mbytes instead of the expected 512 Kbytes. 
While this doesn't break +anything, a typical VM with 8 vCPUs and 8 netvsc channels wastes 120 +Mbytes (8 channels * 2 ring buffers/channel * 7.5 Mbytes/ring buffer). + +Unfortunately, the module parameter specifying the ring buffer size +is in units of 4 Kbyte pages. Ideally, it should be in units that +are independent of PAGE_SIZE, but backwards compatibility prevents +changing that now. + +Fix this by having netvsc_drv_init() hardcode 4096 instead of using +PAGE_SIZE when calculating the ring buffer size in bytes. Also +use the VMBUS_RING_SIZE macro to ensure proper alignment when running +with page size larger than 4K. + +Cc: # 5.15.x +Fixes: 7aff79e297ee ("Drivers: hv: Enable Hyper-V code to be built on ARM64") +Signed-off-by: Michael Kelley +Link: https://lore.kernel.org/r/20240122162028.348885-1-mhklinux@outlook.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -44,7 +44,7 @@ + + static unsigned int ring_size __ro_after_init = 128; + module_param(ring_size, uint, 0444); +-MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); ++MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); + unsigned int netvsc_ring_bytes __ro_after_init; + + static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | +@@ -2805,7 +2805,7 @@ static int __init netvsc_drv_init(void) + pr_info("Increased ring_size to %u (min allowed)\n", + ring_size); + } +- netvsc_ring_bytes = ring_size * PAGE_SIZE; ++ netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096); + + register_netdevice_notifier(&netvsc_netdev_notifier); + diff --git a/queue-6.7/ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch b/queue-6.7/ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch new file mode 100644 index 00000000000..c67b18bb56e --- /dev/null +++ b/queue-6.7/ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch 
@@ -0,0 +1,126 @@ +From ebeae8adf89d9a82359f6659b1663d09beec2faa Mon Sep 17 00:00:00 2001 +From: Lin Ma +Date: Sun, 21 Jan 2024 15:35:06 +0800 +Subject: ksmbd: fix global oob in ksmbd_nl_policy + +From: Lin Ma + +commit ebeae8adf89d9a82359f6659b1663d09beec2faa upstream. + +Similar to a reported issue (check the commit b33fb5b801c6 ("net: +qualcomm: rmnet: fix global oob in rmnet_policy"), my local fuzzer finds +another global out-of-bounds read for policy ksmbd_nl_policy. See bug +trace below: + +================================================================== +BUG: KASAN: global-out-of-bounds in validate_nla lib/nlattr.c:386 [inline] +BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600 +Read of size 1 at addr ffffffff8f24b100 by task syz-executor.1/62810 + +CPU: 0 PID: 62810 Comm: syz-executor.1 Tainted: G N 6.1.0 #3 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x8b/0xb3 lib/dump_stack.c:106 + print_address_description mm/kasan/report.c:284 [inline] + print_report+0x172/0x475 mm/kasan/report.c:395 + kasan_report+0xbb/0x1c0 mm/kasan/report.c:495 + validate_nla lib/nlattr.c:386 [inline] + __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600 + __nla_parse+0x3e/0x50 lib/nlattr.c:697 + __nlmsg_parse include/net/netlink.h:748 [inline] + genl_family_rcv_msg_attrs_parse.constprop.0+0x1b0/0x290 net/netlink/genetlink.c:565 + genl_family_rcv_msg_doit+0xda/0x330 net/netlink/genetlink.c:734 + genl_family_rcv_msg net/netlink/genetlink.c:833 [inline] + genl_rcv_msg+0x441/0x780 net/netlink/genetlink.c:850 + netlink_rcv_skb+0x14f/0x410 net/netlink/af_netlink.c:2540 + genl_rcv+0x24/0x40 net/netlink/genetlink.c:861 + netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] + netlink_unicast+0x54e/0x800 net/netlink/af_netlink.c:1345 + netlink_sendmsg+0x930/0xe50 net/netlink/af_netlink.c:1921 + sock_sendmsg_nosec 
net/socket.c:714 [inline] + sock_sendmsg+0x154/0x190 net/socket.c:734 + ____sys_sendmsg+0x6df/0x840 net/socket.c:2482 + ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 + __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd +RIP: 0033:0x7fdd66a8f359 +Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007fdd65e00168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007fdd66bbcf80 RCX: 00007fdd66a8f359 +RDX: 0000000000000000 RSI: 0000000020000500 RDI: 0000000000000003 +RBP: 00007fdd66ada493 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: 00007ffc84b81aff R14: 00007fdd65e00300 R15: 0000000000022000 + + +The buggy address belongs to the variable: + ksmbd_nl_policy+0x100/0xa80 + +The buggy address belongs to the physical page: +page:0000000034f47940 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1ccc4b +flags: 0x200000000001000(reserved|node=0|zone=2) +raw: 0200000000001000 ffffea00073312c8 ffffea00073312c8 0000000000000000 +raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffffffff8f24b000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ffffffff8f24b080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +>ffffffff8f24b100: f9 f9 f9 f9 00 00 f9 f9 f9 f9 f9 f9 00 00 07 f9 + ^ + ffffffff8f24b180: f9 f9 f9 f9 00 05 f9 f9 f9 f9 f9 f9 00 00 00 05 + ffffffff8f24b200: f9 f9 f9 f9 00 00 03 f9 f9 f9 f9 f9 00 00 04 f9 +================================================================== + +To fix it, add a placeholder named __KSMBD_EVENT_MAX and let +KSMBD_EVENT_MAX to 
be its original value - 1 according to what other +netlink families do. Also change two sites that refer the +KSMBD_EVENT_MAX to correct value. + +Cc: stable@vger.kernel.org +Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") +Signed-off-by: Lin Ma +Acked-by: Namjae Jeon +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/server/ksmbd_netlink.h | 3 ++- + fs/smb/server/transport_ipc.c | 4 ++-- + 2 files changed, 4 insertions(+), 3 deletions(-) + +--- a/fs/smb/server/ksmbd_netlink.h ++++ b/fs/smb/server/ksmbd_netlink.h +@@ -304,7 +304,8 @@ enum ksmbd_event { + KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST, + KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE = 15, + +- KSMBD_EVENT_MAX ++ __KSMBD_EVENT_MAX, ++ KSMBD_EVENT_MAX = __KSMBD_EVENT_MAX - 1 + }; + + /* +--- a/fs/smb/server/transport_ipc.c ++++ b/fs/smb/server/transport_ipc.c +@@ -74,7 +74,7 @@ static int handle_unsupported_event(stru + static int handle_generic_event(struct sk_buff *skb, struct genl_info *info); + static int ksmbd_ipc_heartbeat_request(void); + +-static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX] = { ++static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = { + [KSMBD_EVENT_UNSPEC] = { + .len = 0, + }, +@@ -403,7 +403,7 @@ static int handle_generic_event(struct s + return -EPERM; + #endif + +- if (type >= KSMBD_EVENT_MAX) { ++ if (type > KSMBD_EVENT_MAX) { + WARN_ON(1); + return -EINVAL; + } diff --git a/queue-6.7/netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch b/queue-6.7/netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch new file mode 100644 index 00000000000..100551132af --- /dev/null +++ b/queue-6.7/netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch @@ -0,0 +1,69 @@ +From f342de4e2f33e0e39165d8639387aa6c19dff660 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Sat, 20 Jan 2024 22:50:04 +0100 +Subject: netfilter: nf_tables: reject QUEUE/DROP verdict parameters + +From: Florian 
Westphal + +commit f342de4e2f33e0e39165d8639387aa6c19dff660 upstream. + +This reverts commit e0abdadcc6e1. + +core.c:nf_hook_slow assumes that the upper 16 bits of NF_DROP +verdicts contain a valid errno, i.e. -EPERM, -EHOSTUNREACH or similar, +or 0. + +Due to the reverted commit, it's possible to provide a positive +value, e.g. NF_ACCEPT (1), which results in use-after-free. + +It's not clear to me why this commit was made. + +NF_QUEUE is not used by nftables; "queue" rules in nftables +will result in use of "nft_queue" expression. + +If we later need to allow specifying errno values from userspace +(do not know why), this has to call NF_DROP_GETERR and check that +"err <= 0" holds true. + +Fixes: e0abdadcc6e1 ("netfilter: nf_tables: accept QUEUE/DROP verdict parameters") +Cc: stable@vger.kernel.org +Reported-by: Notselwyn +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -10909,16 +10909,10 @@ static int nft_verdict_init(const struct + data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); + + switch (data->verdict.code) { +- default: +- switch (data->verdict.code & NF_VERDICT_MASK) { +- case NF_ACCEPT: +- case NF_DROP: +- case NF_QUEUE: +- break; +- default: +- return -EINVAL; +- } +- fallthrough; ++ case NF_ACCEPT: ++ case NF_DROP: ++ case NF_QUEUE: ++ break; + case NFT_CONTINUE: + case NFT_BREAK: + case NFT_RETURN: +@@ -10953,6 +10947,8 @@ static int nft_verdict_init(const struct + + data->verdict.chain = chain; + break; ++ default: ++ return -EINVAL; + } + + desc->len = sizeof(data->verdict); diff --git a/queue-6.7/netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch b/queue-6.7/netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch new file mode
100644 index 00000000000..ce0b4687f08 --- /dev/null +++ b/queue-6.7/netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch @@ -0,0 +1,57 @@ +From 01acb2e8666a6529697141a6017edbf206921913 Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Thu, 18 Jan 2024 10:56:26 +0100 +Subject: netfilter: nft_chain_filter: handle NETDEV_UNREGISTER for inet/ingress basechain + +From: Pablo Neira Ayuso + +commit 01acb2e8666a6529697141a6017edbf206921913 upstream. + +Remove netdevice from inet/ingress basechain in case NETDEV_UNREGISTER +event is reported, otherwise a stale reference to netdevice remains in +the hook list. + +Fixes: 60a3815da702 ("netfilter: add inet ingress support") +Cc: stable@vger.kernel.org +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_chain_filter.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/net/netfilter/nft_chain_filter.c ++++ b/net/netfilter/nft_chain_filter.c +@@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct + unsigned long event, void *ptr) + { + struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ struct nft_base_chain *basechain; + struct nftables_pernet *nft_net; +- struct nft_table *table; + struct nft_chain *chain, *nr; ++ struct nft_table *table; + struct nft_ctx ctx = { + .net = dev_net(dev), + }; +@@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct + nft_net = nft_pernet(ctx.net); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { +- if (table->family != NFPROTO_NETDEV) ++ if (table->family != NFPROTO_NETDEV && ++ table->family != NFPROTO_INET) + continue; + + ctx.family = table->family; +@@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct + if (!nft_is_base_chain(chain)) + continue; + ++ basechain = nft_base_chain(chain); ++ if (table->family == NFPROTO_INET && ++ basechain->ops.hooknum != NF_INET_INGRESS) ++ continue; ++ + ctx.chain = chain; + 
nft_netdev_event(event, dev, &ctx); + } diff --git a/queue-6.7/nfsd-fix-release_lockowner.patch b/queue-6.7/nfsd-fix-release_lockowner.patch new file mode 100644 index 00000000000..2d6202af6b7 --- /dev/null +++ b/queue-6.7/nfsd-fix-release_lockowner.patch @@ -0,0 +1,144 @@ +From edcf9725150e42beeca42d085149f4c88fa97afd Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Mon, 22 Jan 2024 14:58:16 +1100 +Subject: nfsd: fix RELEASE_LOCKOWNER + +From: NeilBrown + +commit edcf9725150e42beeca42d085149f4c88fa97afd upstream. + +The test on so_count in nfsd4_release_lockowner() is nonsense and +harmful. Revert to using check_for_locks(), changing that to not sleep. + +First: harmful. +As is documented in the kdoc comment for nfsd4_release_lockowner(), the +test on so_count can transiently return a false positive resulting in a +return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is +clearly a protocol violation and with the Linux NFS client it can cause +incorrect behaviour. + +If RELEASE_LOCKOWNER is sent while some other thread is still +processing a LOCK request which failed because, at the time that request +was received, the given owner held a conflicting lock, then the nfsd +thread processing that LOCK request can hold a reference (conflock) to +the lock owner that causes nfsd4_release_lockowner() to return an +incorrect error. + +The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it +never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so +it knows that the error is impossible. It assumes the lock owner was in +fact released so it feels free to use the same lock owner identifier in +some later locking request. + +When it does reuse a lock owner identifier for which a previous RELEASE +failed, it will naturally use a lock_seqid of zero. However the server, +which didn't release the lock owner, will expect a larger lock_seqid and +so will respond with NFS4ERR_BAD_SEQID. 
+ +So clearly it is harmful to allow a false positive, which testing +so_count allows. + +The test is nonsense because ... well... it doesn't mean anything. + +so_count is the sum of three different counts. +1/ the set of states listed on so_stateids +2/ the set of active vfs locks owned by any of those states +3/ various transient counts such as for conflicting locks. + +When it is tested against '2' it is clear that one of these is the +transient reference obtained by find_lockowner_str_locked(). It is not +clear what the other one is expected to be. + +In practice, the count is often 2 because there is precisely one state +on so_stateids. If there were more, this would fail. + +In my testing I see two circumstances when RELEASE_LOCKOWNER is called. +In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in +all the lock states being removed, and so the lockowner being discarded +(it is removed when there are no more references which usually happens +when the lock state is discarded). When nfsd4_release_lockowner() finds +that the lock owner doesn't exist, it returns success. + +The other case shows an so_count of '2' and precisely one state listed +in so_stateid. It appears that the Linux client uses a separate lock +owner for each file resulting in one lock state per lock owner, so this +test on '2' is safe. For another client it might not be safe. + +So this patch changes check_for_locks() to use the (newish) +find_any_file_locked() so that it doesn't take a reference on the +nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With +this check it is safe to restore the use of check_for_locks() rather +than testing so_count against the mysterious '2'.
+ +Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()") +Signed-off-by: NeilBrown +Reviewed-by: Jeff Layton +Cc: stable@vger.kernel.org # v6.2+ +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfs4state.c | 26 +++++++++++++++----------- + 1 file changed, 15 insertions(+), 11 deletions(-) + +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -7911,14 +7911,16 @@ check_for_locks(struct nfs4_file *fp, st + { + struct file_lock *fl; + int status = false; +- struct nfsd_file *nf = find_any_file(fp); ++ struct nfsd_file *nf; + struct inode *inode; + struct file_lock_context *flctx; + ++ spin_lock(&fp->fi_lock); ++ nf = find_any_file_locked(fp); + if (!nf) { + /* Any valid lock stateid should have some sort of access */ + WARN_ON_ONCE(1); +- return status; ++ goto out; + } + + inode = file_inode(nf->nf_file); +@@ -7934,7 +7936,8 @@ check_for_locks(struct nfs4_file *fp, st + } + spin_unlock(&flctx->flc_lock); + } +- nfsd_file_put(nf); ++out: ++ spin_unlock(&fp->fi_lock); + return status; + } + +@@ -7944,10 +7947,8 @@ check_for_locks(struct nfs4_file *fp, st + * @cstate: NFSv4 COMPOUND state + * @u: RELEASE_LOCKOWNER arguments + * +- * The lockowner's so_count is bumped when a lock record is added +- * or when copying a conflicting lock. The latter case is brief, +- * but can lead to fleeting false positives when looking for +- * locks-in-use. ++ * Check if there are any locks still held and if not - free the lockowner ++ * and any lock state that is owned.
+ * + * Return values: + * %nfs_ok: lockowner released or not found +@@ -7983,10 +7984,13 @@ nfsd4_release_lockowner(struct svc_rqst + spin_unlock(&clp->cl_lock); + return nfs_ok; + } +- if (atomic_read(&lo->lo_owner.so_count) != 2) { +- spin_unlock(&clp->cl_lock); +- nfs4_put_stateowner(&lo->lo_owner); +- return nfserr_locks_held; ++ ++ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { ++ if (check_for_locks(stp->st_stid.sc_file, lo)) { ++ spin_unlock(&clp->cl_lock); ++ nfs4_put_stateowner(&lo->lo_owner); ++ return nfserr_locks_held; ++ } + } + unhash_lockowner_locked(lo); + while (!list_empty(&lo->lo_owner.so_stateids)) { diff --git a/queue-6.7/ovl-mark-xwhiteouts-directory-with-overlay.opaque-x.patch b/queue-6.7/ovl-mark-xwhiteouts-directory-with-overlay.opaque-x.patch new file mode 100644 index 00000000000..fe9ecf23171 --- /dev/null +++ b/queue-6.7/ovl-mark-xwhiteouts-directory-with-overlay.opaque-x.patch @@ -0,0 +1,459 @@ +From 420332b94119cdc7db4477cc88484691cb92ae71 Mon Sep 17 00:00:00 2001 +From: Amir Goldstein +Date: Sat, 20 Jan 2024 12:18:39 +0200 +Subject: ovl: mark xwhiteouts directory with overlay.opaque='x' + +From: Amir Goldstein + +commit 420332b94119cdc7db4477cc88484691cb92ae71 upstream. + +An opaque directory cannot have xwhiteouts, so instead of marking an +xwhiteouts directory with a new xattr, overload overlay.opaque xattr +for marking both opaque dir ('y') and xwhiteouts dir ('x'). + +This is more efficient as the overlay.opaque xattr is checked during +lookup of directory anyway. + +This also prevents unnecessary checking the xattr when reading a +directory without xwhiteouts, i.e. most of the time. + +Note that the xwhiteouts marker is not checked on the upper layer and +on the last layer in lowerstack, where xwhiteouts are not expected. 
+ +Fixes: bc8df7a3dc03 ("ovl: Add an alternative type of whiteout") +Cc: # v6.7 +Reviewed-by: Alexander Larsson +Tested-by: Alexander Larsson +Signed-off-by: Amir Goldstein +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/filesystems/overlayfs.rst | 16 +++++++-- + fs/overlayfs/namei.c | 43 ++++++++++++++++--------- + fs/overlayfs/overlayfs.h | 23 ++++++++++--- + fs/overlayfs/ovl_entry.h | 4 +- + fs/overlayfs/readdir.c | 7 ++-- + fs/overlayfs/super.c | 15 +++++++++ + fs/overlayfs/util.c | 53 ++++++++++++++++++-------------- + 7 files changed, 110 insertions(+), 51 deletions(-) + +--- a/Documentation/filesystems/overlayfs.rst ++++ b/Documentation/filesystems/overlayfs.rst +@@ -145,7 +145,9 @@ filesystem, an overlay filesystem needs + that files have been removed. This is done using whiteouts and opaque + directories (non-directories are always opaque). + +-A whiteout is created as a character device with 0/0 device number. ++A whiteout is created as a character device with 0/0 device number or ++as a zero-size regular file with the xattr "trusted.overlay.whiteout". ++ + When a whiteout is found in the upper level of a merged directory, any + matching name in the lower level is ignored, and the whiteout itself + is also hidden. +@@ -154,6 +156,13 @@ A directory is made opaque by setting th + to "y". Where the upper filesystem contains an opaque directory, any + directory in the lower filesystem with the same name is ignored. + ++An opaque directory should not contain any whiteouts, because they do not ++serve any purpose. A merge directory containing regular files with the xattr ++"trusted.overlay.whiteout", should be additionally marked by setting the xattr ++"trusted.overlay.opaque" to "x" on the merge directory itself. ++This is needed to avoid the overhead of checking the "trusted.overlay.whiteout" ++on all entries during readdir in the common case.
++ + readdir + ------- + +@@ -534,8 +543,9 @@ A lower dir with a regular whiteout will + mount, so to support storing an effective whiteout file in an overlayfs mount an + alternative form of whiteout is supported. This form is a regular, zero-size + file with the "overlay.whiteout" xattr set, inside a directory with the +-"overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs, +-but can be used by userspace tools (like containers) that generate lower layers. ++"overlay.opaque" xattr set to "x" (see `whiteouts and opaque directories`_). ++These alternative whiteouts are never created by overlayfs, but can be used by ++userspace tools (like containers) that generate lower layers. + These alternative whiteouts can be escaped using the standard xattr escape + mechanism in order to properly nest to any depth. + +--- a/fs/overlayfs/namei.c ++++ b/fs/overlayfs/namei.c +@@ -18,10 +18,11 @@ + + struct ovl_lookup_data { + struct super_block *sb; +- struct vfsmount *mnt; ++ const struct ovl_layer *layer; + struct qstr name; + bool is_dir; + bool opaque; ++ bool xwhiteouts; + bool stop; + bool last; + char *redirect; +@@ -201,17 +202,13 @@ struct dentry *ovl_decode_real_fh(struct + return real; + } + +-static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path) +-{ +- return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE); +-} +- + static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d, + const char *name, + struct dentry *base, int len, + bool drop_negative) + { +- struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len); ++ struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), name, ++ base, len); + + if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { + if (drop_negative && ret->d_lockref.count == 1) { +@@ -232,10 +229,13 @@ static int ovl_lookup_single(struct dent + size_t prelen, const char *post, + struct dentry **ret, bool drop_negative) + { ++ struct 
ovl_fs *ofs = OVL_FS(d->sb); + struct dentry *this; + struct path path; + int err; + bool last_element = !post[0]; ++ bool is_upper = d->layer->idx == 0; ++ char val; + + this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative); + if (IS_ERR(this)) { +@@ -253,8 +253,8 @@ static int ovl_lookup_single(struct dent + } + + path.dentry = this; +- path.mnt = d->mnt; +- if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) { ++ path.mnt = d->layer->mnt; ++ if (ovl_path_is_whiteout(ofs, &path)) { + d->stop = d->opaque = true; + goto put_and_out; + } +@@ -272,7 +272,7 @@ static int ovl_lookup_single(struct dent + d->stop = true; + goto put_and_out; + } +- err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path, NULL); ++ err = ovl_check_metacopy_xattr(ofs, &path, NULL); + if (err < 0) + goto out_err; + +@@ -292,7 +292,12 @@ static int ovl_lookup_single(struct dent + if (d->last) + goto out; + +- if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) { ++ /* overlay.opaque=x means xwhiteouts directory */ ++ val = ovl_get_opaquedir_val(ofs, &path); ++ if (last_element && !is_upper && val == 'x') { ++ d->xwhiteouts = true; ++ ovl_layer_set_xwhiteouts(ofs, d->layer); ++ } else if (val == 'y') { + d->stop = true; + if (last_element) + d->opaque = true; +@@ -863,7 +868,8 @@ fail: + * Returns next layer in stack starting from top. + * Returns -1 if this is the last layer. + */ +-int ovl_path_next(int idx, struct dentry *dentry, struct path *path) ++int ovl_path_next(int idx, struct dentry *dentry, struct path *path, ++ const struct ovl_layer **layer) + { + struct ovl_entry *oe = OVL_E(dentry); + struct ovl_path *lowerstack = ovl_lowerstack(oe); +@@ -871,13 +877,16 @@ int ovl_path_next(int idx, struct dentry + BUG_ON(idx < 0); + if (idx == 0) { + ovl_path_upper(dentry, path); +- if (path->dentry) ++ if (path->dentry) { ++ *layer = &OVL_FS(dentry->d_sb)->layers[0]; + return ovl_numlower(oe) ? 
1 : -1; ++ } + idx++; + } + BUG_ON(idx > ovl_numlower(oe)); + path->dentry = lowerstack[idx - 1].dentry; +- path->mnt = lowerstack[idx - 1].layer->mnt; ++ *layer = lowerstack[idx - 1].layer; ++ path->mnt = (*layer)->mnt; + + return (idx < ovl_numlower(oe)) ? idx + 1 : -1; + } +@@ -1055,7 +1064,7 @@ struct dentry *ovl_lookup(struct inode * + old_cred = ovl_override_creds(dentry->d_sb); + upperdir = ovl_dentry_upper(dentry->d_parent); + if (upperdir) { +- d.mnt = ovl_upper_mnt(ofs); ++ d.layer = &ofs->layers[0]; + err = ovl_lookup_layer(upperdir, &d, &upperdentry, true); + if (err) + goto out; +@@ -1111,7 +1120,7 @@ struct dentry *ovl_lookup(struct inode * + else if (d.is_dir || !ofs->numdatalayer) + d.last = lower.layer->idx == ovl_numlower(roe); + +- d.mnt = lower.layer->mnt; ++ d.layer = lower.layer; + err = ovl_lookup_layer(lower.dentry, &d, &this, false); + if (err) + goto out_put; +@@ -1278,6 +1287,8 @@ struct dentry *ovl_lookup(struct inode * + + if (upperopaque) + ovl_dentry_set_opaque(dentry); ++ if (d.xwhiteouts) ++ ovl_dentry_set_xwhiteouts(dentry); + + if (upperdentry) + ovl_dentry_set_upper_alias(dentry); +--- a/fs/overlayfs/overlayfs.h ++++ b/fs/overlayfs/overlayfs.h +@@ -50,7 +50,6 @@ enum ovl_xattr { + OVL_XATTR_METACOPY, + OVL_XATTR_PROTATTR, + OVL_XATTR_XWHITEOUT, +- OVL_XATTR_XWHITEOUTS, + }; + + enum ovl_inode_flag { +@@ -70,6 +69,8 @@ enum ovl_entry_flag { + OVL_E_UPPER_ALIAS, + OVL_E_OPAQUE, + OVL_E_CONNECTED, ++ /* Lower stack may contain xwhiteout entries */ ++ OVL_E_XWHITEOUTS, + }; + + enum { +@@ -471,6 +472,10 @@ bool ovl_dentry_test_flag(unsigned long + bool ovl_dentry_is_opaque(struct dentry *dentry); + bool ovl_dentry_is_whiteout(struct dentry *dentry); + void ovl_dentry_set_opaque(struct dentry *dentry); ++bool ovl_dentry_has_xwhiteouts(struct dentry *dentry); ++void ovl_dentry_set_xwhiteouts(struct dentry *dentry); ++void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, ++ const struct ovl_layer *layer); + bool 
ovl_dentry_has_upper_alias(struct dentry *dentry); + void ovl_dentry_set_upper_alias(struct dentry *dentry); + bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags); +@@ -488,11 +493,10 @@ struct file *ovl_path_open(const struct + int ovl_copy_up_start(struct dentry *dentry, int flags); + void ovl_copy_up_end(struct dentry *dentry); + bool ovl_already_copied_up(struct dentry *dentry, int flags); +-bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, +- enum ovl_xattr ox); ++char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, ++ enum ovl_xattr ox); + bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path); + bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path); +-bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path); + bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, + const struct path *upperpath); + +@@ -567,7 +571,13 @@ static inline bool ovl_is_impuredir(stru + .mnt = ovl_upper_mnt(ofs), + }; + +- return ovl_path_check_dir_xattr(ofs, &upperpath, OVL_XATTR_IMPURE); ++ return ovl_get_dir_xattr_val(ofs, &upperpath, OVL_XATTR_IMPURE) == 'y'; ++} ++ ++static inline char ovl_get_opaquedir_val(struct ovl_fs *ofs, ++ const struct path *path) ++{ ++ return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE); + } + + static inline bool ovl_redirect_follow(struct ovl_fs *ofs) +@@ -674,7 +684,8 @@ int ovl_get_index_name(struct ovl_fs *of + struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); + struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, + struct dentry *origin, bool verify); +-int ovl_path_next(int idx, struct dentry *dentry, struct path *path); ++int ovl_path_next(int idx, struct dentry *dentry, struct path *path, ++ const struct ovl_layer **layer); + int ovl_verify_lowerdata(struct dentry *dentry); + struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + 
unsigned int flags); +--- a/fs/overlayfs/ovl_entry.h ++++ b/fs/overlayfs/ovl_entry.h +@@ -40,6 +40,8 @@ struct ovl_layer { + int idx; + /* One fsid per unique underlying sb (upper fsid == 0) */ + int fsid; ++ /* xwhiteouts were found on this layer */ ++ bool has_xwhiteouts; + }; + + struct ovl_path { +@@ -59,7 +61,7 @@ struct ovl_fs { + unsigned int numfs; + /* Number of data-only lower layers */ + unsigned int numdatalayer; +- const struct ovl_layer *layers; ++ struct ovl_layer *layers; + struct ovl_sb *fs; + /* workbasedir is the path at workdir= mount option */ + struct dentry *workbasedir; +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -305,8 +305,6 @@ static inline int ovl_dir_read(const str + if (IS_ERR(realfile)) + return PTR_ERR(realfile); + +- rdd->in_xwhiteouts_dir = rdd->dentry && +- ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath); + rdd->first_maybe_whiteout = NULL; + rdd->ctx.pos = 0; + do { +@@ -359,10 +357,13 @@ static int ovl_dir_read_merged(struct de + .is_lowest = false, + }; + int idx, next; ++ const struct ovl_layer *layer; + + for (idx = 0; idx != -1; idx = next) { +- next = ovl_path_next(idx, dentry, &realpath); ++ next = ovl_path_next(idx, dentry, &realpath, &layer); + rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry; ++ rdd.in_xwhiteouts_dir = layer->has_xwhiteouts && ++ ovl_dentry_has_xwhiteouts(dentry); + + if (next != -1) { + err = ovl_dir_read(&realpath, &rdd); +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -1250,6 +1250,7 @@ static struct dentry *ovl_get_root(struc + struct ovl_entry *oe) + { + struct dentry *root; ++ struct ovl_fs *ofs = OVL_FS(sb); + struct ovl_path *lowerpath = ovl_lowerstack(oe); + unsigned long ino = d_inode(lowerpath->dentry)->i_ino; + int fsid = lowerpath->layer->fsid; +@@ -1271,6 +1272,20 @@ static struct dentry *ovl_get_root(struc + ovl_set_flag(OVL_IMPURE, d_inode(root)); + } + ++ /* Look for xwhiteouts marker except in the lowermost layer */ ++ for 
(int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) { ++ struct path path = { ++ .mnt = lowerpath->layer->mnt, ++ .dentry = lowerpath->dentry, ++ }; ++ ++ /* overlay.opaque=x means xwhiteouts directory */ ++ if (ovl_get_opaquedir_val(ofs, &path) == 'x') { ++ ovl_layer_set_xwhiteouts(ofs, lowerpath->layer); ++ ovl_dentry_set_xwhiteouts(root); ++ } ++ } ++ + /* Root is always merge -> can have whiteouts */ + ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); + ovl_dentry_set_flag(OVL_E_CONNECTED, root); +--- a/fs/overlayfs/util.c ++++ b/fs/overlayfs/util.c +@@ -461,6 +461,33 @@ void ovl_dentry_set_opaque(struct dentry + ovl_dentry_set_flag(OVL_E_OPAQUE, dentry); + } + ++bool ovl_dentry_has_xwhiteouts(struct dentry *dentry) ++{ ++ return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry); ++} ++ ++void ovl_dentry_set_xwhiteouts(struct dentry *dentry) ++{ ++ ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry); ++} ++ ++/* ++ * ovl_layer_set_xwhiteouts() is called before adding the overlay dir ++ * dentry to dcache, while readdir of that same directory happens after ++ * the overlay dir dentry is in dcache, so if some cpu observes that ++ * ovl_dentry_has_xwhiteouts(), it will also observe layer->has_xwhiteouts ++ * for the layers where xwhiteouts marker was found in that merge dir. ++ */ ++void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, ++ const struct ovl_layer *layer) ++{ ++ if (layer->has_xwhiteouts) ++ return; ++ ++ /* Write once to read-mostly layer properties */ ++ ofs->layers[layer->idx].has_xwhiteouts = true; ++} ++ + /* + * For hard links and decoded file handles, it's possible for ovl_dentry_upper() + * to return positive, while there's no actual upper alias for the inode.
+@@ -739,19 +766,6 @@ bool ovl_path_check_xwhiteout_xattr(stru + return res >= 0; + } + +-bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path) +-{ +- struct dentry *dentry = path->dentry; +- int res; +- +- /* xattr.whiteouts must be a directory */ +- if (!d_is_dir(dentry)) +- return false; +- +- res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUTS, NULL, 0); +- return res >= 0; +-} +- + /* + * Load persistent uuid from xattr into s_uuid if found, or store a new + * random generated value in s_uuid and in xattr. +@@ -811,20 +825,17 @@ fail: + return false; + } + +-bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, +- enum ovl_xattr ox) ++char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, ++ enum ovl_xattr ox) + { + int res; + char val; + + if (!d_is_dir(path->dentry)) +- return false; ++ return 0; + + res = ovl_path_getxattr(ofs, path, ox, &val, 1); +- if (res == 1 && val == 'y') +- return true; +- +- return false; ++ return res == 1 ? 
val : 0; + } + + #define OVL_XATTR_OPAQUE_POSTFIX "opaque" +@@ -837,7 +848,6 @@ bool ovl_path_check_dir_xattr(struct ovl + #define OVL_XATTR_METACOPY_POSTFIX "metacopy" + #define OVL_XATTR_PROTATTR_POSTFIX "protattr" + #define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout" +-#define OVL_XATTR_XWHITEOUTS_POSTFIX "whiteouts" + + #define OVL_XATTR_TAB_ENTRY(x) \ + [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ +@@ -854,7 +864,6 @@ const char *const ovl_xattr_table[][2] = + OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT), +- OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUTS), + }; + + int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, diff --git a/queue-6.7/platform-x86-intel-uncore-freq-fix-types-in-sysfs-callbacks.patch b/queue-6.7/platform-x86-intel-uncore-freq-fix-types-in-sysfs-callbacks.patch new file mode 100644 index 00000000000..0c1f52a67c5 --- /dev/null +++ b/queue-6.7/platform-x86-intel-uncore-freq-fix-types-in-sysfs-callbacks.patch @@ -0,0 +1,266 @@ +From 416de0246f35f43d871a57939671fe814f4455ee Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Thu, 4 Jan 2024 15:59:03 -0700 +Subject: platform/x86: intel-uncore-freq: Fix types in sysfs callbacks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Nathan Chancellor + +commit 416de0246f35f43d871a57939671fe814f4455ee upstream. 
+ +When booting a kernel with CONFIG_CFI_CLANG, there is a CFI failure when +accessing any of the values under +/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00: + + $ cat /sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/max_freq_khz + fish: Job 1, 'cat /sys/devices/system/cpu/int…' terminated by signal SIGSEGV (Address boundary error) + + $ sudo dmesg &| grep 'CFI failure' + [ 170.953925] CFI failure at kobj_attr_show+0x19/0x30 (target: show_max_freq_khz+0x0/0xc0 [intel_uncore_frequency_common]; expected type: 0xd34078c5 + +The sysfs callback functions such as show_domain_id() are written as if +they are going to be called by dev_attr_show() but as the above message +shows, they are instead called by kobj_attr_show(). kCFI checks that the +destination of an indirect jump has the exact same type as the prototype +of the function pointer it is called through and fails when they do not. + +These callbacks are called through kobj_attr_show() because +uncore_root_kobj was initialized with kobject_create_and_add(), which +means uncore_root_kobj has a ->sysfs_ops of kobj_sysfs_ops from +kobject_create(), which uses kobj_attr_show() as its ->show() value. + +The only reason there has not been a more noticeable problem until this +point is that 'struct kobj_attribute' and 'struct device_attribute' have +the same layout, so getting the callback from container_of() works the +same with either value. + +Change all the callbacks and their uses to be compatible with +kobj_attr_show() and kobj_attr_store(), which resolves the kCFI failure +and allows the sysfs files to work properly. 
+ +Closes: https://github.com/ClangBuiltLinux/linux/issues/1974 +Fixes: ae7b2ce57851 ("platform/x86/intel/uncore-freq: Use sysfs API to create attributes") +Cc: stable@vger.kernel.org +Signed-off-by: Nathan Chancellor +Reviewed-by: Sami Tolvanen +Acked-by: Srinivas Pandruvada +Link: https://lore.kernel.org/r/20240104-intel-uncore-freq-kcfi-fix-v1-1-bf1e8939af40@kernel.org +Signed-off-by: Hans de Goede +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c | 82 +++++----- + drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h | 32 +-- + 2 files changed, 57 insertions(+), 57 deletions(-) + +--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c ++++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +@@ -23,23 +23,23 @@ static int (*uncore_read)(struct uncore_ + static int (*uncore_write)(struct uncore_data *data, unsigned int input, unsigned int min_max); + static int (*uncore_read_freq)(struct uncore_data *data, unsigned int *freq); + +-static ssize_t show_domain_id(struct device *dev, struct device_attribute *attr, char *buf) ++static ssize_t show_domain_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) + { +- struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_dev_attr); ++ struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_kobj_attr); + + return sprintf(buf, "%u\n", data->domain_id); + } + +-static ssize_t show_fabric_cluster_id(struct device *dev, struct device_attribute *attr, char *buf) ++static ssize_t show_fabric_cluster_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) + { +- struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_dev_attr); ++ struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_kobj_attr); + + return sprintf(buf, "%u\n", data->cluster_id); + } + +-static ssize_t show_package_id(struct 
device *dev, struct device_attribute *attr, char *buf) ++static ssize_t show_package_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) + { +- struct uncore_data *data = container_of(attr, struct uncore_data, package_id_dev_attr); ++ struct uncore_data *data = container_of(attr, struct uncore_data, package_id_kobj_attr); + + return sprintf(buf, "%u\n", data->package_id); + } +@@ -97,30 +97,30 @@ static ssize_t show_perf_status_freq_khz + } + + #define store_uncore_min_max(name, min_max) \ +- static ssize_t store_##name(struct device *dev, \ +- struct device_attribute *attr, \ ++ static ssize_t store_##name(struct kobject *kobj, \ ++ struct kobj_attribute *attr, \ + const char *buf, size_t count) \ + { \ +- struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ ++ struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ + \ + return store_min_max_freq_khz(data, buf, count, \ + min_max); \ + } + + #define show_uncore_min_max(name, min_max) \ +- static ssize_t show_##name(struct device *dev, \ +- struct device_attribute *attr, char *buf)\ ++ static ssize_t show_##name(struct kobject *kobj, \ ++ struct kobj_attribute *attr, char *buf)\ + { \ +- struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ ++ struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ + \ + return show_min_max_freq_khz(data, buf, min_max); \ + } + + #define show_uncore_perf_status(name) \ +- static ssize_t show_##name(struct device *dev, \ +- struct device_attribute *attr, char *buf)\ ++ static ssize_t show_##name(struct kobject *kobj, \ ++ struct kobj_attribute *attr, char *buf)\ + { \ +- struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ ++ struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ + \ + return show_perf_status_freq_khz(data, buf); \ + } +@@ -134,11 +134,11 @@ 
show_uncore_min_max(max_freq_khz, 1); + show_uncore_perf_status(current_freq_khz); + + #define show_uncore_data(member_name) \ +- static ssize_t show_##member_name(struct device *dev, \ +- struct device_attribute *attr, char *buf)\ ++ static ssize_t show_##member_name(struct kobject *kobj, \ ++ struct kobj_attribute *attr, char *buf)\ + { \ + struct uncore_data *data = container_of(attr, struct uncore_data,\ +- member_name##_dev_attr);\ ++ member_name##_kobj_attr);\ + \ + return sysfs_emit(buf, "%u\n", \ + data->member_name); \ +@@ -149,29 +149,29 @@ show_uncore_data(initial_max_freq_khz); + + #define init_attribute_rw(_name) \ + do { \ +- sysfs_attr_init(&data->_name##_dev_attr.attr); \ +- data->_name##_dev_attr.show = show_##_name; \ +- data->_name##_dev_attr.store = store_##_name; \ +- data->_name##_dev_attr.attr.name = #_name; \ +- data->_name##_dev_attr.attr.mode = 0644; \ ++ sysfs_attr_init(&data->_name##_kobj_attr.attr); \ ++ data->_name##_kobj_attr.show = show_##_name; \ ++ data->_name##_kobj_attr.store = store_##_name; \ ++ data->_name##_kobj_attr.attr.name = #_name; \ ++ data->_name##_kobj_attr.attr.mode = 0644; \ + } while (0) + + #define init_attribute_ro(_name) \ + do { \ +- sysfs_attr_init(&data->_name##_dev_attr.attr); \ +- data->_name##_dev_attr.show = show_##_name; \ +- data->_name##_dev_attr.store = NULL; \ +- data->_name##_dev_attr.attr.name = #_name; \ +- data->_name##_dev_attr.attr.mode = 0444; \ ++ sysfs_attr_init(&data->_name##_kobj_attr.attr); \ ++ data->_name##_kobj_attr.show = show_##_name; \ ++ data->_name##_kobj_attr.store = NULL; \ ++ data->_name##_kobj_attr.attr.name = #_name; \ ++ data->_name##_kobj_attr.attr.mode = 0444; \ + } while (0) + + #define init_attribute_root_ro(_name) \ + do { \ +- sysfs_attr_init(&data->_name##_dev_attr.attr); \ +- data->_name##_dev_attr.show = show_##_name; \ +- data->_name##_dev_attr.store = NULL; \ +- data->_name##_dev_attr.attr.name = #_name; \ +- data->_name##_dev_attr.attr.mode = 0400; \ ++ 
sysfs_attr_init(&data->_name##_kobj_attr.attr); \ ++ data->_name##_kobj_attr.show = show_##_name; \ ++ data->_name##_kobj_attr.store = NULL; \ ++ data->_name##_kobj_attr.attr.name = #_name; \ ++ data->_name##_kobj_attr.attr.mode = 0400; \ + } while (0) + + static int create_attr_group(struct uncore_data *data, char *name) +@@ -186,21 +186,21 @@ static int create_attr_group(struct unco + + if (data->domain_id != UNCORE_DOMAIN_ID_INVALID) { + init_attribute_root_ro(domain_id); +- data->uncore_attrs[index++] = &data->domain_id_dev_attr.attr; ++ data->uncore_attrs[index++] = &data->domain_id_kobj_attr.attr; + init_attribute_root_ro(fabric_cluster_id); +- data->uncore_attrs[index++] = &data->fabric_cluster_id_dev_attr.attr; ++ data->uncore_attrs[index++] = &data->fabric_cluster_id_kobj_attr.attr; + init_attribute_root_ro(package_id); +- data->uncore_attrs[index++] = &data->package_id_dev_attr.attr; ++ data->uncore_attrs[index++] = &data->package_id_kobj_attr.attr; + } + +- data->uncore_attrs[index++] = &data->max_freq_khz_dev_attr.attr; +- data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr; +- data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr; +- data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr; ++ data->uncore_attrs[index++] = &data->max_freq_khz_kobj_attr.attr; ++ data->uncore_attrs[index++] = &data->min_freq_khz_kobj_attr.attr; ++ data->uncore_attrs[index++] = &data->initial_min_freq_khz_kobj_attr.attr; ++ data->uncore_attrs[index++] = &data->initial_max_freq_khz_kobj_attr.attr; + + ret = uncore_read_freq(data, &freq); + if (!ret) +- data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; ++ data->uncore_attrs[index++] = &data->current_freq_khz_kobj_attr.attr; + + data->uncore_attrs[index] = NULL; + +--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h ++++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h +@@ -26,14 +26,14 @@ + * @instance_id: Unique 
instance id to append to directory name + * @name: Sysfs entry name for this instance + * @uncore_attr_group: Attribute group storage +- * @max_freq_khz_dev_attr: Storage for device attribute max_freq_khz +- * @mix_freq_khz_dev_attr: Storage for device attribute min_freq_khz +- * @initial_max_freq_khz_dev_attr: Storage for device attribute initial_max_freq_khz +- * @initial_min_freq_khz_dev_attr: Storage for device attribute initial_min_freq_khz +- * @current_freq_khz_dev_attr: Storage for device attribute current_freq_khz +- * @domain_id_dev_attr: Storage for device attribute domain_id +- * @fabric_cluster_id_dev_attr: Storage for device attribute fabric_cluster_id +- * @package_id_dev_attr: Storage for device attribute package_id ++ * @max_freq_khz_kobj_attr: Storage for kobject attribute max_freq_khz ++ * @min_freq_khz_kobj_attr: Storage for kobject attribute min_freq_khz ++ * @initial_max_freq_khz_kobj_attr: Storage for kobject attribute initial_max_freq_khz ++ * @initial_min_freq_khz_kobj_attr: Storage for kobject attribute initial_min_freq_khz ++ * @current_freq_khz_kobj_attr: Storage for kobject attribute current_freq_khz ++ * @domain_id_kobj_attr: Storage for kobject attribute domain_id ++ * @fabric_cluster_id_kobj_attr: Storage for kobject attribute fabric_cluster_id ++ * @package_id_kobj_attr: Storage for kobject attribute package_id + * @uncore_attrs: Attribute storage for group creation + * + * This structure is used to encapsulate all data related to uncore sysfs +@@ -53,14 +53,14 @@ struct uncore_data { + char name[32]; + + struct attribute_group uncore_attr_group; +- struct device_attribute max_freq_khz_dev_attr; +- struct device_attribute min_freq_khz_dev_attr; +- struct device_attribute initial_max_freq_khz_dev_attr; +- struct device_attribute initial_min_freq_khz_dev_attr; +- struct device_attribute current_freq_khz_dev_attr; +- struct device_attribute domain_id_dev_attr; +- struct device_attribute fabric_cluster_id_dev_attr; +- struct
device_attribute package_id_dev_attr; ++ struct kobj_attribute max_freq_khz_kobj_attr; ++ struct kobj_attribute min_freq_khz_kobj_attr; ++ struct kobj_attribute initial_max_freq_khz_kobj_attr; ++ struct kobj_attribute initial_min_freq_khz_kobj_attr; ++ struct kobj_attribute current_freq_khz_kobj_attr; ++ struct kobj_attribute domain_id_kobj_attr; ++ struct kobj_attribute fabric_cluster_id_kobj_attr; ++ struct kobj_attribute package_id_kobj_attr; + struct attribute *uncore_attrs[9]; + }; + diff --git a/queue-6.7/platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch b/queue-6.7/platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch new file mode 100644 index 00000000000..bedf4efbc4d --- /dev/null +++ b/queue-6.7/platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch @@ -0,0 +1,293 @@ +From 5913320eb0b3ec88158cfcb0fa5e996bf4ef681b Mon Sep 17 00:00:00 2001 +From: Shin'ichiro Kawasaki +Date: Mon, 8 Jan 2024 15:20:58 +0900 +Subject: platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Shin'ichiro Kawasaki + +commit 5913320eb0b3ec88158cfcb0fa5e996bf4ef681b upstream. + +p2sb_bar() unhides P2SB device to get resources from the device. It +guards the operation by locking pci_rescan_remove_lock so that parallel +rescans do not find the P2SB device. However, this lock causes deadlock +when PCI bus rescan is triggered by /sys/bus/pci/rescan. The rescan +locks pci_rescan_remove_lock and probes PCI devices. When PCI devices +call p2sb_bar() during probe, it locks pci_rescan_remove_lock again. +Hence the deadlock. + +To avoid the deadlock, do not lock pci_rescan_remove_lock in p2sb_bar(). +Instead, do the lock at fs_initcall. Introduce p2sb_cache_resources() +for fs_initcall which gets and caches the P2SB resources. At p2sb_bar(), +refer the cache and return to the caller. 
+ +Before operating the device at P2SB DEVFN for resource cache, check +that its device class is PCI_CLASS_MEMORY_OTHER 0x0580 that PCH +specifications define. This avoids unexpected operation on other devices +at the same DEVFN. + +Link: https://lore.kernel.org/linux-pci/6xb24fjmptxxn5js2fjrrddjae6twex5bjaftwqsuawuqqqydx@7cl3uik5ef6j/ +Fixes: 9745fb07474f ("platform/x86/intel: Add Primary to Sideband (P2SB) bridge support") +Cc: stable@vger.kernel.org +Suggested-by: Andy Shevchenko +Signed-off-by: Shin'ichiro Kawasaki +Link: https://lore.kernel.org/r/20240108062059.3583028-2-shinichiro.kawasaki@wdc.com +Reviewed-by: Andy Shevchenko +Reviewed-by: Ilpo Järvinen +Tested-by: Klara Modin +Reviewed-by: Hans de Goede +Signed-off-by: Hans de Goede +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/p2sb.c | 180 +++++++++++++++++++++++++++++++++----------- + 1 file changed, 139 insertions(+), 41 deletions(-) + +--- a/drivers/platform/x86/p2sb.c ++++ b/drivers/platform/x86/p2sb.c +@@ -26,6 +26,21 @@ static const struct x86_cpu_id p2sb_cpu_ + {} + }; + ++/* ++ * Cache BAR0 of P2SB device functions 0 to 7. ++ * TODO: The constant 8 is the number of functions that PCI specification ++ * defines. Same definitions exist tree-wide. Unify this definition and ++ * the other definitions then move to include/uapi/linux/pci.h.
++ */ ++#define NR_P2SB_RES_CACHE 8 ++ ++struct p2sb_res_cache { ++ u32 bus_dev_id; ++ struct resource res; ++}; ++ ++static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; ++ + static int p2sb_get_devfn(unsigned int *devfn) + { + unsigned int fn = P2SB_DEVFN_DEFAULT; +@@ -39,8 +54,16 @@ static int p2sb_get_devfn(unsigned int * + return 0; + } + ++static bool p2sb_valid_resource(struct resource *res) ++{ ++ if (res->flags) ++ return true; ++ ++ return false; ++} ++ + /* Copy resource from the first BAR of the device in question */ +-static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) ++static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) + { + struct resource *bar0 = &pdev->resource[0]; + +@@ -56,49 +79,66 @@ static int p2sb_read_bar0(struct pci_dev + mem->end = bar0->end; + mem->flags = bar0->flags; + mem->desc = bar0->desc; +- +- return 0; + } + +-static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem) ++static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) + { ++ struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; + struct pci_dev *pdev; +- int ret; + + pdev = pci_scan_single_device(bus, devfn); + if (!pdev) +- return -ENODEV; ++ return; + +- ret = p2sb_read_bar0(pdev, mem); ++ p2sb_read_bar0(pdev, &cache->res); ++ cache->bus_dev_id = bus->dev.id; + + pci_stop_and_remove_bus_device(pdev); +- return ret; + } + +-/** +- * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR +- * @bus: PCI bus to communicate with +- * @devfn: PCI slot and function to communicate with +- * @mem: memory resource to be filled in +- * +- * The BIOS prevents the P2SB device from being enumerated by the PCI +- * subsystem, so we need to unhide and hide it back to lookup the BAR. +- * +- * if @bus is NULL, the bus 0 in domain 0 will be used. +- * If @devfn is 0, it will be replaced by devfn of the P2SB device. +- * +- * Caller must provide a valid pointer to @mem. 
+- * +- * Locking is handled by pci_rescan_remove_lock mutex. +- * +- * Return: +- * 0 on success or appropriate errno value on error. +- */ +-int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) ++static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) ++{ ++ unsigned int slot, fn; ++ ++ if (PCI_FUNC(devfn) == 0) { ++ /* ++ * When function number of the P2SB device is zero, scan it and ++ * other function numbers, and if devices are available, cache ++ * their BAR0s. ++ */ ++ slot = PCI_SLOT(devfn); ++ for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) ++ p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); ++ } else { ++ /* Scan the P2SB device and cache its BAR0 */ ++ p2sb_scan_and_cache_devfn(bus, devfn); ++ } ++ ++ if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) ++ return -ENOENT; ++ ++ return 0; ++} ++ ++static struct pci_bus *p2sb_get_bus(struct pci_bus *bus) ++{ ++ static struct pci_bus *p2sb_bus; ++ ++ bus = bus ?: p2sb_bus; ++ if (bus) ++ return bus; ++ ++ /* Assume P2SB is on the bus 0 in domain 0 */ ++ p2sb_bus = pci_find_bus(0, 0); ++ return p2sb_bus; ++} ++ ++static int p2sb_cache_resources(void) + { +- struct pci_dev *pdev_p2sb; + unsigned int devfn_p2sb; + u32 value = P2SBC_HIDE; ++ struct pci_bus *bus; ++ u16 class; + int ret; + + /* Get devfn for P2SB device itself */ +@@ -106,8 +146,17 @@ int p2sb_bar(struct pci_bus *bus, unsign + if (ret) + return ret; + +- /* if @bus is NULL, use bus 0 in domain 0 */ +- bus = bus ?: pci_find_bus(0, 0); ++ bus = p2sb_get_bus(NULL); ++ if (!bus) ++ return -ENODEV; ++ ++ /* ++ * When a device with same devfn exists and its device class is not ++ * PCI_CLASS_MEMORY_OTHER for P2SB, do not touch it. 
++ */ ++ pci_bus_read_config_word(bus, devfn_p2sb, PCI_CLASS_DEVICE, &class); ++ if (!PCI_POSSIBLE_ERROR(class) && class != PCI_CLASS_MEMORY_OTHER) ++ return -ENODEV; + + /* + * Prevent concurrent PCI bus scan from seeing the P2SB device and +@@ -115,17 +164,16 @@ int p2sb_bar(struct pci_bus *bus, unsign + */ + pci_lock_rescan_remove(); + +- /* Unhide the P2SB device, if needed */ ++ /* ++ * The BIOS prevents the P2SB device from being enumerated by the PCI ++ * subsystem, so we need to unhide and hide it back to lookup the BAR. ++ * Unhide the P2SB device here, if needed. ++ */ + pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); + if (value & P2SBC_HIDE) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); + +- pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb); +- if (devfn) +- ret = p2sb_scan_and_read(bus, devfn, mem); +- else +- ret = p2sb_read_bar0(pdev_p2sb, mem); +- pci_stop_and_remove_bus_device(pdev_p2sb); ++ ret = p2sb_scan_and_cache(bus, devfn_p2sb); + + /* Hide the P2SB device, if it was hidden */ + if (value & P2SBC_HIDE) +@@ -133,12 +181,62 @@ int p2sb_bar(struct pci_bus *bus, unsign + + pci_unlock_rescan_remove(); + +- if (ret) +- return ret; ++ return ret; ++} + +- if (mem->flags == 0) ++/** ++ * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR ++ * @bus: PCI bus to communicate with ++ * @devfn: PCI slot and function to communicate with ++ * @mem: memory resource to be filled in ++ * ++ * If @bus is NULL, the bus 0 in domain 0 will be used. ++ * If @devfn is 0, it will be replaced by devfn of the P2SB device. ++ * ++ * Caller must provide a valid pointer to @mem. ++ * ++ * Return: ++ * 0 on success or appropriate errno value on error. 
++ */ ++int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) ++{ ++ struct p2sb_res_cache *cache; ++ int ret; ++ ++ bus = p2sb_get_bus(bus); ++ if (!bus) + return -ENODEV; + ++ if (!devfn) { ++ ret = p2sb_get_devfn(&devfn); ++ if (ret) ++ return ret; ++ } ++ ++ cache = &p2sb_resources[PCI_FUNC(devfn)]; ++ if (cache->bus_dev_id != bus->dev.id) ++ return -ENODEV; ++ ++ if (!p2sb_valid_resource(&cache->res)) ++ return -ENOENT; ++ ++ memcpy(mem, &cache->res, sizeof(*mem)); + return 0; + } + EXPORT_SYMBOL_GPL(p2sb_bar); ++ ++static int __init p2sb_fs_init(void) ++{ ++ p2sb_cache_resources(); ++ return 0; ++} ++ ++/* ++ * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can ++ * not be locked in sysfs pci bus rescan path because of deadlock. To ++ * avoid the deadlock, access P2SB devices with the lock at an early ++ * step in kernel initialization and cache required resources. This ++ * should happen after subsys_initcall which initializes PCI subsystem ++ * and before device_initcall which requires P2SB resources. ++ */ ++fs_initcall(p2sb_fs_init); diff --git a/queue-6.7/rbd-don-t-move-requests-to-the-running-list-on-errors.patch b/queue-6.7/rbd-don-t-move-requests-to-the-running-list-on-errors.patch new file mode 100644 index 00000000000..18f034ec58e --- /dev/null +++ b/queue-6.7/rbd-don-t-move-requests-to-the-running-list-on-errors.patch @@ -0,0 +1,77 @@ +From ded080c86b3f99683774af0441a58fc2e3d60cae Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Wed, 17 Jan 2024 18:59:44 +0100 +Subject: rbd: don't move requests to the running list on errors + +From: Ilya Dryomov + +commit ded080c86b3f99683774af0441a58fc2e3d60cae upstream. + +The running list is supposed to contain requests that are pinning the +exclusive lock, i.e. those that must be flushed before exclusive lock +is released. When wake_lock_waiters() is called to handle an error, +requests on the acquiring list are failed with that error and no +flushing takes place.
Briefly moving them to the running list is not +only pointless but also harmful: if exclusive lock gets acquired +before all of their state machines are scheduled and go through +rbd_lock_del_request(), we trigger + + rbd_assert(list_empty(&rbd_dev->running_list)); + +in rbd_try_acquire_lock(). + +Cc: stable@vger.kernel.org +Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code") +Signed-off-by: Ilya Dryomov +Reviewed-by: Dongsheng Yang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/rbd.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +--- a/drivers/block/rbd.c ++++ b/drivers/block/rbd.c +@@ -3452,14 +3452,15 @@ static bool rbd_lock_add_request(struct + static void rbd_lock_del_request(struct rbd_img_request *img_req) + { + struct rbd_device *rbd_dev = img_req->rbd_dev; +- bool need_wakeup; ++ bool need_wakeup = false; + + lockdep_assert_held(&rbd_dev->lock_rwsem); + spin_lock(&rbd_dev->lock_lists_lock); +- rbd_assert(!list_empty(&img_req->lock_item)); +- list_del_init(&img_req->lock_item); +- need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && +- list_empty(&rbd_dev->running_list)); ++ if (!list_empty(&img_req->lock_item)) { ++ list_del_init(&img_req->lock_item); ++ need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && ++ list_empty(&rbd_dev->running_list)); ++ } + spin_unlock(&rbd_dev->lock_lists_lock); + if (need_wakeup) + complete(&rbd_dev->releasing_wait); +@@ -3842,14 +3843,19 @@ static void wake_lock_waiters(struct rbd + return; + } + +- list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) { ++ while (!list_empty(&rbd_dev->acquiring_list)) { ++ img_req = list_first_entry(&rbd_dev->acquiring_list, ++ struct rbd_img_request, lock_item); + mutex_lock(&img_req->state_mutex); + rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK); ++ if (!result) ++ list_move_tail(&img_req->lock_item, ++ &rbd_dev->running_list); ++ else ++ list_del_init(&img_req->lock_item); + 
rbd_img_schedule(img_req, result); + mutex_unlock(&img_req->state_mutex); + } +- +- list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list); + } + + static bool locker_equal(const struct ceph_locker *lhs, diff --git a/queue-6.7/revert-drivers-firmware-move-sysfb_init-from-device_initcall-to-subsys_initcall_sync.patch b/queue-6.7/revert-drivers-firmware-move-sysfb_init-from-device_initcall-to-subsys_initcall_sync.patch new file mode 100644 index 00000000000..cb352760ffb --- /dev/null +++ b/queue-6.7/revert-drivers-firmware-move-sysfb_init-from-device_initcall-to-subsys_initcall_sync.patch @@ -0,0 +1,47 @@ +From d1b163aa0749706379055e40a52cf7a851abf9dc Mon Sep 17 00:00:00 2001 +From: Thomas Zimmermann +Date: Tue, 23 Jan 2024 13:09:26 +0100 +Subject: Revert "drivers/firmware: Move sysfb_init() from device_initcall to subsys_initcall_sync" + +From: Thomas Zimmermann + +commit d1b163aa0749706379055e40a52cf7a851abf9dc upstream. + +This reverts commit 60aebc9559492cea6a9625f514a8041717e3a2e4. + +Commit 60aebc9559492cea ("drivers/firmware: Move sysfb_init() from +device_initcall to subsys_initcall_sync") messes up initialization order +of the graphics drivers and leads to blank displays on some systems. So +revert the commit. + +To make the display drivers fully independent from initialization +order requires to track framebuffer memory by device and independently +from the loaded drivers. The kernel currently lacks the infrastructure +to do so. 
+ +Reported-by: Jaak Ristioja +Closes: https://lore.kernel.org/dri-devel/ZUnNi3q3yB3zZfTl@P70.localdomain/T/#t +Reported-by: Huacai Chen +Closes: https://lore.kernel.org/dri-devel/20231108024613.2898921-1-chenhuacai@loongson.cn/ +Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10133 +Signed-off-by: Thomas Zimmermann +Cc: Javier Martinez Canillas +Cc: Thorsten Leemhuis +Cc: Jani Nikula +Cc: stable@vger.kernel.org # v6.5+ +Reviewed-by: Javier Martinez Canillas +Acked-by: Jani Nikula +Link: https://patchwork.freedesktop.org/patch/msgid/20240123120937.27736-1-tzimmermann@suse.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/sysfb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/firmware/sysfb.c ++++ b/drivers/firmware/sysfb.c +@@ -128,4 +128,4 @@ unlock_mutex: + } + + /* must execute after PCI subsystem for EFI quirks */ +-subsys_initcall_sync(sysfb_init); ++device_initcall(sysfb_init); diff --git a/queue-6.7/revert-drm-i915-dsi-do-display-on-sequence-later-on-icl.patch b/queue-6.7/revert-drm-i915-dsi-do-display-on-sequence-later-on-icl.patch new file mode 100644 index 00000000000..d30df0dcc09 --- /dev/null +++ b/queue-6.7/revert-drm-i915-dsi-do-display-on-sequence-later-on-icl.patch @@ -0,0 +1,63 @@ +From 6992eb815d087858f8d7e4020529c2fe800456b3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= +Date: Tue, 16 Jan 2024 23:08:21 +0200 +Subject: Revert "drm/i915/dsi: Do display on sequence later on icl+" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ville Syrjälä + +commit 6992eb815d087858f8d7e4020529c2fe800456b3 upstream. + +This reverts commit 88b065943cb583e890324d618e8d4b23460d51a3. + +Lenovo 82TQ is unhappy if we do the display on sequence this +late. The display output shows severe corruption. 
+ +It's unclear if this is a failure on our part (perhaps +something to do with sending commands in LP mode after HS +/video mode transmission has been started? Though the backlight +on command at least seems to work) or simply that there are +some commands in the sequence that are needed to be done +earlier (eg. could be some DSC init stuff?). If the latter +then I don't think the current Windows code would work +either, but maybe this was originally tested with an older +driver, who knows. + +Root causing this fully would likely require a lot of +experimentation which isn't really feasible without direct +access to the machine, so let's just accept failure and +go back to the original sequence. + +Cc: stable@vger.kernel.org +Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10071 +Signed-off-by: Ville Syrjälä +Link: https://patchwork.freedesktop.org/patch/msgid/20240116210821.30194-1-ville.syrjala@linux.intel.com +Acked-by: Jani Nikula +(cherry picked from commit dc524d05974f615b145404191fcf91b478950499) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/display/icl_dsi.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/gpu/drm/i915/display/icl_dsi.c ++++ b/drivers/gpu/drm/i915/display/icl_dsi.c +@@ -1155,6 +1155,7 @@ static void gen11_dsi_powerup_panel(stru + } + + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); ++ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); + + /* ensure all panel commands dispatched before enabling transcoder */ + wait_for_cmds_dispatched_to_panel(encoder); +@@ -1255,8 +1256,6 @@ static void gen11_dsi_enable(struct inte + /* step6d: enable dsi transcoder */ + gen11_dsi_enable_transcoder(encoder); + +- intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); +- + /* step7: enable backlight */ + intel_backlight_enable(crtc_state, conn_state); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); diff --git 
a/queue-6.7/revert-nouveau-push-event-block-allowing-out-of-the-fence-context.patch b/queue-6.7/revert-nouveau-push-event-block-allowing-out-of-the-fence-context.patch new file mode 100644 index 00000000000..09738a2e9b4 --- /dev/null +++ b/queue-6.7/revert-nouveau-push-event-block-allowing-out-of-the-fence-context.patch @@ -0,0 +1,116 @@ +From 4d7acc8f48bcf27d0dc068f02e55c77e840b9110 Mon Sep 17 00:00:00 2001 +From: Dave Airlie +Date: Sat, 27 Jan 2024 04:04:34 +1000 +Subject: Revert "nouveau: push event block/allowing out of the fence context" + +From: Dave Airlie + +commit 4d7acc8f48bcf27d0dc068f02e55c77e840b9110 upstream. + +This reverts commit eacabb5462717a52fccbbbba458365a4f5e61f35. + +That commit causes some regressions in desktop usage. This revert will +reintroduce the original deadlock in DRI_PRIME situations. I've +got an idea to fix it by offloading to a workqueue in a different +spot; however, this code has a race condition where we sometimes +miss interrupts, so I'd like to fix that as well. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Dave Airlie +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/nouveau/nouveau_fence.c | 28 +++++----------------------- + drivers/gpu/drm/nouveau/nouveau_fence.h | 5 +---- + 2 files changed, 6 insertions(+), 27 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_fence.c ++++ b/drivers/gpu/drm/nouveau/nouveau_fence.c +@@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fenc + if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { + struct nouveau_fence_chan *fctx = nouveau_fctx(fence); + +- if (atomic_dec_and_test(&fctx->notify_ref)) ++ if (!--fctx->notify_ref) + drop = 1; + } + +@@ -103,7 +103,6 @@ nouveau_fence_context_kill(struct nouvea + void + nouveau_fence_context_del(struct nouveau_fence_chan *fctx) + { +- cancel_work_sync(&fctx->allow_block_work); + nouveau_fence_context_kill(fctx, 0); + nvif_event_dtor(&fctx->event); + fctx->dead = 1; +@@ -168,18 +167,6 @@ nouveau_fence_wait_uevent_handler(struct + return ret; + } + +-static void +-nouveau_fence_work_allow_block(struct work_struct *work) +-{ +- struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, +- allow_block_work); +- +- if (atomic_read(&fctx->notify_ref) == 0) +- nvif_event_block(&fctx->event); +- else +- nvif_event_allow(&fctx->event); +-} +- + void + nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) + { +@@ -191,7 +178,6 @@ nouveau_fence_context_new(struct nouveau + } args; + int ret; + +- INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block); + INIT_LIST_HEAD(&fctx->flip); + INIT_LIST_HEAD(&fctx->pending); + spin_lock_init(&fctx->lock); +@@ -535,19 +521,15 @@ static bool nouveau_fence_enable_signali + struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence_chan *fctx = nouveau_fctx(fence); + bool ret; +- bool do_work; + +- if (atomic_inc_return(&fctx->notify_ref) == 0) +- do_work = true; ++ if (!fctx->notify_ref++) ++ 
nvif_event_allow(&fctx->event); + + ret = nouveau_fence_no_signaling(f); + if (ret) + set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); +- else if (atomic_dec_and_test(&fctx->notify_ref)) +- do_work = true; +- +- if (do_work) +- schedule_work(&fctx->allow_block_work); ++ else if (!--fctx->notify_ref) ++ nvif_event_block(&fctx->event); + + return ret; + } +--- a/drivers/gpu/drm/nouveau/nouveau_fence.h ++++ b/drivers/gpu/drm/nouveau/nouveau_fence.h +@@ -3,7 +3,6 @@ + #define __NOUVEAU_FENCE_H__ + + #include +-#include + #include + + struct nouveau_drm; +@@ -46,9 +45,7 @@ struct nouveau_fence_chan { + char name[32]; + + struct nvif_event event; +- struct work_struct allow_block_work; +- atomic_t notify_ref; +- int dead, killed; ++ int notify_ref, dead, killed; + }; + + struct nouveau_fence_priv { diff --git a/queue-6.7/series b/queue-6.7/series index e3a392fc5e0..a4805be1f1b 100644 --- a/queue-6.7/series +++ b/queue-6.7/series @@ -219,3 +219,29 @@ selftests-bonding-do-not-test-arp-ns-target-with-mod.patch net-fec-fix-the-unhandled-context-fault-from-smmu.patch tsnep-remove-fcs-for-xdp-data-path.patch tsnep-fix-xdp_ring_need_wakeup-for-empty-fill-ring.patch +btrfs-zoned-fix-lock-ordering-in-btrfs_zone_activate.patch +btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch +btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch +btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch +btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch +btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch +btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch +rbd-don-t-move-requests-to-the-running-list-on-errors.patch +exec-fix-error-handling-in-begin_new_exec.patch +wifi-iwlwifi-fix-a-memory-corruption.patch +nfsd-fix-release_lockowner.patch +ovl-mark-xwhiteouts-directory-with-overlay.opaque-x.patch 
+hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch +netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch +netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch +platform-x86-intel-uncore-freq-fix-types-in-sysfs-callbacks.patch +platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch +ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch +firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch +revert-drivers-firmware-move-sysfb_init-from-device_initcall-to-subsys_initcall_sync.patch +drm-amdgpu-fix-the-null-pointer-when-load-rlc-firmware.patch +xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch +gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch +cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch +revert-nouveau-push-event-block-allowing-out-of-the-fence-context.patch +revert-drm-i915-dsi-do-display-on-sequence-later-on-icl.patch diff --git a/queue-6.7/wifi-iwlwifi-fix-a-memory-corruption.patch b/queue-6.7/wifi-iwlwifi-fix-a-memory-corruption.patch new file mode 100644 index 00000000000..32bd69ad79e --- /dev/null +++ b/queue-6.7/wifi-iwlwifi-fix-a-memory-corruption.patch @@ -0,0 +1,44 @@ +From cf4a0d840ecc72fcf16198d5e9c505ab7d5a5e4d Mon Sep 17 00:00:00 2001 +From: Emmanuel Grumbach +Date: Thu, 11 Jan 2024 15:07:25 +0200 +Subject: wifi: iwlwifi: fix a memory corruption + +From: Emmanuel Grumbach + +commit cf4a0d840ecc72fcf16198d5e9c505ab7d5a5e4d upstream. + +iwl_fw_ini_trigger_tlv::data is a pointer to a __le32, which means that +if we copy to iwl_fw_ini_trigger_tlv::data + offset while offset is in +bytes, we'll write past the buffer. 
+ +Cc: stable@vger.kernel.org +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218233 +Fixes: cf29c5b66b9f ("iwlwifi: dbg_ini: implement time point handling") +Signed-off-by: Emmanuel Grumbach +Signed-off-by: Miri Korenblit +Link: https://msgid.link/20240111150610.2d2b8b870194.I14ed76505a5cf87304e0c9cc05cc0ae85ed3bf91@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c ++++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +@@ -1,6 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + /* +- * Copyright (C) 2018-2023 Intel Corporation ++ * Copyright (C) 2018-2024 Intel Corporation + */ + #include + #include "iwl-drv.h" +@@ -1096,7 +1096,7 @@ static int iwl_dbg_tlv_override_trig_nod + node_trig = (void *)node_tlv->data; + } + +- memcpy(node_trig->data + offset, trig->data, trig_data_len); ++ memcpy((u8 *)node_trig->data + offset, trig->data, trig_data_len); + node_tlv->length = cpu_to_le32(size); + + if (policy & IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG) { diff --git a/queue-6.7/xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch b/queue-6.7/xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch new file mode 100644 index 00000000000..5900e625cf9 --- /dev/null +++ b/queue-6.7/xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch @@ -0,0 +1,127 @@ +From d8d222e09dab84a17bb65dda4b94d01c565f5327 Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Tue, 16 Jan 2024 15:33:07 +1100 +Subject: xfs: read only mounts with fsopen mount API are busted + +From: Dave Chinner + +commit d8d222e09dab84a17bb65dda4b94d01c565f5327 upstream. + +Recently xfs/513 started failing on my test machines testing "-o +ro,norecovery" mount options. This was being emitted in dmesg: + +[ 9906.932724] XFS (pmem0): no-recovery mounts must be read-only. 
+ +Turns out, readonly mounts with the fsopen()/fsconfig() mount API +have been busted since day zero. It's only taken 5 years for Debian +unstable to start using this "new" mount API, and shortly after this +I noticed xfs/513 had started to fail as per above. + +The syscall trace is: + +fsopen("xfs", FSOPEN_CLOEXEC) = 3 +mount_setattr(-1, NULL, 0, NULL, 0) = -1 EINVAL (Invalid argument) +..... +fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/pmem0", 0) = 0 +fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0 +fsconfig(3, FSCONFIG_SET_FLAG, "norecovery", NULL, 0) = 0 +fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = -1 EINVAL (Invalid argument) +close(3) = 0 + +Showing that the actual mount instantiation (FSCONFIG_CMD_CREATE) is +what threw out the error. + +During mount instantiation, we call xfs_fs_validate_params() which +does: + + /* No recovery flag requires a read-only mount */ + if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { + xfs_warn(mp, "no-recovery mounts must be read-only."); + return -EINVAL; + } + +and xfs_is_readonly() checks internal mount flags for read only +state. This state is set in xfs_init_fs_context() from the +context superblock flag state: + + /* + * Copy binary VFS mount flags we are interested in. + */ + if (fc->sb_flags & SB_RDONLY) + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + +With the old mount API, all of the VFS specific superblock flags +had already been parsed and set before xfs_init_fs_context() is +called, so this all works fine. + +However, in the brave new fsopen/fsconfig world, +xfs_init_fs_context() is called from fsopen() context, before any +VFS superblock flags have been set or parsed. Hence if we use fsopen(), +the internal XFS readonly state is *never set*. Hence anything that +depends on xfs_is_readonly() actually returning true for read only +mounts is broken if fsopen() has been used to mount the filesystem. 
+ +Fix this by moving this internal state initialisation to +xfs_fs_fill_super() before we attempt to validate the parameters +that have been set prior to the FSCONFIG_CMD_CREATE call being made. + +Signed-off-by: Dave Chinner +Fixes: 73e5fff98b64 ("xfs: switch to use the new mount-api") +cc: stable@vger.kernel.org +Reviewed-by: Christoph Hellwig +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_super.c | 27 +++++++++++++++++---------- + 1 file changed, 17 insertions(+), 10 deletions(-) + +--- a/fs/xfs/xfs_super.c ++++ b/fs/xfs/xfs_super.c +@@ -1510,6 +1510,18 @@ xfs_fs_fill_super( + + mp->m_super = sb; + ++ /* ++ * Copy VFS mount flags from the context now that all parameter parsing ++ * is guaranteed to have been completed by either the old mount API or ++ * the newer fsopen/fsconfig API. ++ */ ++ if (fc->sb_flags & SB_RDONLY) ++ set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); ++ if (fc->sb_flags & SB_DIRSYNC) ++ mp->m_features |= XFS_FEAT_DIRSYNC; ++ if (fc->sb_flags & SB_SYNCHRONOUS) ++ mp->m_features |= XFS_FEAT_WSYNC; ++ + error = xfs_fs_validate_params(mp); + if (error) + return error; +@@ -1979,6 +1991,11 @@ static const struct fs_context_operation + .free = xfs_fs_free, + }; + ++/* ++ * WARNING: do not initialise any parameters in this function that depend on ++ * mount option parsing having already been performed as this can be called from ++ * fsopen() before any parameters have been set. ++ */ + static int xfs_init_fs_context( + struct fs_context *fc) + { +@@ -2010,16 +2027,6 @@ static int xfs_init_fs_context( + mp->m_logbsize = -1; + mp->m_allocsize_log = 16; /* 64k */ + +- /* +- * Copy binary VFS mount flags we are interested in. +- */ +- if (fc->sb_flags & SB_RDONLY) +- set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); +- if (fc->sb_flags & SB_DIRSYNC) +- mp->m_features |= XFS_FEAT_DIRSYNC; +- if (fc->sb_flags & SB_SYNCHRONOUS) +- mp->m_features |= XFS_FEAT_WSYNC; +- + fc->s_fs_info = mp; + fc->ops = &xfs_context_ops; +