--- /dev/null
+From 3324d0547861b16cf436d54abba7052e0c8aa9de Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Thu, 4 Jan 2024 11:48:47 -0800
+Subject: btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted
+
+From: Omar Sandoval <osandov@fb.com>
+
+commit 3324d0547861b16cf436d54abba7052e0c8aa9de upstream.
+
+Sweet Tea spotted a race between subvolume deletion and snapshotting
+that can result in the root item for the snapshot having the
+BTRFS_ROOT_SUBVOL_DEAD flag set. The race is:
+
+Thread 1 | Thread 2
+----------------------------------------------|----------
+btrfs_delete_subvolume |
+ btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)|
+ |btrfs_mksubvol
+ | down_read(subvol_sem)
+ | create_snapshot
+ | ...
+ | create_pending_snapshot
+ | copy root item from source
+ down_write(subvol_sem) |
+
+This flag is only checked in send and swap activate, which this would
+cause to fail mysteriously.
+
+create_snapshot() now checks the root refs to reject a deleted
+subvolume, so we can fix this by locking subvol_sem earlier so that the
+BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically.
+
+CC: stable@vger.kernel.org # 4.14+
+Reported-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 22 +++++++++++++---------
+ 1 file changed, 13 insertions(+), 9 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -4689,6 +4689,8 @@ int btrfs_delete_subvolume(struct inode
+ u64 root_flags;
+ int ret;
+
++ down_write(&fs_info->subvol_sem);
++
+ /*
+ * Don't allow to delete a subvolume with send in progress. This is
+ * inside the inode lock so the error handling that has to drop the bit
+@@ -4700,25 +4702,25 @@ int btrfs_delete_subvolume(struct inode
+ btrfs_warn(fs_info,
+ "attempt to delete subvolume %llu during send",
+ dest->root_key.objectid);
+- return -EPERM;
++ ret = -EPERM;
++ goto out_up_write;
+ }
+ if (atomic_read(&dest->nr_swapfiles)) {
+ spin_unlock(&dest->root_item_lock);
+ btrfs_warn(fs_info,
+ "attempt to delete subvolume %llu with active swapfile",
+ root->root_key.objectid);
+- return -EPERM;
++ ret = -EPERM;
++ goto out_up_write;
+ }
+ root_flags = btrfs_root_flags(&dest->root_item);
+ btrfs_set_root_flags(&dest->root_item,
+ root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+ spin_unlock(&dest->root_item_lock);
+
+- down_write(&fs_info->subvol_sem);
+-
+ ret = may_destroy_subvol(dest);
+ if (ret)
+- goto out_up_write;
++ goto out_undead;
+
+ btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
+ /*
+@@ -4728,7 +4730,7 @@ int btrfs_delete_subvolume(struct inode
+ */
+ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
+ if (ret)
+- goto out_up_write;
++ goto out_undead;
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+@@ -4794,15 +4796,17 @@ out_end_trans:
+ inode->i_flags |= S_DEAD;
+ out_release:
+ btrfs_subvolume_release_metadata(root, &block_rsv);
+-out_up_write:
+- up_write(&fs_info->subvol_sem);
++out_undead:
+ if (ret) {
+ spin_lock(&dest->root_item_lock);
+ root_flags = btrfs_root_flags(&dest->root_item);
+ btrfs_set_root_flags(&dest->root_item,
+ root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
+ spin_unlock(&dest->root_item_lock);
+- } else {
++ }
++out_up_write:
++ up_write(&fs_info->subvol_sem);
++ if (!ret) {
+ d_invalidate(dentry);
+ btrfs_prune_dentries(dest);
+ ASSERT(dest->send_in_progress == 0);
--- /dev/null
+From 173431b274a9a54fc10b273b46e67f46bcf62d2e Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 10 Jan 2024 08:58:26 +1030
+Subject: btrfs: defrag: reject unknown flags of btrfs_ioctl_defrag_range_args
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 173431b274a9a54fc10b273b46e67f46bcf62d2e upstream.
+
+Add extra sanity check for btrfs_ioctl_defrag_range_args::flags.
+
+This is not really to enhance fuzzing tests, but as a preparation for
+future expansion on btrfs_ioctl_defrag_range_args.
+
+In the future we're going to add new members, allowing more fine tuning
+for btrfs defrag. Without the -EOPNOTSUPP error, there would be no way
+to detect if the kernel supports those new defrag features.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c | 4 ++++
+ include/uapi/linux/btrfs.h | 3 +++
+ 2 files changed, 7 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -3500,6 +3500,10 @@ static int btrfs_ioctl_defrag(struct fil
+ ret = -EFAULT;
+ goto out;
+ }
++ if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
++ ret = -EOPNOTSUPP;
++ goto out;
++ }
+ /* compression requires us to start the IO */
+ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
+ range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
+--- a/include/uapi/linux/btrfs.h
++++ b/include/uapi/linux/btrfs.h
+@@ -585,6 +585,9 @@ struct btrfs_ioctl_clone_range_args {
+ */
+ #define BTRFS_DEFRAG_RANGE_COMPRESS 1
+ #define BTRFS_DEFRAG_RANGE_START_IO 2
++#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \
++ BTRFS_DEFRAG_RANGE_START_IO)
++
+ struct btrfs_ioctl_defrag_range_args {
+ /* start of the defrag operation */
+ __u64 start;
--- /dev/null
+From 7081929ab2572920e94d70be3d332e5c9f97095a Mon Sep 17 00:00:00 2001
+From: Omar Sandoval <osandov@fb.com>
+Date: Thu, 4 Jan 2024 11:48:46 -0800
+Subject: btrfs: don't abort filesystem when attempting to snapshot deleted subvolume
+
+From: Omar Sandoval <osandov@fb.com>
+
+commit 7081929ab2572920e94d70be3d332e5c9f97095a upstream.
+
+If the source file descriptor to the snapshot ioctl refers to a deleted
+subvolume, we get the following abort:
+
+ BTRFS: Transaction aborted (error -2)
+ WARNING: CPU: 0 PID: 833 at fs/btrfs/transaction.c:1875 create_pending_snapshot+0x1040/0x1190 [btrfs]
+ Modules linked in: pata_acpi btrfs ata_piix libata scsi_mod virtio_net blake2b_generic xor net_failover virtio_rng failover scsi_common rng_core raid6_pq libcrc32c
+ CPU: 0 PID: 833 Comm: t_snapshot_dele Not tainted 6.7.0-rc6 #2
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014
+ RIP: 0010:create_pending_snapshot+0x1040/0x1190 [btrfs]
+ RSP: 0018:ffffa09c01337af8 EFLAGS: 00010282
+ RAX: 0000000000000000 RBX: ffff9982053e7c78 RCX: 0000000000000027
+ RDX: ffff99827dc20848 RSI: 0000000000000001 RDI: ffff99827dc20840
+ RBP: ffffa09c01337c00 R08: 0000000000000000 R09: ffffa09c01337998
+ R10: 0000000000000003 R11: ffffffffb96da248 R12: fffffffffffffffe
+ R13: ffff99820535bb28 R14: ffff99820b7bd000 R15: ffff99820381ea80
+ FS: 00007fe20aadabc0(0000) GS:ffff99827dc00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000559a120b502f CR3: 00000000055b6000 CR4: 00000000000006f0
+ Call Trace:
+ <TASK>
+ ? create_pending_snapshot+0x1040/0x1190 [btrfs]
+ ? __warn+0x81/0x130
+ ? create_pending_snapshot+0x1040/0x1190 [btrfs]
+ ? report_bug+0x171/0x1a0
+ ? handle_bug+0x3a/0x70
+ ? exc_invalid_op+0x17/0x70
+ ? asm_exc_invalid_op+0x1a/0x20
+ ? create_pending_snapshot+0x1040/0x1190 [btrfs]
+ ? create_pending_snapshot+0x1040/0x1190 [btrfs]
+ create_pending_snapshots+0x92/0xc0 [btrfs]
+ btrfs_commit_transaction+0x66b/0xf40 [btrfs]
+ btrfs_mksubvol+0x301/0x4d0 [btrfs]
+ btrfs_mksnapshot+0x80/0xb0 [btrfs]
+ __btrfs_ioctl_snap_create+0x1c2/0x1d0 [btrfs]
+ btrfs_ioctl_snap_create_v2+0xc4/0x150 [btrfs]
+ btrfs_ioctl+0x8a6/0x2650 [btrfs]
+ ? kmem_cache_free+0x22/0x340
+ ? do_sys_openat2+0x97/0xe0
+ __x64_sys_ioctl+0x97/0xd0
+ do_syscall_64+0x46/0xf0
+ entry_SYSCALL_64_after_hwframe+0x6e/0x76
+ RIP: 0033:0x7fe20abe83af
+ RSP: 002b:00007ffe6eff1360 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+ RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fe20abe83af
+ RDX: 00007ffe6eff23c0 RSI: 0000000050009417 RDI: 0000000000000003
+ RBP: 0000000000000003 R08: 0000000000000000 R09: 00007fe20ad16cd0
+ R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+ R13: 00007ffe6eff13c0 R14: 00007fe20ad45000 R15: 0000559a120b6d58
+ </TASK>
+ ---[ end trace 0000000000000000 ]---
+ BTRFS: error (device vdc: state A) in create_pending_snapshot:1875: errno=-2 No such entry
+ BTRFS info (device vdc: state EA): forced readonly
+ BTRFS warning (device vdc: state EA): Skipping commit of aborted transaction.
+ BTRFS: error (device vdc: state EA) in cleanup_transaction:2055: errno=-2 No such entry
+
+This happens because create_pending_snapshot() initializes the new root
+item as a copy of the source root item. This includes the refs field,
+which is 0 for a deleted subvolume. The call to btrfs_insert_root()
+therefore inserts a root with refs == 0. btrfs_get_new_fs_root() then
+finds the root and returns -ENOENT if refs == 0, which causes
+create_pending_snapshot() to abort.
+
+Fix it by checking the source root's refs before attempting the
+snapshot, but after locking subvol_sem to avoid racing with deletion.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -780,6 +780,9 @@ static int create_snapshot(struct btrfs_
+ return -EOPNOTSUPP;
+ }
+
++ if (btrfs_root_refs(&root->root_item) == 0)
++ return -ENOENT;
++
+ if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
+ return -EINVAL;
+
--- /dev/null
+From a208b3f132b48e1f94f620024e66fea635925877 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Mon, 15 Jan 2024 20:30:26 +0100
+Subject: btrfs: don't warn if discard range is not aligned to sector
+
+From: David Sterba <dsterba@suse.com>
+
+commit a208b3f132b48e1f94f620024e66fea635925877 upstream.
+
+There's a warning in btrfs_issue_discard() when the range is not aligned
+to 512 bytes, originally added in 4d89d377bbb0 ("btrfs:
+btrfs_issue_discard ensure offset/length are aligned to sector
+boundaries"). We can't do sub-sector writes anyway so the adjustment is
+the only thing that we can do and the warning is unnecessary.
+
+CC: stable@vger.kernel.org # 4.19+
+Reported-by: syzbot+4a4f1eba14eb5c3417d1@syzkaller.appspotmail.com
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -1209,7 +1209,8 @@ static int btrfs_issue_discard(struct bl
+ u64 bytes_left, end;
+ u64 aligned_start = ALIGN(start, 1 << 9);
+
+- if (WARN_ON(start != aligned_start)) {
++ /* Adjust the range to be aligned to 512B sectors if necessary. */
++ if (start != aligned_start) {
+ len -= aligned_start - start;
+ len = round_down(len, 1 << 9);
+ start = aligned_start;
--- /dev/null
+From f03e274a8b29d1d1c1bbd7f764766cb5ca537ab7 Mon Sep 17 00:00:00 2001
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Wed, 3 Jan 2024 13:31:27 +0300
+Subject: btrfs: ref-verify: free ref cache before clearing mount opt
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+commit f03e274a8b29d1d1c1bbd7f764766cb5ca537ab7 upstream.
+
+As clearing REF_VERIFY mount option indicates there were some errors in a
+ref-verify process, a ref cache is not relevant anymore and should be
+freed.
+
+btrfs_free_ref_cache() requires REF_VERIFY option being set so call
+it just before clearing the mount option.
+
+Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
+
+Reported-by: syzbot+be14ed7728594dc8bd42@syzkaller.appspotmail.com
+Fixes: fd708b81d972 ("Btrfs: add a extent ref verify tool")
+CC: stable@vger.kernel.org # 5.4+
+Closes: https://lore.kernel.org/lkml/000000000000e5a65c05ee832054@google.com/
+Reported-by: syzbot+c563a3c79927971f950f@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/lkml/0000000000007fe09705fdc6086c@google.com/
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ref-verify.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/ref-verify.c
++++ b/fs/btrfs/ref-verify.c
+@@ -883,8 +883,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_i
+ out_unlock:
+ spin_unlock(&fs_info->ref_verify_lock);
+ out:
+- if (ret)
++ if (ret) {
++ btrfs_free_ref_cache(fs_info);
+ btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
++ }
+ return ret;
+ }
+
+@@ -1015,8 +1017,8 @@ int btrfs_build_ref_tree(struct btrfs_fs
+ }
+ }
+ if (ret) {
+- btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
+ btrfs_free_ref_cache(fs_info);
++ btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
+ }
+ btrfs_free_path(path);
+ return ret;
--- /dev/null
+From f398e70dd69e6ceea71463a5380e6118f219197e Mon Sep 17 00:00:00 2001
+From: Chung-Chiang Cheng <cccheng@synology.com>
+Date: Fri, 12 Jan 2024 15:41:05 +0800
+Subject: btrfs: tree-checker: fix inline ref size in error messages
+
+From: Chung-Chiang Cheng <cccheng@synology.com>
+
+commit f398e70dd69e6ceea71463a5380e6118f219197e upstream.
+
+The error message should accurately reflect the size rather than the
+type.
+
+Fixes: f82d1c7ca8ae ("btrfs: tree-checker: Add EXTENT_ITEM and METADATA_ITEM check")
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Chung-Chiang Cheng <cccheng@synology.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-checker.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -1399,7 +1399,7 @@ static int check_extent_item(struct exte
+ if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
+ extent_err(leaf, slot,
+ "inline ref item overflows extent item, ptr %lu iref size %u end %lu",
+- ptr, inline_type, end);
++ ptr, btrfs_extent_inline_ref_size(inline_type), end);
+ return -EUCLEAN;
+ }
+
--- /dev/null
+From 192cdb1c907fd8df2d764c5bb17496e415e59391 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Mon, 22 Jan 2024 15:18:11 +0100
+Subject: cpufreq: intel_pstate: Refine computation of P-state for given frequency
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit 192cdb1c907fd8df2d764c5bb17496e415e59391 upstream.
+
+On systems using HWP, if a given frequency is equal to the maximum turbo
+frequency or the maximum non-turbo frequency, the HWP performance level
+corresponding to it is already known and can be used directly without
+any computation.
+
+Accordingly, adjust the code to use the known HWP performance levels in
+the cases mentioned above.
+
+This also helps to avoid limiting CPU capacity artificially in some
+cases when the BIOS produces the HWP_CAP numbers using a different
+E-core-to-P-core performance scaling factor than expected by the kernel.
+
+Fixes: f5c8cf2a4992 ("cpufreq: intel_pstate: hybrid: Use known scaling factor for P-cores")
+Cc: 6.1+ <stable@vger.kernel.org> # 6.1+
+Tested-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cpufreq/intel_pstate.c | 55 +++++++++++++++++++++++++----------------
+ 1 file changed, 34 insertions(+), 21 deletions(-)
+
+--- a/drivers/cpufreq/intel_pstate.c
++++ b/drivers/cpufreq/intel_pstate.c
+@@ -493,6 +493,30 @@ static inline int intel_pstate_get_cppc_
+ }
+ #endif /* CONFIG_ACPI_CPPC_LIB */
+
++static int intel_pstate_freq_to_hwp_rel(struct cpudata *cpu, int freq,
++ unsigned int relation)
++{
++ if (freq == cpu->pstate.turbo_freq)
++ return cpu->pstate.turbo_pstate;
++
++ if (freq == cpu->pstate.max_freq)
++ return cpu->pstate.max_pstate;
++
++ switch (relation) {
++ case CPUFREQ_RELATION_H:
++ return freq / cpu->pstate.scaling;
++ case CPUFREQ_RELATION_C:
++ return DIV_ROUND_CLOSEST(freq, cpu->pstate.scaling);
++ }
++
++ return DIV_ROUND_UP(freq, cpu->pstate.scaling);
++}
++
++static int intel_pstate_freq_to_hwp(struct cpudata *cpu, int freq)
++{
++ return intel_pstate_freq_to_hwp_rel(cpu, freq, CPUFREQ_RELATION_L);
++}
++
+ /**
+ * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
+ * @cpu: Target CPU.
+@@ -510,6 +534,7 @@ static void intel_pstate_hybrid_hwp_adju
+ int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
+ int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu);
+ int scaling = cpu->pstate.scaling;
++ int freq;
+
+ pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
+ pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
+@@ -523,16 +548,16 @@ static void intel_pstate_hybrid_hwp_adju
+ cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
+ perf_ctl_scaling);
+
+- cpu->pstate.max_pstate_physical =
+- DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
+- scaling);
++ freq = perf_ctl_max_phys * perf_ctl_scaling;
++ cpu->pstate.max_pstate_physical = intel_pstate_freq_to_hwp(cpu, freq);
+
+- cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
++ freq = cpu->pstate.min_pstate * perf_ctl_scaling;
++ cpu->pstate.min_freq = freq;
+ /*
+ * Cast the min P-state value retrieved via pstate_funcs.get_min() to
+ * the effective range of HWP performance levels.
+ */
+- cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling);
++ cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq);
+ }
+
+ static inline void update_turbo_state(void)
+@@ -2493,13 +2518,12 @@ static void intel_pstate_update_perf_lim
+ * abstract values to represent performance rather than pure ratios.
+ */
+ if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) {
+- int scaling = cpu->pstate.scaling;
+ int freq;
+
+ freq = max_policy_perf * perf_ctl_scaling;
+- max_policy_perf = DIV_ROUND_UP(freq, scaling);
++ max_policy_perf = intel_pstate_freq_to_hwp(cpu, freq);
+ freq = min_policy_perf * perf_ctl_scaling;
+- min_policy_perf = DIV_ROUND_UP(freq, scaling);
++ min_policy_perf = intel_pstate_freq_to_hwp(cpu, freq);
+ }
+
+ pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
+@@ -2873,18 +2897,7 @@ static int intel_cpufreq_target(struct c
+
+ cpufreq_freq_transition_begin(policy, &freqs);
+
+- switch (relation) {
+- case CPUFREQ_RELATION_L:
+- target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
+- break;
+- case CPUFREQ_RELATION_H:
+- target_pstate = freqs.new / cpu->pstate.scaling;
+- break;
+- default:
+- target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
+- break;
+- }
+-
++ target_pstate = intel_pstate_freq_to_hwp_rel(cpu, freqs.new, relation);
+ target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false);
+
+ freqs.new = target_pstate * cpu->pstate.scaling;
+@@ -2902,7 +2915,7 @@ static unsigned int intel_cpufreq_fast_s
+
+ update_turbo_state();
+
+- target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
++ target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq);
+
+ target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true);
+
--- /dev/null
+From 84c39ec57d409e803a9bb6e4e85daf1243e0e80b Mon Sep 17 00:00:00 2001
+From: Bernd Edlinger <bernd.edlinger@hotmail.de>
+Date: Mon, 22 Jan 2024 19:34:21 +0100
+Subject: exec: Fix error handling in begin_new_exec()
+
+From: Bernd Edlinger <bernd.edlinger@hotmail.de>
+
+commit 84c39ec57d409e803a9bb6e4e85daf1243e0e80b upstream.
+
+If get_unused_fd_flags() fails, the error handling is incomplete because
+bprm->cred is already set to NULL, and therefore free_bprm will not
+unlock the cred_guard_mutex. Note there are two error conditions which
+end up here, one before and one after bprm->cred is cleared.
+
+Fixes: b8a61c9e7b4a ("exec: Generic execfd support")
+Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
+Acked-by: Eric W. Biederman <ebiederm@xmission.com>
+Link: https://lore.kernel.org/r/AS8P193MB128517ADB5EFF29E04389EDAE4752@AS8P193MB1285.EURP193.PROD.OUTLOOK.COM
+Cc: stable@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exec.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1410,6 +1410,9 @@ int begin_new_exec(struct linux_binprm *
+
+ out_unlock:
+ up_write(&me->signal->exec_update_lock);
++ if (!bprm->cred)
++ mutex_unlock(&me->signal->cred_guard_mutex);
++
+ out:
+ return retval;
+ }
--- /dev/null
+From 437a310b22244d4e0b78665c3042e5d1c0f45306 Mon Sep 17 00:00:00 2001
+From: Cristian Marussi <cristian.marussi@arm.com>
+Date: Wed, 20 Dec 2023 17:21:12 +0000
+Subject: firmware: arm_scmi: Check mailbox/SMT channel for consistency
+
+From: Cristian Marussi <cristian.marussi@arm.com>
+
+commit 437a310b22244d4e0b78665c3042e5d1c0f45306 upstream.
+
+On reception of a completion interrupt the shared memory area is accessed
+to retrieve the message header at first and then, if the message sequence
+number identifies a transaction which is still pending, the related
+payload is fetched too.
+
+When an SCMI command times out the channel ownership remains with the
+platform until eventually a late reply is received and, as a consequence,
+any further transmission attempt remains pending, waiting for the channel
+to be relinquished by the platform.
+
+Once that late reply is received the channel ownership is given back
+to the agent and any pending request is then allowed to proceed and
+overwrite the SMT area of the just delivered late reply; then the wait
+for the reply to the new request starts.
+
+It has been observed that the spurious IRQ related to the late reply can
+be wrongly associated with the freshly enqueued request: when that happens
+the SCMI stack in-flight lookup procedure is fooled by the fact that the
+message header now present in the SMT area is related to the new pending
+transaction, even though the real reply has still to arrive.
+
+This race-condition on the A2P channel can be detected by looking at the
+channel status bits: a genuine reply from the platform will have set the
+channel free bit before triggering the completion IRQ.
+
+Add a consistency check to validate such condition in the A2P ISR.
+
+Reported-by: Xinglong Yang <xinglong.yang@cixtech.com>
+Closes: https://lore.kernel.org/all/PUZPR06MB54981E6FA00D82BFDBB864FBF08DA@PUZPR06MB5498.apcprd06.prod.outlook.com/
+Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type")
+Cc: stable@vger.kernel.org # 5.15+
+Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
+Tested-by: Xinglong Yang <xinglong.yang@cixtech.com>
+Link: https://lore.kernel.org/r/20231220172112.763539-1-cristian.marussi@arm.com
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/arm_scmi/common.h | 1 +
+ drivers/firmware/arm_scmi/mailbox.c | 14 ++++++++++++++
+ drivers/firmware/arm_scmi/shmem.c | 6 ++++++
+ 3 files changed, 21 insertions(+)
+
+--- a/drivers/firmware/arm_scmi/common.h
++++ b/drivers/firmware/arm_scmi/common.h
+@@ -244,6 +244,7 @@ void shmem_fetch_notification(struct scm
+ void shmem_clear_channel(struct scmi_shared_mem __iomem *shmem);
+ bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem,
+ struct scmi_xfer *xfer);
++bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem);
+
+ /* declarations for message passing transports */
+ struct scmi_msg_payld;
+--- a/drivers/firmware/arm_scmi/mailbox.c
++++ b/drivers/firmware/arm_scmi/mailbox.c
+@@ -43,6 +43,20 @@ static void rx_callback(struct mbox_clie
+ {
+ struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl);
+
++ /*
++ * An A2P IRQ is NOT valid when received while the platform still has
++ * the ownership of the channel, because the platform at first releases
++ * the SMT channel and then sends the completion interrupt.
++ *
++ * This addresses a possible race condition in which a spurious IRQ from
++ * a previous timed-out reply which arrived late could be wrongly
++ * associated with the next pending transaction.
++ */
++ if (cl->knows_txdone && !shmem_channel_free(smbox->shmem)) {
++ dev_warn(smbox->cinfo->dev, "Ignoring spurious A2P IRQ !\n");
++ return;
++ }
++
+ scmi_rx_callback(smbox->cinfo, shmem_read_header(smbox->shmem), NULL);
+ }
+
+--- a/drivers/firmware/arm_scmi/shmem.c
++++ b/drivers/firmware/arm_scmi/shmem.c
+@@ -122,3 +122,9 @@ bool shmem_poll_done(struct scmi_shared_
+ (SCMI_SHMEM_CHAN_STAT_CHANNEL_ERROR |
+ SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE);
+ }
++
++bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem)
++{
++ return (ioread32(&shmem->channel_status) &
++ SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE);
++}
--- /dev/null
+From 805c74eac8cb306dc69b87b6b066ab4da77ceaf1 Mon Sep 17 00:00:00 2001
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Wed, 17 Jan 2024 08:29:42 -0600
+Subject: gpiolib: acpi: Ignore touchpad wakeup on GPD G1619-04
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+commit 805c74eac8cb306dc69b87b6b066ab4da77ceaf1 upstream.
+
+Spurious wakeups are reported on the GPD G1619-04 which
+can be absolved by programming the GPIO to ignore wakeups.
+
+Cc: stable@vger.kernel.org
+Reported-and-tested-by: George Melikov <mail@gmelikov.ru>
+Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3073
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpio/gpiolib-acpi.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/drivers/gpio/gpiolib-acpi.c
++++ b/drivers/gpio/gpiolib-acpi.c
+@@ -1626,6 +1626,20 @@ static const struct dmi_system_id gpioli
+ .ignore_wake = "ELAN0415:00@9",
+ },
+ },
++ {
++ /*
++ * Spurious wakeups from TP_ATTN# pin
++ * Found in BIOS 0.35
++ * https://gitlab.freedesktop.org/drm/amd/-/issues/3073
++ */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "GPD"),
++ DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"),
++ },
++ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
++ .ignore_wake = "PNP0C50:00@8",
++ },
++ },
+ {} /* Terminating entry */
+ };
+
--- /dev/null
+From 6941f67ad37d5465b75b9ffc498fcf6897a3c00e Mon Sep 17 00:00:00 2001
+From: Michael Kelley <mhklinux@outlook.com>
+Date: Mon, 22 Jan 2024 08:20:28 -0800
+Subject: hv_netvsc: Calculate correct ring size when PAGE_SIZE is not 4 Kbytes
+
+From: Michael Kelley <mhklinux@outlook.com>
+
+commit 6941f67ad37d5465b75b9ffc498fcf6897a3c00e upstream.
+
+Current code in netvsc_drv_init() incorrectly assumes that PAGE_SIZE
+is 4 Kbytes, which is wrong on ARM64 with 16K or 64K page size. As a
+result, the default VMBus ring buffer size on ARM64 with 64K page size
+is 8 Mbytes instead of the expected 512 Kbytes. While this doesn't break
+anything, a typical VM with 8 vCPUs and 8 netvsc channels wastes 120
+Mbytes (8 channels * 2 ring buffers/channel * 7.5 Mbytes/ring buffer).
+
+Unfortunately, the module parameter specifying the ring buffer size
+is in units of 4 Kbyte pages. Ideally, it should be in units that
+are independent of PAGE_SIZE, but backwards compatibility prevents
+changing that now.
+
+Fix this by having netvsc_drv_init() hardcode 4096 instead of using
+PAGE_SIZE when calculating the ring buffer size in bytes. Also
+use the VMBUS_RING_SIZE macro to ensure proper alignment when running
+with page size larger than 4K.
+
+Cc: <stable@vger.kernel.org> # 5.15.x
+Fixes: 7aff79e297ee ("Drivers: hv: Enable Hyper-V code to be built on ARM64")
+Signed-off-by: Michael Kelley <mhklinux@outlook.com>
+Link: https://lore.kernel.org/r/20240122162028.348885-1-mhklinux@outlook.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -44,7 +44,7 @@
+
+ static unsigned int ring_size __ro_after_init = 128;
+ module_param(ring_size, uint, 0444);
+-MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
++MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)");
+ unsigned int netvsc_ring_bytes __ro_after_init;
+
+ static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
+@@ -2801,7 +2801,7 @@ static int __init netvsc_drv_init(void)
+ pr_info("Increased ring_size to %u (min allowed)\n",
+ ring_size);
+ }
+- netvsc_ring_bytes = ring_size * PAGE_SIZE;
++ netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096);
+
+ register_netdevice_notifier(&netvsc_netdev_notifier);
+
--- /dev/null
+From ebeae8adf89d9a82359f6659b1663d09beec2faa Mon Sep 17 00:00:00 2001
+From: Lin Ma <linma@zju.edu.cn>
+Date: Sun, 21 Jan 2024 15:35:06 +0800
+Subject: ksmbd: fix global oob in ksmbd_nl_policy
+
+From: Lin Ma <linma@zju.edu.cn>
+
+commit ebeae8adf89d9a82359f6659b1663d09beec2faa upstream.
+
+Similar to a reported issue (check the commit b33fb5b801c6 ("net:
+qualcomm: rmnet: fix global oob in rmnet_policy"), my local fuzzer finds
+another global out-of-bounds read for policy ksmbd_nl_policy. See bug
+trace below:
+
+==================================================================
+BUG: KASAN: global-out-of-bounds in validate_nla lib/nlattr.c:386 [inline]
+BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600
+Read of size 1 at addr ffffffff8f24b100 by task syz-executor.1/62810
+
+CPU: 0 PID: 62810 Comm: syz-executor.1 Tainted: G N 6.1.0 #3
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0x8b/0xb3 lib/dump_stack.c:106
+ print_address_description mm/kasan/report.c:284 [inline]
+ print_report+0x172/0x475 mm/kasan/report.c:395
+ kasan_report+0xbb/0x1c0 mm/kasan/report.c:495
+ validate_nla lib/nlattr.c:386 [inline]
+ __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600
+ __nla_parse+0x3e/0x50 lib/nlattr.c:697
+ __nlmsg_parse include/net/netlink.h:748 [inline]
+ genl_family_rcv_msg_attrs_parse.constprop.0+0x1b0/0x290 net/netlink/genetlink.c:565
+ genl_family_rcv_msg_doit+0xda/0x330 net/netlink/genetlink.c:734
+ genl_family_rcv_msg net/netlink/genetlink.c:833 [inline]
+ genl_rcv_msg+0x441/0x780 net/netlink/genetlink.c:850
+ netlink_rcv_skb+0x14f/0x410 net/netlink/af_netlink.c:2540
+ genl_rcv+0x24/0x40 net/netlink/genetlink.c:861
+ netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+ netlink_unicast+0x54e/0x800 net/netlink/af_netlink.c:1345
+ netlink_sendmsg+0x930/0xe50 net/netlink/af_netlink.c:1921
+ sock_sendmsg_nosec net/socket.c:714 [inline]
+ sock_sendmsg+0x154/0x190 net/socket.c:734
+ ____sys_sendmsg+0x6df/0x840 net/socket.c:2482
+ ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536
+ __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7fdd66a8f359
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007fdd65e00168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00007fdd66bbcf80 RCX: 00007fdd66a8f359
+RDX: 0000000000000000 RSI: 0000000020000500 RDI: 0000000000000003
+RBP: 00007fdd66ada493 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007ffc84b81aff R14: 00007fdd65e00300 R15: 0000000000022000
+ </TASK>
+
+The buggy address belongs to the variable:
+ ksmbd_nl_policy+0x100/0xa80
+
+The buggy address belongs to the physical page:
+page:0000000034f47940 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1ccc4b
+flags: 0x200000000001000(reserved|node=0|zone=2)
+raw: 0200000000001000 ffffea00073312c8 ffffea00073312c8 0000000000000000
+raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffffffff8f24b000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ffffffff8f24b080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+>ffffffff8f24b100: f9 f9 f9 f9 00 00 f9 f9 f9 f9 f9 f9 00 00 07 f9
+ ^
+ ffffffff8f24b180: f9 f9 f9 f9 00 05 f9 f9 f9 f9 f9 f9 00 00 00 05
+ ffffffff8f24b200: f9 f9 f9 f9 00 00 03 f9 f9 f9 f9 f9 00 00 04 f9
+==================================================================
+
+To fix it, add a placeholder named __KSMBD_EVENT_MAX and let
+KSMBD_EVENT_MAX be its original value - 1, following what other
+netlink families do. Also change the two sites that refer to
+KSMBD_EVENT_MAX to use the correct value.
+
+Cc: stable@vger.kernel.org
+Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/smb/server/ksmbd_netlink.h | 3 ++-
+ fs/smb/server/transport_ipc.c | 4 ++--
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/smb/server/ksmbd_netlink.h
++++ b/fs/smb/server/ksmbd_netlink.h
+@@ -304,7 +304,8 @@ enum ksmbd_event {
+ KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST,
+ KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE = 15,
+
+- KSMBD_EVENT_MAX
++ __KSMBD_EVENT_MAX,
++ KSMBD_EVENT_MAX = __KSMBD_EVENT_MAX - 1
+ };
+
+ /*
+--- a/fs/smb/server/transport_ipc.c
++++ b/fs/smb/server/transport_ipc.c
+@@ -74,7 +74,7 @@ static int handle_unsupported_event(stru
+ static int handle_generic_event(struct sk_buff *skb, struct genl_info *info);
+ static int ksmbd_ipc_heartbeat_request(void);
+
+-static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX] = {
++static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = {
+ [KSMBD_EVENT_UNSPEC] = {
+ .len = 0,
+ },
+@@ -403,7 +403,7 @@ static int handle_generic_event(struct s
+ return -EPERM;
+ #endif
+
+- if (type >= KSMBD_EVENT_MAX) {
++ if (type > KSMBD_EVENT_MAX) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
--- /dev/null
+From f342de4e2f33e0e39165d8639387aa6c19dff660 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Sat, 20 Jan 2024 22:50:04 +0100
+Subject: netfilter: nf_tables: reject QUEUE/DROP verdict parameters
+
+From: Florian Westphal <fw@strlen.de>
+
+commit f342de4e2f33e0e39165d8639387aa6c19dff660 upstream.
+
+This reverts commit e0abdadcc6e1.
+
+core.c:nf_hook_slow assumes that the upper 16 bits of NF_DROP
+verdicts contain a valid errno, i.e. -EPERM, -EHOSTUNREACH or similar,
+or 0.
+
+Due to the reverted commit, it's possible to provide a positive
+value, e.g. NF_ACCEPT (1), which results in use-after-free.
+
+It's not clear to me why this commit was made.
+
+NF_QUEUE is not used by nftables; "queue" rules in nftables
+will result in use of "nft_queue" expression.
+
+If we later need to allow specifying errno values from userspace
+(do not know why), this has to call NF_DROP_GETERR and check that
+"err <= 0" holds true.
+
+Fixes: e0abdadcc6e1 ("netfilter: nf_tables: accept QUEUE/DROP verdict parameters")
+Cc: stable@vger.kernel.org
+Reported-by: Notselwyn <notselwyn@pwning.tech>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c | 16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -10448,16 +10448,10 @@ static int nft_verdict_init(const struct
+ data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+
+ switch (data->verdict.code) {
+- default:
+- switch (data->verdict.code & NF_VERDICT_MASK) {
+- case NF_ACCEPT:
+- case NF_DROP:
+- case NF_QUEUE:
+- break;
+- default:
+- return -EINVAL;
+- }
+- fallthrough;
++ case NF_ACCEPT:
++ case NF_DROP:
++ case NF_QUEUE:
++ break;
+ case NFT_CONTINUE:
+ case NFT_BREAK:
+ case NFT_RETURN:
+@@ -10492,6 +10486,8 @@ static int nft_verdict_init(const struct
+
+ data->verdict.chain = chain;
+ break;
++ default:
++ return -EINVAL;
+ }
+
+ desc->len = sizeof(data->verdict);
--- /dev/null
+From 01acb2e8666a6529697141a6017edbf206921913 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Thu, 18 Jan 2024 10:56:26 +0100
+Subject: netfilter: nft_chain_filter: handle NETDEV_UNREGISTER for inet/ingress basechain
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 01acb2e8666a6529697141a6017edbf206921913 upstream.
+
+Remove netdevice from inet/ingress basechain in case NETDEV_UNREGISTER
+event is reported, otherwise a stale reference to netdevice remains in
+the hook list.
+
+Fixes: 60a3815da702 ("netfilter: add inet ingress support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_chain_filter.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/net/netfilter/nft_chain_filter.c
++++ b/net/netfilter/nft_chain_filter.c
+@@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct
+ unsigned long event, void *ptr)
+ {
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++ struct nft_base_chain *basechain;
+ struct nftables_pernet *nft_net;
+- struct nft_table *table;
+ struct nft_chain *chain, *nr;
++ struct nft_table *table;
+ struct nft_ctx ctx = {
+ .net = dev_net(dev),
+ };
+@@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct
+ nft_net = nft_pernet(ctx.net);
+ mutex_lock(&nft_net->commit_mutex);
+ list_for_each_entry(table, &nft_net->tables, list) {
+- if (table->family != NFPROTO_NETDEV)
++ if (table->family != NFPROTO_NETDEV &&
++ table->family != NFPROTO_INET)
+ continue;
+
+ ctx.family = table->family;
+@@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct
+ if (!nft_is_base_chain(chain))
+ continue;
+
++ basechain = nft_base_chain(chain);
++ if (table->family == NFPROTO_INET &&
++ basechain->ops.hooknum != NF_INET_INGRESS)
++ continue;
++
+ ctx.chain = chain;
+ nft_netdev_event(event, dev, &ctx);
+ }
--- /dev/null
+From edcf9725150e42beeca42d085149f4c88fa97afd Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 22 Jan 2024 14:58:16 +1100
+Subject: nfsd: fix RELEASE_LOCKOWNER
+
+From: NeilBrown <neilb@suse.de>
+
+commit edcf9725150e42beeca42d085149f4c88fa97afd upstream.
+
+The test on so_count in nfsd4_release_lockowner() is nonsense and
+harmful. Revert to using check_for_locks(), changing that to not sleep.
+
+First: harmful.
+As is documented in the kdoc comment for nfsd4_release_lockowner(), the
+test on so_count can transiently return a false positive resulting in a
+return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is
+clearly a protocol violation and with the Linux NFS client it can cause
+incorrect behaviour.
+
+If RELEASE_LOCKOWNER is sent while some other thread is still
+processing a LOCK request which failed because, at the time that request
+was received, the given owner held a conflicting lock, then the nfsd
+thread processing that LOCK request can hold a reference (conflock) to
+the lock owner that causes nfsd4_release_lockowner() to return an
+incorrect error.
+
+The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it
+never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so
+it knows that the error is impossible. It assumes the lock owner was in
+fact released so it feels free to use the same lock owner identifier in
+some later locking request.
+
+When it does reuse a lock owner identifier for which a previous RELEASE
+failed, it will naturally use a lock_seqid of zero. However the server,
+which didn't release the lock owner, will expect a larger lock_seqid and
+so will respond with NFS4ERR_BAD_SEQID.
+
+So clearly it is harmful to allow a false positive, which testing
+so_count allows.
+
+The test is nonsense because ... well... it doesn't mean anything.
+
+so_count is the sum of three different counts.
+1/ the set of states listed on so_stateids
+2/ the set of active vfs locks owned by any of those states
+3/ various transient counts such as for conflicting locks.
+
+When it is tested against '2' it is clear that one of these is the
+transient reference obtained by find_lockowner_str_locked(). It is not
+clear what the other one is expected to be.
+
+In practice, the count is often 2 because there is precisely one state
+on so_stateids. If there were more, this would fail.
+
+In my testing I see two circumstances when RELEASE_LOCKOWNER is called.
+In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in
+all the lock states being removed, and so the lockowner being discarded
+(it is removed when there are no more references which usually happens
+when the lock state is discarded). When nfsd4_release_lockowner() finds
+that the lock owner doesn't exist, it returns success.
+
+The other case shows an so_count of '2' and precisely one state listed
+in so_stateid. It appears that the Linux client uses a separate lock
+owner for each file resulting in one lock state per lock owner, so this
+test on '2' is safe. For another client it might not be safe.
+
+So this patch changes check_for_locks() to use the (newish)
+find_any_file_locked() so that it doesn't take a reference on the
+nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With
+this check it is safe to restore the use of check_for_locks() rather
+than testing so_count against the mysterious '2'.
+
+Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Cc: stable@vger.kernel.org # v6.2+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4state.c | 26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7736,14 +7736,16 @@ check_for_locks(struct nfs4_file *fp, st
+ {
+ struct file_lock *fl;
+ int status = false;
+- struct nfsd_file *nf = find_any_file(fp);
++ struct nfsd_file *nf;
+ struct inode *inode;
+ struct file_lock_context *flctx;
+
++ spin_lock(&fp->fi_lock);
++ nf = find_any_file_locked(fp);
+ if (!nf) {
+ /* Any valid lock stateid should have some sort of access */
+ WARN_ON_ONCE(1);
+- return status;
++ goto out;
+ }
+
+ inode = locks_inode(nf->nf_file);
+@@ -7759,7 +7761,8 @@ check_for_locks(struct nfs4_file *fp, st
+ }
+ spin_unlock(&flctx->flc_lock);
+ }
+- nfsd_file_put(nf);
++out:
++ spin_unlock(&fp->fi_lock);
+ return status;
+ }
+
+@@ -7769,10 +7772,8 @@ check_for_locks(struct nfs4_file *fp, st
+ * @cstate: NFSv4 COMPOUND state
+ * @u: RELEASE_LOCKOWNER arguments
+ *
+- * The lockowner's so_count is bumped when a lock record is added
+- * or when copying a conflicting lock. The latter case is brief,
+- * but can lead to fleeting false positives when looking for
+- * locks-in-use.
++ * Check if there are any locks still held and if not - free the lockowner
++ * and any lock state that is owned.
+ *
+ * Return values:
+ * %nfs_ok: lockowner released or not found
+@@ -7808,10 +7809,13 @@ nfsd4_release_lockowner(struct svc_rqst
+ spin_unlock(&clp->cl_lock);
+ return nfs_ok;
+ }
+- if (atomic_read(&lo->lo_owner.so_count) != 2) {
+- spin_unlock(&clp->cl_lock);
+- nfs4_put_stateowner(&lo->lo_owner);
+- return nfserr_locks_held;
++
++ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
++ if (check_for_locks(stp->st_stid.sc_file, lo)) {
++ spin_unlock(&clp->cl_lock);
++ nfs4_put_stateowner(&lo->lo_owner);
++ return nfserr_locks_held;
++ }
+ }
+ unhash_lockowner_locked(lo);
+ while (!list_empty(&lo->lo_owner.so_stateids)) {
--- /dev/null
+From 5913320eb0b3ec88158cfcb0fa5e996bf4ef681b Mon Sep 17 00:00:00 2001
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Date: Mon, 8 Jan 2024 15:20:58 +0900
+Subject: platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+commit 5913320eb0b3ec88158cfcb0fa5e996bf4ef681b upstream.
+
+p2sb_bar() unhides P2SB device to get resources from the device. It
+guards the operation by locking pci_rescan_remove_lock so that parallel
+rescans do not find the P2SB device. However, this lock causes deadlock
+when PCI bus rescan is triggered by /sys/bus/pci/rescan. The rescan
+locks pci_rescan_remove_lock and probes PCI devices. When PCI devices
+call p2sb_bar() during probe, it locks pci_rescan_remove_lock again.
+Hence the deadlock.
+
+To avoid the deadlock, do not lock pci_rescan_remove_lock in p2sb_bar().
+Instead, do the lock at fs_initcall. Introduce p2sb_cache_resources()
+for fs_initcall which gets and caches the P2SB resources. At p2sb_bar(),
+refer the cache and return to the caller.
+
+Before operating the device at P2SB DEVFN for resource cache, check
+that its device class is PCI_CLASS_MEMORY_OTHER 0x0580 that PCH
+specifications define. This avoids unexpected operation to other devices
+at the same DEVFN.
+
+Link: https://lore.kernel.org/linux-pci/6xb24fjmptxxn5js2fjrrddjae6twex5bjaftwqsuawuqqqydx@7cl3uik5ef6j/
+Fixes: 9745fb07474f ("platform/x86/intel: Add Primary to Sideband (P2SB) bridge support")
+Cc: stable@vger.kernel.org
+Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Link: https://lore.kernel.org/r/20240108062059.3583028-2-shinichiro.kawasaki@wdc.com
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Tested-by: Klara Modin <klarasmodin@gmail.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/p2sb.c | 180 ++++++++++++++++++++++++++++--------
+ 1 file changed, 139 insertions(+), 41 deletions(-)
+
+diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
+index 1cf2471d54dd..17cc4b45e023 100644
+--- a/drivers/platform/x86/p2sb.c
++++ b/drivers/platform/x86/p2sb.c
+@@ -26,6 +26,21 @@ static const struct x86_cpu_id p2sb_cpu_ids[] = {
+ {}
+ };
+
++/*
++ * Cache BAR0 of P2SB device functions 0 to 7.
++ * TODO: The constant 8 is the number of functions that PCI specification
++ * defines. Same definitions exist tree-wide. Unify this definition and
++ * the other definitions then move to include/uapi/linux/pci.h.
++ */
++#define NR_P2SB_RES_CACHE 8
++
++struct p2sb_res_cache {
++ u32 bus_dev_id;
++ struct resource res;
++};
++
++static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE];
++
+ static int p2sb_get_devfn(unsigned int *devfn)
+ {
+ unsigned int fn = P2SB_DEVFN_DEFAULT;
+@@ -39,8 +54,16 @@ static int p2sb_get_devfn(unsigned int *devfn)
+ return 0;
+ }
+
++static bool p2sb_valid_resource(struct resource *res)
++{
++ if (res->flags)
++ return true;
++
++ return false;
++}
++
+ /* Copy resource from the first BAR of the device in question */
+-static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem)
++static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem)
+ {
+ struct resource *bar0 = &pdev->resource[0];
+
+@@ -56,49 +79,66 @@ static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem)
+ mem->end = bar0->end;
+ mem->flags = bar0->flags;
+ mem->desc = bar0->desc;
+-
+- return 0;
+ }
+
+-static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
++static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn)
+ {
++ struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)];
+ struct pci_dev *pdev;
+- int ret;
+
+ pdev = pci_scan_single_device(bus, devfn);
+ if (!pdev)
+- return -ENODEV;
++ return;
+
+- ret = p2sb_read_bar0(pdev, mem);
++ p2sb_read_bar0(pdev, &cache->res);
++ cache->bus_dev_id = bus->dev.id;
+
+ pci_stop_and_remove_bus_device(pdev);
+- return ret;
+ }
+
+-/**
+- * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR
+- * @bus: PCI bus to communicate with
+- * @devfn: PCI slot and function to communicate with
+- * @mem: memory resource to be filled in
+- *
+- * The BIOS prevents the P2SB device from being enumerated by the PCI
+- * subsystem, so we need to unhide and hide it back to lookup the BAR.
+- *
+- * if @bus is NULL, the bus 0 in domain 0 will be used.
+- * If @devfn is 0, it will be replaced by devfn of the P2SB device.
+- *
+- * Caller must provide a valid pointer to @mem.
+- *
+- * Locking is handled by pci_rescan_remove_lock mutex.
+- *
+- * Return:
+- * 0 on success or appropriate errno value on error.
+- */
+-int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
++static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
++{
++ unsigned int slot, fn;
++
++ if (PCI_FUNC(devfn) == 0) {
++ /*
++ * When function number of the P2SB device is zero, scan it and
++ * other function numbers, and if devices are available, cache
++ * their BAR0s.
++ */
++ slot = PCI_SLOT(devfn);
++ for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++)
++ p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn));
++ } else {
++ /* Scan the P2SB device and cache its BAR0 */
++ p2sb_scan_and_cache_devfn(bus, devfn);
++ }
++
++ if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res))
++ return -ENOENT;
++
++ return 0;
++}
++
++static struct pci_bus *p2sb_get_bus(struct pci_bus *bus)
++{
++ static struct pci_bus *p2sb_bus;
++
++ bus = bus ?: p2sb_bus;
++ if (bus)
++ return bus;
++
++ /* Assume P2SB is on the bus 0 in domain 0 */
++ p2sb_bus = pci_find_bus(0, 0);
++ return p2sb_bus;
++}
++
++static int p2sb_cache_resources(void)
+ {
+- struct pci_dev *pdev_p2sb;
+ unsigned int devfn_p2sb;
+ u32 value = P2SBC_HIDE;
++ struct pci_bus *bus;
++ u16 class;
+ int ret;
+
+ /* Get devfn for P2SB device itself */
+@@ -106,8 +146,17 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+ if (ret)
+ return ret;
+
+- /* if @bus is NULL, use bus 0 in domain 0 */
+- bus = bus ?: pci_find_bus(0, 0);
++ bus = p2sb_get_bus(NULL);
++ if (!bus)
++ return -ENODEV;
++
++ /*
++ * When a device with same devfn exists and its device class is not
++ * PCI_CLASS_MEMORY_OTHER for P2SB, do not touch it.
++ */
++ pci_bus_read_config_word(bus, devfn_p2sb, PCI_CLASS_DEVICE, &class);
++ if (!PCI_POSSIBLE_ERROR(class) && class != PCI_CLASS_MEMORY_OTHER)
++ return -ENODEV;
+
+ /*
+ * Prevent concurrent PCI bus scan from seeing the P2SB device and
+@@ -115,17 +164,16 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+ */
+ pci_lock_rescan_remove();
+
+- /* Unhide the P2SB device, if needed */
++ /*
++ * The BIOS prevents the P2SB device from being enumerated by the PCI
++ * subsystem, so we need to unhide and hide it back to lookup the BAR.
++ * Unhide the P2SB device here, if needed.
++ */
+ pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
+ if (value & P2SBC_HIDE)
+ pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0);
+
+- pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb);
+- if (devfn)
+- ret = p2sb_scan_and_read(bus, devfn, mem);
+- else
+- ret = p2sb_read_bar0(pdev_p2sb, mem);
+- pci_stop_and_remove_bus_device(pdev_p2sb);
++ ret = p2sb_scan_and_cache(bus, devfn_p2sb);
+
+ /* Hide the P2SB device, if it was hidden */
+ if (value & P2SBC_HIDE)
+@@ -133,12 +181,62 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+
+ pci_unlock_rescan_remove();
+
+- if (ret)
+- return ret;
++ return ret;
++}
++
++/**
++ * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR
++ * @bus: PCI bus to communicate with
++ * @devfn: PCI slot and function to communicate with
++ * @mem: memory resource to be filled in
++ *
++ * If @bus is NULL, the bus 0 in domain 0 will be used.
++ * If @devfn is 0, it will be replaced by devfn of the P2SB device.
++ *
++ * Caller must provide a valid pointer to @mem.
++ *
++ * Return:
++ * 0 on success or appropriate errno value on error.
++ */
++int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
++{
++ struct p2sb_res_cache *cache;
++ int ret;
++
++ bus = p2sb_get_bus(bus);
++ if (!bus)
++ return -ENODEV;
++
++ if (!devfn) {
++ ret = p2sb_get_devfn(&devfn);
++ if (ret)
++ return ret;
++ }
+
+- if (mem->flags == 0)
++ cache = &p2sb_resources[PCI_FUNC(devfn)];
++ if (cache->bus_dev_id != bus->dev.id)
+ return -ENODEV;
+
++ if (!p2sb_valid_resource(&cache->res))
++ return -ENOENT;
++
++ memcpy(mem, &cache->res, sizeof(*mem));
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(p2sb_bar);
++
++static int __init p2sb_fs_init(void)
++{
++ p2sb_cache_resources();
++ return 0;
++}
++
++/*
++ * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can
++ * not be locked in sysfs pci bus rescan path because of deadlock. To
++ * avoid the deadlock, access to P2SB devices with the lock at an early
++ * step in kernel initialization and cache required resources. This
++ * should happen after subsys_initcall which initializes PCI subsystem
++ * and before device_initcall which requires P2SB resources.
++ */
++fs_initcall(p2sb_fs_init);
+--
+2.43.0
+
--- /dev/null
+From ded080c86b3f99683774af0441a58fc2e3d60cae Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Wed, 17 Jan 2024 18:59:44 +0100
+Subject: rbd: don't move requests to the running list on errors
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit ded080c86b3f99683774af0441a58fc2e3d60cae upstream.
+
+The running list is supposed to contain requests that are pinning the
+exclusive lock, i.e. those that must be flushed before exclusive lock
+is released. When wake_lock_waiters() is called to handle an error,
+requests on the acquiring list are failed with that error and no
+flushing takes place. Briefly moving them to the running list is not
+only pointless but also harmful: if exclusive lock gets acquired
+before all of their state machines are scheduled and go through
+rbd_lock_del_request(), we trigger
+
+ rbd_assert(list_empty(&rbd_dev->running_list));
+
+in rbd_try_acquire_lock().
+
+Cc: stable@vger.kernel.org
+Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code")
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/rbd.c | 22 ++++++++++++++--------
+ 1 file changed, 14 insertions(+), 8 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -3453,14 +3453,15 @@ static bool rbd_lock_add_request(struct
+ static void rbd_lock_del_request(struct rbd_img_request *img_req)
+ {
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
+- bool need_wakeup;
++ bool need_wakeup = false;
+
+ lockdep_assert_held(&rbd_dev->lock_rwsem);
+ spin_lock(&rbd_dev->lock_lists_lock);
+- rbd_assert(!list_empty(&img_req->lock_item));
+- list_del_init(&img_req->lock_item);
+- need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
+- list_empty(&rbd_dev->running_list));
++ if (!list_empty(&img_req->lock_item)) {
++ list_del_init(&img_req->lock_item);
++ need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
++ list_empty(&rbd_dev->running_list));
++ }
+ spin_unlock(&rbd_dev->lock_lists_lock);
+ if (need_wakeup)
+ complete(&rbd_dev->releasing_wait);
+@@ -3843,14 +3844,19 @@ static void wake_lock_waiters(struct rbd
+ return;
+ }
+
+- list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) {
++ while (!list_empty(&rbd_dev->acquiring_list)) {
++ img_req = list_first_entry(&rbd_dev->acquiring_list,
++ struct rbd_img_request, lock_item);
+ mutex_lock(&img_req->state_mutex);
+ rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
++ if (!result)
++ list_move_tail(&img_req->lock_item,
++ &rbd_dev->running_list);
++ else
++ list_del_init(&img_req->lock_item);
+ rbd_img_schedule(img_req, result);
+ mutex_unlock(&img_req->state_mutex);
+ }
+-
+- list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
+ }
+
+ static bool locker_equal(const struct ceph_locker *lhs,
fjes-fix-memleaks-in-fjes_hw_setup.patch
net-fec-fix-the-unhandled-context-fault-from-smmu.patch
nbd-always-initialize-struct-msghdr-completely.patch
+btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-snapshot-of-subvolume-being-deleted.patch
+btrfs-ref-verify-free-ref-cache-before-clearing-mount-opt.patch
+btrfs-tree-checker-fix-inline-ref-size-in-error-messages.patch
+btrfs-don-t-warn-if-discard-range-is-not-aligned-to-sector.patch
+btrfs-defrag-reject-unknown-flags-of-btrfs_ioctl_defrag_range_args.patch
+btrfs-don-t-abort-filesystem-when-attempting-to-snapshot-deleted-subvolume.patch
+rbd-don-t-move-requests-to-the-running-list-on-errors.patch
+exec-fix-error-handling-in-begin_new_exec.patch
+wifi-iwlwifi-fix-a-memory-corruption.patch
+nfsd-fix-release_lockowner.patch
+hv_netvsc-calculate-correct-ring-size-when-page_size-is-not-4-kbytes.patch
+netfilter-nft_chain_filter-handle-netdev_unregister-for-inet-ingress-basechain.patch
+netfilter-nf_tables-reject-queue-drop-verdict-parameters.patch
+platform-x86-p2sb-allow-p2sb_bar-calls-during-pci-device-probe.patch
+ksmbd-fix-global-oob-in-ksmbd_nl_policy.patch
+firmware-arm_scmi-check-mailbox-smt-channel-for-consistency.patch
+xfs-read-only-mounts-with-fsopen-mount-api-are-busted.patch
+gpiolib-acpi-ignore-touchpad-wakeup-on-gpd-g1619-04.patch
+cpufreq-intel_pstate-refine-computation-of-p-state-for-given-frequency.patch
--- /dev/null
+From cf4a0d840ecc72fcf16198d5e9c505ab7d5a5e4d Mon Sep 17 00:00:00 2001
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Date: Thu, 11 Jan 2024 15:07:25 +0200
+Subject: wifi: iwlwifi: fix a memory corruption
+
+From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+
+commit cf4a0d840ecc72fcf16198d5e9c505ab7d5a5e4d upstream.
+
+iwl_fw_ini_trigger_tlv::data is a pointer to a __le32, which means that
+if we copy to iwl_fw_ini_trigger_tlv::data + offset while offset is in
+bytes, we'll write past the buffer.
+
+Cc: stable@vger.kernel.org
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218233
+Fixes: cf29c5b66b9f ("iwlwifi: dbg_ini: implement time point handling")
+Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
+Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
+Link: https://msgid.link/20240111150610.2d2b8b870194.I14ed76505a5cf87304e0c9cc05cc0ae85ed3bf91@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+@@ -1082,7 +1082,7 @@ static int iwl_dbg_tlv_override_trig_nod
+ node_trig = (void *)node_tlv->data;
+ }
+
+- memcpy(node_trig->data + offset, trig->data, trig_data_len);
++ memcpy((u8 *)node_trig->data + offset, trig->data, trig_data_len);
+ node_tlv->length = cpu_to_le32(size);
+
+ if (policy & IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG) {
--- /dev/null
+From d8d222e09dab84a17bb65dda4b94d01c565f5327 Mon Sep 17 00:00:00 2001
+From: Dave Chinner <dchinner@redhat.com>
+Date: Tue, 16 Jan 2024 15:33:07 +1100
+Subject: xfs: read only mounts with fsopen mount API are busted
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit d8d222e09dab84a17bb65dda4b94d01c565f5327 upstream.
+
+Recently xfs/513 started failing on my test machines testing "-o
+ro,norecovery" mount options. This was being emitted in dmesg:
+
+[ 9906.932724] XFS (pmem0): no-recovery mounts must be read-only.
+
+Turns out, readonly mounts with the fsopen()/fsconfig() mount API
+have been busted since day zero. It's only taken 5 years for debian
+unstable to start using this "new" mount API, and shortly after this
+I noticed xfs/513 had started to fail as per above.
+
+The syscall trace is:
+
+fsopen("xfs", FSOPEN_CLOEXEC) = 3
+mount_setattr(-1, NULL, 0, NULL, 0) = -1 EINVAL (Invalid argument)
+.....
+fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/pmem0", 0) = 0
+fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0
+fsconfig(3, FSCONFIG_SET_FLAG, "norecovery", NULL, 0) = 0
+fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = -1 EINVAL (Invalid argument)
+close(3) = 0
+
+Showing that the actual mount instantiation (FSCONFIG_CMD_CREATE) is
+what threw out the error.
+
+During mount instantiation, we call xfs_fs_validate_params() which
+does:
+
+ /* No recovery flag requires a read-only mount */
+ if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
+ xfs_warn(mp, "no-recovery mounts must be read-only.");
+ return -EINVAL;
+ }
+
+and xfs_is_readonly() checks internal mount flags for read only
+state. This state is set in xfs_init_fs_context() from the
+context superblock flag state:
+
+ /*
+ * Copy binary VFS mount flags we are interested in.
+ */
+ if (fc->sb_flags & SB_RDONLY)
+ set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+
+With the old mount API, all of the VFS specific superblock flags
+had already been parsed and set before xfs_init_fs_context() is
+called, so this all works fine.
+
+However, in the brave new fsopen/fsconfig world,
+xfs_init_fs_context() is called from fsopen() context, before any
+VFS superblock have been set or parsed. Hence if we use fsopen(),
+the internal XFS readonly state is *never set*. Hence anything that
+depends on xfs_is_readonly() actually returning true for read only
+mounts is broken if fsopen() has been used to mount the filesystem.
+
+Fix this by moving this internal state initialisation to
+xfs_fs_fill_super() before we attempt to validate the parameters
+that have been set prior to the FSCONFIG_CMD_CREATE call being made.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Fixes: 73e5fff98b64 ("xfs: switch to use the new mount-api")
+cc: stable@vger.kernel.org
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_super.c | 27 +++++++++++++++++----------
+ 1 file changed, 17 insertions(+), 10 deletions(-)
+
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -1458,6 +1458,18 @@ xfs_fs_fill_super(
+
+ mp->m_super = sb;
+
++ /*
++ * Copy VFS mount flags from the context now that all parameter parsing
++ * is guaranteed to have been completed by either the old mount API or
++ * the newer fsopen/fsconfig API.
++ */
++ if (fc->sb_flags & SB_RDONLY)
++ set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
++ if (fc->sb_flags & SB_DIRSYNC)
++ mp->m_features |= XFS_FEAT_DIRSYNC;
++ if (fc->sb_flags & SB_SYNCHRONOUS)
++ mp->m_features |= XFS_FEAT_WSYNC;
++
+ error = xfs_fs_validate_params(mp);
+ if (error)
+ goto out_free_names;
+@@ -1915,6 +1927,11 @@ static const struct fs_context_operation
+ .free = xfs_fs_free,
+ };
+
++/*
++ * WARNING: do not initialise any parameters in this function that depend on
++ * mount option parsing having already been performed as this can be called from
++ * fsopen() before any parameters have been set.
++ */
+ static int xfs_init_fs_context(
+ struct fs_context *fc)
+ {
+@@ -1947,16 +1964,6 @@ static int xfs_init_fs_context(
+ mp->m_logbsize = -1;
+ mp->m_allocsize_log = 16; /* 64k */
+
+- /*
+- * Copy binary VFS mount flags we are interested in.
+- */
+- if (fc->sb_flags & SB_RDONLY)
+- set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+- if (fc->sb_flags & SB_DIRSYNC)
+- mp->m_features |= XFS_FEAT_DIRSYNC;
+- if (fc->sb_flags & SB_SYNCHRONOUS)
+- mp->m_features |= XFS_FEAT_WSYNC;
+-
+ fc->s_fs_info = mp;
+ fc->ops = &xfs_context_ops;
+