--- /dev/null
+From c87d8260c78fdc9dc4c5738e5f65ebf76cfb689f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Feb 2025 14:38:23 -0800
+Subject: bdev: use bdev_io_min() for statx block size
+
+From: Luis Chamberlain <mcgrof@kernel.org>
+
+[ Upstream commit 425fbcd62d2e1330e64d8d3bf89e554830ba997f ]
+
+You can use lsblk to query a block device's block size:
+
+lsblk -o MIN-IO /dev/nvme0n1
+MIN-IO
+ 4096
+
+The min-io is the minimum IO size the block device prefers for optimal
+performance. In turn, we map this to the block device's block size.
+The block size currently exposed is 4k even for block devices with an
+LBA format of 16k. Likewise, devices which support a 4k LBA format but
+have a larger Indirection Unit of 16k expose a block size of only 4k.
+
+This incurs read-modify-writes on direct IO against devices with a
+min-io larger than the page size. To fix this, use the block device's
+min-io, which is the minimal optimal IO size the device prefers.
+
+With this we now get:
+
+lsblk -o MIN-IO /dev/nvme0n1
+MIN-IO
+ 16384
+
+And so userspace gets the appropriate information it needs for optimal
+performance. This is verified by running blkalgn against mkfs on a
+device with an LBA format of 4k but an NPWG of 16k (the min-io size):
+
+mkfs.xfs -f -b size=16k /dev/nvme3n1
+blkalgn -d nvme3n1 --ops Write
+
+ Block size : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 0 | |
+ 8 -> 15 : 0 | |
+ 16 -> 31 : 0 | |
+ 32 -> 63 : 0 | |
+ 64 -> 127 : 0 | |
+ 128 -> 255 : 0 | |
+ 256 -> 511 : 0 | |
+ 512 -> 1023 : 0 | |
+ 1024 -> 2047 : 0 | |
+ 2048 -> 4095 : 0 | |
+ 4096 -> 8191 : 0 | |
+ 8192 -> 16383 : 0 | |
+ 16384 -> 32767 : 66 |****************************************|
+ 32768 -> 65535 : 0 | |
+ 65536 -> 131071 : 0 | |
+ 131072 -> 262143 : 2 |* |
+Block size: 14 - 66
+Block size: 17 - 2
+
+ Algn size : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 0 | |
+ 8 -> 15 : 0 | |
+ 16 -> 31 : 0 | |
+ 32 -> 63 : 0 | |
+ 64 -> 127 : 0 | |
+ 128 -> 255 : 0 | |
+ 256 -> 511 : 0 | |
+ 512 -> 1023 : 0 | |
+ 1024 -> 2047 : 0 | |
+ 2048 -> 4095 : 0 | |
+ 4096 -> 8191 : 0 | |
+ 8192 -> 16383 : 0 | |
+ 16384 -> 32767 : 66 |****************************************|
+ 32768 -> 65535 : 0 | |
+ 65536 -> 131071 : 0 | |
+ 131072 -> 262143 : 2 |* |
+Algn size: 14 - 66
+Algn size: 17 - 2
+
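+For reference, a minimal userspace sketch of reading the now-corrected
+block size via statx() (the device path is illustrative):
+
+  #define _GNU_SOURCE
+  #include <fcntl.h>
+  #include <stdio.h>
+  #include <sys/stat.h>
+
+  int main(void)
+  {
+          struct statx stx;
+
+          /* stx_blksize is part of the basic stat information */
+          if (statx(AT_FDCWD, "/dev/nvme0n1", 0, STATX_BASIC_STATS, &stx))
+                  return 1;
+          printf("blksize: %u\n", stx.stx_blksize); /* now bdev_io_min() */
+          return 0;
+  }
+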
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Link: https://lore.kernel.org/r/20250221223823.1680616-9-mcgrof@kernel.org
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 5f33b5226c9d ("block: don't autoload drivers on stat")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/bdev.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/block/bdev.c b/block/bdev.c
+index 9d73a8fbf7f99..8453f6a795d9a 100644
+--- a/block/bdev.c
++++ b/block/bdev.c
+@@ -1274,9 +1274,6 @@ void bdev_statx(struct path *path, struct kstat *stat,
+ struct inode *backing_inode;
+ struct block_device *bdev;
+
+- if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC)))
+- return;
+-
+ backing_inode = d_backing_inode(path->dentry);
+
+ /*
+@@ -1303,6 +1300,8 @@ void bdev_statx(struct path *path, struct kstat *stat,
+ queue_atomic_write_unit_max_bytes(bd_queue));
+ }
+
++ stat->blksize = bdev_io_min(bdev);
++
+ blkdev_put_no_open(bdev);
+ }
+
+--
+2.39.5
+
--- /dev/null
+From b8ccd8a745fdcb09743f94aef1eba2baa7fdb49e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 07:37:41 +0200
+Subject: block: don't autoload drivers on stat
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 5f33b5226c9d92359e58e91ad0bf0c1791da36a1 ]
+
+blkdev_get_no_open can trigger the legacy autoload of block drivers. A
+simple stat of a block device has not historically done that, so disable
+this behavior again.
+
+Fixes: 9abcfbd235f5 ("block: Add atomic write support for statx")
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20250423053810.1683309-4-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/bdev.c | 8 ++++----
+ block/blk-cgroup.c | 2 +-
+ block/blk.h | 2 +-
+ block/fops.c | 2 +-
+ 4 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/block/bdev.c b/block/bdev.c
+index 89235796e51a5..5aebcf437f17c 100644
+--- a/block/bdev.c
++++ b/block/bdev.c
+@@ -773,13 +773,13 @@ static void blkdev_put_part(struct block_device *part)
+ blkdev_put_whole(whole);
+ }
+
+-struct block_device *blkdev_get_no_open(dev_t dev)
++struct block_device *blkdev_get_no_open(dev_t dev, bool autoload)
+ {
+ struct block_device *bdev;
+ struct inode *inode;
+
+ inode = ilookup(blockdev_superblock, dev);
+- if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
++ if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
+ blk_request_module(dev);
+ inode = ilookup(blockdev_superblock, dev);
+ if (inode)
+@@ -1001,7 +1001,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+ if (ret)
+ return ERR_PTR(ret);
+
+- bdev = blkdev_get_no_open(dev);
++ bdev = blkdev_get_no_open(dev, true);
+ if (!bdev)
+ return ERR_PTR(-ENXIO);
+
+@@ -1279,7 +1279,7 @@ void bdev_statx(struct path *path, struct kstat *stat,
+ * use I_BDEV() here; the block device has to be looked up by i_rdev
+ * instead.
+ */
+- bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev);
++ bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false);
+ if (!bdev)
+ return;
+
+diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
+index 9ed93d91d754a..c94efae5bcfaf 100644
+--- a/block/blk-cgroup.c
++++ b/block/blk-cgroup.c
+@@ -796,7 +796,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
+ return -EINVAL;
+ input = skip_spaces(input);
+
+- bdev = blkdev_get_no_open(MKDEV(major, minor));
++ bdev = blkdev_get_no_open(MKDEV(major, minor), true);
+ if (!bdev)
+ return -ENODEV;
+ if (bdev_is_partition(bdev)) {
+diff --git a/block/blk.h b/block/blk.h
+index c0120a3d9dc57..9dcc92c7f2b50 100644
+--- a/block/blk.h
++++ b/block/blk.h
+@@ -94,7 +94,7 @@ static inline void blk_wait_io(struct completion *done)
+ wait_for_completion_io(done);
+ }
+
+-struct block_device *blkdev_get_no_open(dev_t dev);
++struct block_device *blkdev_get_no_open(dev_t dev, bool autoload);
+ void blkdev_put_no_open(struct block_device *bdev);
+
+ #define BIO_INLINE_VECS 4
+diff --git a/block/fops.c b/block/fops.c
+index be9f1dbea9ce0..d23ddb2dc1138 100644
+--- a/block/fops.c
++++ b/block/fops.c
+@@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
+ if (ret)
+ return ret;
+
+- bdev = blkdev_get_no_open(inode->i_rdev);
++ bdev = blkdev_get_no_open(inode->i_rdev, true);
+ if (!bdev)
+ return -ENXIO;
+
+--
+2.39.5
+
--- /dev/null
+From f0bac2898219c7d5d4e8a1b60675d720aacec8cf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 07:37:39 +0200
+Subject: block: move blkdev_{get,put} _no_open prototypes out of blkdev.h
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit c63202140d4b411d27380805c4d68eb11407b7f2 ]
+
+These are only to be used by block-internal code. Remove the comment,
+as we grew more users due to the rework of block device node opening.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20250423053810.1683309-2-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: 5f33b5226c9d ("block: don't autoload drivers on stat")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk.h | 3 +++
+ include/linux/blkdev.h | 4 ----
+ 2 files changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/block/blk.h b/block/blk.h
+index 9cf9a0099416d..c0120a3d9dc57 100644
+--- a/block/blk.h
++++ b/block/blk.h
+@@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done)
+ wait_for_completion_io(done);
+ }
+
++struct block_device *blkdev_get_no_open(dev_t dev);
++void blkdev_put_no_open(struct block_device *bdev);
++
+ #define BIO_INLINE_VECS 4
+ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
+ gfp_t gfp_mask);
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index d37751789bf58..6aa67e9b2ec08 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1649,10 +1649,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder,
+ const struct blk_holder_ops *hops);
+ void bd_abort_claiming(struct block_device *bdev, void *holder);
+
+-/* just for blk-cgroup, don't use elsewhere */
+-struct block_device *blkdev_get_no_open(dev_t dev);
+-void blkdev_put_no_open(struct block_device *bdev);
+-
+ struct block_device *I_BDEV(struct inode *inode);
+ struct block_device *file_bdev(struct file *bdev_file);
+ bool disk_live(struct gendisk *disk);
+--
+2.39.5
+
--- /dev/null
+From d5096485ae2f486eabca609e893c7b6971d39cc3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Apr 2025 10:25:21 +0200
+Subject: block: never reduce ra_pages in blk_apply_bdi_limits
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 7b720c720253e2070459420b2628a7b9ee6733b3 ]
+
+When the user increases the read-ahead size through sysfs, this value
+currently gets lost if the device is reprobed, including on a resume
+from suspend.
+
+As there is no hardware limitation for the read-ahead size, there is
+no real need to reset it or track a separate hardware limitation
+like for max_sectors.
+
+This restores the pre-atomic queue limit behavior in the sd driver as
+sd did not use blk_queue_io_opt and thus never updated the read-ahead
+size to the value based on the optimal I/O, but it changes behavior
+for all other drivers. As the new behavior seems useful and sd is the
+driver for which read-ahead size tweaks are most useful, that seems
+like a worthwhile trade-off.
+
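+To make the new clamping concrete, a worked example (the values are
+illustrative; VM_READAHEAD_PAGES is 32 with 4 KiB pages):
+
+  /*
+   * io_opt = 512 KiB; the user wrote 4096 to read_ahead_kb, so the
+   * current bdi->ra_pages is 4096 KiB / 4 KiB = 1024 pages:
+   *
+   *   lim->io_opt * 2 / PAGE_SIZE = 524288 * 2 / 4096 = 256
+   *
+   * old: ra_pages = max(256, 32)         = 256   (user value lost)
+   * new: ra_pages = max3(1024, 256, 32)  = 1024  (user value kept)
+   */
+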
+Fixes: 804e498e0496 ("sd: convert to the atomic queue limits API")
+Reported-by: Holger Hoffstätte <holger@applied-asynchrony.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Link: https://lore.kernel.org/r/20250424082521.1967286-1-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-settings.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/block/blk-settings.c b/block/blk-settings.c
+index 66721afeea546..67b119ffa1689 100644
+--- a/block/blk-settings.c
++++ b/block/blk-settings.c
+@@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi,
+ /*
+ * For read-ahead of large files to be effective, we need to read ahead
+ * at least twice the optimal I/O size.
++ *
++ * There is no hardware limitation for the read-ahead size and the user
++ * might have increased the read-ahead size through sysfs, so don't ever
++ * decrease it.
+ */
+- bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
++ bdi->ra_pages = max3(bdi->ra_pages,
++ lim->io_opt * 2 / PAGE_SIZE,
++ VM_READAHEAD_PAGES);
+ bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 9a7eeadb5a98c3a6edc41b61cc5e2122d326b89c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 07:37:40 +0200
+Subject: block: remove the backing_inode variable in bdev_statx
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit d13b7090b2510abaa83a25717466decca23e8226 ]
+
+backing_inode is only used once, so remove it and update the comment
+describing the bdev lookup to be a bit more clear.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20250423053810.1683309-3-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: 5f33b5226c9d ("block: don't autoload drivers on stat")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/bdev.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/block/bdev.c b/block/bdev.c
+index 8453f6a795d9a..89235796e51a5 100644
+--- a/block/bdev.c
++++ b/block/bdev.c
+@@ -1271,18 +1271,15 @@ void sync_bdevs(bool wait)
+ void bdev_statx(struct path *path, struct kstat *stat,
+ u32 request_mask)
+ {
+- struct inode *backing_inode;
+ struct block_device *bdev;
+
+- backing_inode = d_backing_inode(path->dentry);
+-
+ /*
+- * Note that backing_inode is the inode of a block device node file,
+- * not the block device's internal inode. Therefore it is *not* valid
+- * to use I_BDEV() here; the block device has to be looked up by i_rdev
++ * Note that d_backing_inode() returns the block device node inode, not
++ * the block device's internal inode. Therefore it is *not* valid to
++ * use I_BDEV() here; the block device has to be looked up by i_rdev
+ * instead.
+ */
+- bdev = blkdev_get_no_open(backing_inode->i_rdev);
++ bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev);
+ if (!bdev)
+ return;
+
+--
+2.39.5
+
--- /dev/null
+From d1e43c16f00ab56df38b82b77443ff24572d06c3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Apr 2025 18:45:42 -0700
+Subject: bpf: Add namespace to BPF internal symbols
+
+From: Alexei Starovoitov <ast@kernel.org>
+
+[ Upstream commit f88886de0927a2adf4c1b4c5c1f1d31d2023ef74 ]
+
+Add a namespace to the BPF internal symbols used by the light skeleton
+to prevent abuse, and document their allowed usage alongside the code.
+
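+A minimal sketch of what this means for a consumer (the map-fd use is
+illustrative; only BPF infrastructure is expected to do this):
+
+  /* Without this declaration, modpost now rejects any module that
+   * links against symbols exported with
+   * EXPORT_SYMBOL_NS(..., "BPF_INTERNAL").
+   */
+  MODULE_IMPORT_NS("BPF_INTERNAL");
+
+  /* ... and only then can the module call e.g.: */
+  struct bpf_map *map = bpf_map_get(ufd);
+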
+Fixes: b1d18a7574d0 ("bpf: Extend sys_bpf commands for bpf_syscall programs.")
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/bpf/20250425014542.62385-1-alexei.starovoitov@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/bpf/bpf_devel_QA.rst | 8 ++++++++
+ kernel/bpf/preload/bpf_preload_kern.c | 1 +
+ kernel/bpf/syscall.c | 6 +++---
+ 3 files changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
+index de27e1620821c..0acb4c9b8d90f 100644
+--- a/Documentation/bpf/bpf_devel_QA.rst
++++ b/Documentation/bpf/bpf_devel_QA.rst
+@@ -382,6 +382,14 @@ In case of new BPF instructions, once the changes have been accepted
+ into the Linux kernel, please implement support into LLVM's BPF back
+ end. See LLVM_ section below for further information.
+
++Q: What "BPF_INTERNAL" symbol namespace is for?
++-----------------------------------------------
++A: Symbols exported as BPF_INTERNAL can only be used by BPF infrastructure
++like preload kernel modules with light skeleton. Most symbols outside
++of BPF_INTERNAL are not expected to be used by code outside of BPF either.
++Symbols may lack the designation because they predate the namespaces,
++or due to an oversight.
++
+ Stable submission
+ =================
+
+diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c
+index 0c63bc2cd895a..56a81df7a9d7c 100644
+--- a/kernel/bpf/preload/bpf_preload_kern.c
++++ b/kernel/bpf/preload/bpf_preload_kern.c
+@@ -89,4 +89,5 @@ static void __exit fini(void)
+ }
+ late_initcall(load);
+ module_exit(fini);
++MODULE_IMPORT_NS("BPF_INTERNAL");
+ MODULE_LICENSE("GPL");
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index e1e42e918ba7f..1c2caae0d8946 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -1562,7 +1562,7 @@ struct bpf_map *bpf_map_get(u32 ufd)
+
+ return map;
+ }
+-EXPORT_SYMBOL(bpf_map_get);
++EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL");
+
+ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
+ {
+@@ -3345,7 +3345,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd)
+ bpf_link_inc(link);
+ return link;
+ }
+-EXPORT_SYMBOL(bpf_link_get_from_fd);
++EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL");
+
+ static void bpf_tracing_link_release(struct bpf_link *link)
+ {
+@@ -5981,7 +5981,7 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
+ return ____bpf_sys_bpf(cmd, attr, size);
+ }
+ }
+-EXPORT_SYMBOL(kern_sys_bpf);
++EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL");
+
+ static const struct bpf_func_proto bpf_sys_bpf_proto = {
+ .func = bpf_sys_bpf,
+--
+2.39.5
+
--- /dev/null
+From 3f9ddc4906b449a18458d225e01e88b6b949a8a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Mar 2025 17:46:35 +1030
+Subject: btrfs: avoid page_lockend underflow in btrfs_punch_hole_lock_range()
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit bc2dbc4983afedd198490cca043798f57c93e9bf ]
+
+[BUG]
+When running btrfs/004 with 4K fs block size and 64K page size,
+sometimes fsstress workload can take 100% CPU for a while, but not long
+enough to trigger a 120s hang warning.
+
+[CAUSE]
+When such 100% CPU usage happens, btrfs_punch_hole_lock_range() is
+always in the call trace.
+
+One example when this problem happens, the function
+btrfs_punch_hole_lock_range() got the following parameters:
+
+ lock_start = 4096, lockend = 20469
+
+Then we calculate @page_lockstart by rounding lock_start up to the page
+boundary, which is 64K (the page size is 64K).
+
+For @page_lockend, we round the value down towards the page boundary,
+which results in 0. Then, since we need to pass an inclusive end to
+filemap_range_has_page(), we subtract 1 from the rounded-down value,
+resulting in (u64)-1.
+
+In the above case, the range is inside the same page, and we do not even
+need to call filemap_range_has_page(), let alone call it with (u64)-1
+as the end.
+
+This behavior causes btrfs_punch_hole_lock_range() to busy-loop
+waiting for an irrelevant range to have its pages dropped.
+
+[FIX]
+Calculate @page_lockend by just rounding down @lockend, without
+decreasing the value by one. So @page_lockend will no longer overflow.
+
+Then exit early if @page_lockend is no larger than @page_lockstart,
+as that means either the range is inside the same page or the two
+pages are already adjacent.
+
+Finally, only decrease @page_lockend when calling filemap_range_has_page().
+
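+A compact sketch of the arithmetic with the reported values (64K page
+size), for illustration:
+
+  /* lockstart = 4096, lockend = 20469, PAGE_SIZE = 64K */
+  page_lockstart = round_up(4096, SZ_64K);       /* = 65536 */
+  /* old: round_down(20469 + 1, SZ_64K) - 1       = (u64)-1 */
+  page_lockend = round_down(20469 + 1, SZ_64K);  /* = 0     */
+  /* new: 0 <= 65536, so we break out early instead of busy-looping
+   * on filemap_range_has_page() over a bogus range.
+   */
+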
+Fixes: 0528476b6ac7 ("btrfs: fix the filemap_range_has_page() call in btrfs_punch_hole_lock_range()")
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/file.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
+index 0b568c8d24cbc..a92997a583bd2 100644
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2104,15 +2104,20 @@ static void btrfs_punch_hole_lock_range(struct inode *inode,
+ * will always return true.
+ * So here we need to do extra page alignment for
+ * filemap_range_has_page().
++ *
++ * And do not decrease page_lockend right now, as it can be 0.
+ */
+ const u64 page_lockstart = round_up(lockstart, PAGE_SIZE);
+- const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1;
++ const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE);
+
+ while (1) {
+ truncate_pagecache_range(inode, lockstart, lockend);
+
+ lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ cached_state);
++ /* The same page or adjacent pages. */
++ if (page_lockend <= page_lockstart)
++ break;
+ /*
+ * We can't have ordered extents in the range, nor dirty/writeback
+ * pages, because we have locked the inode's VFS lock in exclusive
+@@ -2124,7 +2129,7 @@ static void btrfs_punch_hole_lock_range(struct inode *inode,
+ * we do, unlock the range and retry.
+ */
+ if (!filemap_range_has_page(inode->i_mapping, page_lockstart,
+- page_lockend))
++ page_lockend - 1))
+ break;
+
+ unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+--
+2.39.5
+
--- /dev/null
+From 1f2722c0acfbf983fb376a2199f8c40d32c86f13 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Mar 2025 16:04:01 +0100
+Subject: btrfs: zoned: return EIO on RAID1 block group write pointer mismatch
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+
+[ Upstream commit b0c26f47992672661340dd6ea931240213016609 ]
+
+There was a bug report about a NULL pointer dereference in
+__btrfs_add_free_space_zoned() that ultimately happens because a
+conversion from the default metadata profile DUP to a RAID1 profile on two
+disks.
+
+The stack trace has the following signature:
+
+ BTRFS error (device sdc): zoned: write pointer offset mismatch of zones in raid1 profile
+ BUG: kernel NULL pointer dereference, address: 0000000000000058
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI
+ RIP: 0010:__btrfs_add_free_space_zoned.isra.0+0x61/0x1a0
+ RSP: 0018:ffffa236b6f3f6d0 EFLAGS: 00010246
+ RAX: 0000000000000000 RBX: ffff96c8132f3400 RCX: 0000000000000001
+ RDX: 0000000010000000 RSI: 0000000000000000 RDI: ffff96c8132f3410
+ RBP: 0000000010000000 R08: 0000000000000003 R09: 0000000000000000
+ R10: 0000000000000000 R11: 00000000ffffffff R12: 0000000000000000
+ R13: ffff96c758f65a40 R14: 0000000000000001 R15: 000011aac0000000
+ FS: 00007fdab1cb2900(0000) GS:ffff96e60ca00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000058 CR3: 00000001a05ae000 CR4: 0000000000350ef0
+ Call Trace:
+ <TASK>
+ ? __die_body.cold+0x19/0x27
+ ? page_fault_oops+0x15c/0x2f0
+ ? exc_page_fault+0x7e/0x180
+ ? asm_exc_page_fault+0x26/0x30
+ ? __btrfs_add_free_space_zoned.isra.0+0x61/0x1a0
+ btrfs_add_free_space_async_trimmed+0x34/0x40
+ btrfs_add_new_free_space+0x107/0x120
+ btrfs_make_block_group+0x104/0x2b0
+ btrfs_create_chunk+0x977/0xf20
+ btrfs_chunk_alloc+0x174/0x510
+ ? srso_return_thunk+0x5/0x5f
+ btrfs_inc_block_group_ro+0x1b1/0x230
+ btrfs_relocate_block_group+0x9e/0x410
+ btrfs_relocate_chunk+0x3f/0x130
+ btrfs_balance+0x8ac/0x12b0
+ ? srso_return_thunk+0x5/0x5f
+ ? srso_return_thunk+0x5/0x5f
+ ? __kmalloc_cache_noprof+0x14c/0x3e0
+ btrfs_ioctl+0x2686/0x2a80
+ ? srso_return_thunk+0x5/0x5f
+ ? ioctl_has_perm.constprop.0.isra.0+0xd2/0x120
+ __x64_sys_ioctl+0x97/0xc0
+ do_syscall_64+0x82/0x160
+ ? srso_return_thunk+0x5/0x5f
+ ? __memcg_slab_free_hook+0x11a/0x170
+ ? srso_return_thunk+0x5/0x5f
+ ? kmem_cache_free+0x3f0/0x450
+ ? srso_return_thunk+0x5/0x5f
+ ? srso_return_thunk+0x5/0x5f
+ ? syscall_exit_to_user_mode+0x10/0x210
+ ? srso_return_thunk+0x5/0x5f
+ ? do_syscall_64+0x8e/0x160
+ ? sysfs_emit+0xaf/0xc0
+ ? srso_return_thunk+0x5/0x5f
+ ? srso_return_thunk+0x5/0x5f
+ ? seq_read_iter+0x207/0x460
+ ? srso_return_thunk+0x5/0x5f
+ ? vfs_read+0x29c/0x370
+ ? srso_return_thunk+0x5/0x5f
+ ? srso_return_thunk+0x5/0x5f
+ ? syscall_exit_to_user_mode+0x10/0x210
+ ? srso_return_thunk+0x5/0x5f
+ ? do_syscall_64+0x8e/0x160
+ ? srso_return_thunk+0x5/0x5f
+ ? exc_page_fault+0x7e/0x180
+ entry_SYSCALL_64_after_hwframe+0x76/0x7e
+ RIP: 0033:0x7fdab1e0ca6d
+ RSP: 002b:00007ffeb2b60c80 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+ RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fdab1e0ca6d
+ RDX: 00007ffeb2b60d80 RSI: 00000000c4009420 RDI: 0000000000000003
+ RBP: 00007ffeb2b60cd0 R08: 0000000000000000 R09: 0000000000000013
+ R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+ R13: 00007ffeb2b6343b R14: 00007ffeb2b60d80 R15: 0000000000000001
+ </TASK>
+ CR2: 0000000000000058
+ ---[ end trace 0000000000000000 ]---
+
+The 1st line is the most interesting here:
+
+ BTRFS error (device sdc): zoned: write pointer offset mismatch of zones in raid1 profile
+
+When a RAID1 block-group is created and a write pointer mismatch between
+the disks in the RAID set is detected, btrfs sets the alloc_offset to the
+length of the block group, marking it as full. Afterwards the code expects
+that a balance operation will evacuate the data in this block-group and
+repair the problems.
+
+But before this is possible, the new space of this block-group will be
+accounted in the free space cache. But in __btrfs_add_free_space_zoned()
+it is being checked if it is a initial creation of a block group and if
+not a reclaim decision will be made. But the decision if a block-group's
+free space accounting is done for an initial creation depends on if the
+size of the added free space is the whole length of the block-group and
+the allocation offset is 0.
+
+But as btrfs_load_block_group_zone_info() sets the allocation offset to
+the zone capacity (i.e. marking the block-group as full) this initial
+decision is not met, and the space_info pointer in the 'struct
+btrfs_block_group' has not yet been assigned.
+
+Fail creation of the block group and rely on manual user intervention to
+re-balance the filesystem.
+
+Afterwards the filesystem can be unmounted, mounted in degraded mode and
+the missing device can be removed after a full balance of the filesystem.
+
+Reported-by: 西木野羰基 <yanqiyu01@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CAB_b4sBhDe3tscz=duVyhc9hNE+gu=B8CrgLO152uMyanR8BEA@mail.gmail.com/
+Fixes: b1934cd60695 ("btrfs: zoned: handle broken write pointer on zones")
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/zoned.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index aaf925897fdda..978a57da8b4f5 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1659,7 +1659,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
+ * stripe.
+ */
+ cache->alloc_offset = cache->zone_capacity;
+- ret = 0;
+ }
+
+ out:
+--
+2.39.5
+
--- /dev/null
+From aa71665db9dfcfcd8024c828fccc980f056548cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Mar 2025 10:47:11 +0000
+Subject: ceph: Fix incorrect flush end position calculation
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit f452a2204614fc10e2c3b85904c4bd300c2789dc ]
+
+In ceph, in fill_fscrypt_truncate(), the end flush position is calculated
+by:
+
+ loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1;
+
+but that uses the block shift, not the block size.
+
+Fix this to use the block size instead.
+
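+The practical effect, assuming the usual 4 KiB fscrypt block (i.e. a
+shift of 12):
+
+  /* old: lend = orig_pos + 12 - 1    -> waits on just 12 bytes   */
+  /* new: lend = orig_pos + 4096 - 1  -> waits on the whole block */
+  loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1;
+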
+Fixes: 5c64737d2536 ("ceph: add truncate size handling support for fscrypt")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ceph/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
+index 7dd6c2275085b..e3ab07797c850 100644
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -2362,7 +2362,7 @@ static int fill_fscrypt_truncate(struct inode *inode,
+
+ /* Try to writeback the dirty pagecaches */
+ if (issued & (CEPH_CAP_FILE_BUFFER)) {
+- loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1;
++ loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1;
+
+ ret = filemap_write_and_wait_range(inode->i_mapping,
+ orig_pos, lend);
+--
+2.39.5
+
--- /dev/null
+From 24e6f3337536d9bdf593a691bd534dc1613d5f91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Apr 2025 21:17:51 +0000
+Subject: cgroup/cpuset-v1: Add missing support for cpuset_v2_mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: T.J. Mercier <tjmercier@google.com>
+
+[ Upstream commit 1bf67c8fdbda21fadd564a12dbe2b13c1ea5eda7 ]
+
+Android has mounted the v1 cpuset controller using filesystem type
+"cpuset" (not "cgroup") since 2015 [1], and depends on the resulting
+behavior where the controller name is not added as a prefix for cgroupfs
+files. [2]
+
+Later, a problem was discovered where cpu hotplug onlining did not
+affect the cpuset/cpus files, which Android carried an out-of-tree patch
+to address for a while. An attempt was made to upstream this patch, but
+the recommendation was to use the "cpuset_v2_mode" mount option
+instead. [3]
+
+An effort was made to do so, but this fails with "cgroup: Unknown
+parameter 'cpuset_v2_mode'" because commit e1cba4b85daa ("cgroup: Add
+mount flag to enable cpuset to use v2 behavior in v1 cgroup") did not
+update the special cased cpuset_mount(), and only the cgroup (v1)
+filesystem type was updated.
+
+Add parameter parsing to the cpuset filesystem type so that
+cpuset_v2_mode works like the cgroup filesystem type:
+
+$ mkdir /dev/cpuset
+$ mount -t cpuset -ocpuset_v2_mode none /dev/cpuset
+$ mount|grep cpuset
+none on /dev/cpuset type cgroup (rw,relatime,cpuset,noprefix,cpuset_v2_mode,release_agent=/sbin/cpuset_release_agent)
+
+[1] https://cs.android.com/android/_/android/platform/system/core/+/b769c8d24fd7be96f8968aa4c80b669525b930d3
+[2] https://cs.android.com/android/platform/superproject/main/+/main:system/core/libprocessgroup/setup/cgroup_map_write.cpp;drc=2dac5d89a0f024a2d0cc46a80ba4ee13472f1681;l=192
+[3] https://lore.kernel.org/lkml/f795f8be-a184-408a-0b5a-553d26061385@redhat.com/T/
+
+Fixes: e1cba4b85daa ("cgroup: Add mount flag to enable cpuset to use v2 behavior in v1 cgroup")
+Signed-off-by: T.J. Mercier <tjmercier@google.com>
+Acked-by: Waiman Long <longman@redhat.com>
+Reviewed-by: Kamalesh Babulal <kamalesh.babulal@oracle.com>
+Acked-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cgroup.c | 29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index 81f078c059e86..68d58753c75c3 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -2339,9 +2339,37 @@ static struct file_system_type cgroup2_fs_type = {
+ };
+
+ #ifdef CONFIG_CPUSETS_V1
++enum cpuset_param {
++ Opt_cpuset_v2_mode,
++};
++
++static const struct fs_parameter_spec cpuset_fs_parameters[] = {
++ fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode),
++ {}
++};
++
++static int cpuset_parse_param(struct fs_context *fc, struct fs_parameter *param)
++{
++ struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
++ struct fs_parse_result result;
++ int opt;
++
++ opt = fs_parse(fc, cpuset_fs_parameters, param, &result);
++ if (opt < 0)
++ return opt;
++
++ switch (opt) {
++ case Opt_cpuset_v2_mode:
++ ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE;
++ return 0;
++ }
++ return -EINVAL;
++}
++
+ static const struct fs_context_operations cpuset_fs_context_ops = {
+ .get_tree = cgroup1_get_tree,
+ .free = cgroup_fs_context_free,
++ .parse_param = cpuset_parse_param,
+ };
+
+ /*
+@@ -2378,6 +2406,7 @@ static int cpuset_init_fs_context(struct fs_context *fc)
+ static struct file_system_type cpuset_fs_type = {
+ .name = "cpuset",
+ .init_fs_context = cpuset_init_fs_context,
++ .parameters = cpuset_fs_parameters,
+ .fs_flags = FS_USERNS_MOUNT,
+ };
+ #endif
+--
+2.39.5
+
--- /dev/null
+From c9563ac07a33660df349cad5b1af3bfe43311b03 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Apr 2025 20:48:13 +0800
+Subject: cpufreq: apple-soc: Fix null-ptr-deref in
+ apple_soc_cpufreq_get_rate()
+
+From: Henry Martin <bsdhenrymartin@gmail.com>
+
+[ Upstream commit 9992649f6786921873a9b89dafa5e04d8c5fef2b ]
+
+cpufreq_cpu_get_raw() can return NULL when the target CPU is not present
+in the policy->cpus mask. apple_soc_cpufreq_get_rate() does not check
+for this case, which results in a NULL pointer dereference.
+
+Fixes: 6286bbb40576 ("cpufreq: apple-soc: Add new driver to control Apple SoC CPU P-states")
+Signed-off-by: Henry Martin <bsdhenrymartin@gmail.com>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/apple-soc-cpufreq.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c
+index 269b18c62d040..82007f6a24d2a 100644
+--- a/drivers/cpufreq/apple-soc-cpufreq.c
++++ b/drivers/cpufreq/apple-soc-cpufreq.c
+@@ -134,11 +134,17 @@ static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = {
+
+ static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu)
+ {
+- struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu);
+- struct apple_cpu_priv *priv = policy->driver_data;
++ struct cpufreq_policy *policy;
++ struct apple_cpu_priv *priv;
+ struct cpufreq_frequency_table *p;
+ unsigned int pstate;
+
++ policy = cpufreq_cpu_get_raw(cpu);
++ if (unlikely(!policy))
++ return 0;
++
++ priv = policy->driver_data;
++
+ if (priv->info->cur_pstate_mask) {
+ u32 reg = readl_relaxed(priv->reg_base + APPLE_DVFS_STATUS);
+
+--
+2.39.5
+
--- /dev/null
+From c270997fc0d99ab1fcf8e80c185796e4bbd07695 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 13 Apr 2025 11:11:42 +0100
+Subject: cpufreq: cppc: Fix invalid return value in .get() callback
+
+From: Marc Zyngier <maz@kernel.org>
+
+[ Upstream commit 2b8e6b58889c672e1ae3601d9b2b070be4dc2fbc ]
+
+Returning a negative error code in a function with an unsigned
+return type is a pretty bad idea. It is probably worse when the
+justification for the change is "our static analisys tool found it".
+
+Fixes: cf7de25878a1 ("cppc_cpufreq: Fix possible null pointer dereference")
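+To make the failure mode concrete (a sketch; kHz is the unit of the
+.get() callback):
+
+  /* The function returns unsigned int, so the old error path did: */
+  return -ENODEV;  /* == (unsigned int)-19 == 4294967277, i.e. an
+                    * absurd but "valid looking" rate of ~4.3 THz.
+                    * Returning 0 is the conventional way to signal
+                    * that the rate could not be determined.
+                    */
+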
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: "Rafael J. Wysocki" <rafael@kernel.org>
+Cc: Viresh Kumar <viresh.kumar@linaro.org>
+Reviewed-by: Lifeng Zheng <zhenglifeng1@huawei.com>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/cppc_cpufreq.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
+index 8f512448382f4..ba7c16c0e4756 100644
+--- a/drivers/cpufreq/cppc_cpufreq.c
++++ b/drivers/cpufreq/cppc_cpufreq.c
+@@ -749,7 +749,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
+ int ret;
+
+ if (!policy)
+- return -ENODEV;
++ return 0;
+
+ cpu_data = policy->driver_data;
+
+--
+2.39.5
+
--- /dev/null
+From b47e6c7651654b1b4640eebc24502c38a820566d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Apr 2025 14:40:06 +0200
+Subject: cpufreq: Do not enable by default during compile testing
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+[ Upstream commit d4f610a9bafdec8e3210789aa19335367da696ea ]
+
+Enabling the compile test should not cause automatic enabling of all
+drivers.
+
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Stable-dep-of: a374f28700ab ("cpufreq: fix compile-test defaults")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/Kconfig.arm | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
+index 4f9cb943d945c..d4d625ded285f 100644
+--- a/drivers/cpufreq/Kconfig.arm
++++ b/drivers/cpufreq/Kconfig.arm
+@@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ
+ config ARM_BRCMSTB_AVS_CPUFREQ
+ tristate "Broadcom STB AVS CPUfreq driver"
+ depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || COMPILE_TEST
+- default y
++ default ARCH_BRCMSTB
+ help
+ Some Broadcom STB SoCs use a co-processor running proprietary firmware
+ ("AVS") to handle voltage and frequency scaling. This driver provides
+@@ -181,7 +181,7 @@ config ARM_RASPBERRYPI_CPUFREQ
+ config ARM_S3C64XX_CPUFREQ
+ bool "Samsung S3C64XX"
+ depends on CPU_S3C6410 || COMPILE_TEST
+- default y
++ default CPU_S3C6410
+ help
+ This adds the CPUFreq driver for Samsung S3C6410 SoC.
+
+@@ -190,7 +190,7 @@ config ARM_S3C64XX_CPUFREQ
+ config ARM_S5PV210_CPUFREQ
+ bool "Samsung S5PV210 and S5PC110"
+ depends on CPU_S5PV210 || COMPILE_TEST
+- default y
++ default CPU_S5PV210
+ help
+ This adds the CPUFreq driver for Samsung S5PV210 and
+ S5PC110 SoCs.
+@@ -214,7 +214,7 @@ config ARM_SCMI_CPUFREQ
+ config ARM_SPEAR_CPUFREQ
+ bool "SPEAr CPUFreq support"
+ depends on PLAT_SPEAR || COMPILE_TEST
+- default y
++ default PLAT_SPEAR
+ help
+ This adds the CPUFreq driver support for SPEAr SOCs.
+
+@@ -233,7 +233,7 @@ config ARM_TEGRA20_CPUFREQ
+ tristate "Tegra20/30 CPUFreq support"
+ depends on ARCH_TEGRA || COMPILE_TEST
+ depends on CPUFREQ_DT
+- default y
++ default ARCH_TEGRA
+ help
+ This adds the CPUFreq driver support for Tegra20/30 SOCs.
+
+@@ -241,7 +241,7 @@ config ARM_TEGRA124_CPUFREQ
+ bool "Tegra124 CPUFreq support"
+ depends on ARCH_TEGRA || COMPILE_TEST
+ depends on CPUFREQ_DT
+- default y
++ default ARCH_TEGRA
+ help
+ This adds the CPUFreq driver support for Tegra124 SOCs.
+
+@@ -256,14 +256,14 @@ config ARM_TEGRA194_CPUFREQ
+ tristate "Tegra194 CPUFreq support"
+ depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST)
+ depends on TEGRA_BPMP
+- default y
++ default ARCH_TEGRA
+ help
+ This adds CPU frequency driver support for Tegra194 SOCs.
+
+ config ARM_TI_CPUFREQ
+ bool "Texas Instruments CPUFreq support"
+ depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST
+- default y
++ default ARCH_OMAP2PLUS || ARCH_K3
+ help
+ This driver enables valid OPPs on the running platform based on
+ values contained within the SoC in use. Enable this in order to
+--
+2.39.5
+
--- /dev/null
+From 3a0eaccb1e5224d271a72931c33fde40e74141db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Apr 2025 09:28:38 +0200
+Subject: cpufreq: fix compile-test defaults
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit a374f28700abd20e8a7d026f89aa26f759445918 ]
+
+Commit 3f66425a4fc8 ("cpufreq: Enable COMPILE_TEST on Arm drivers")
+enabled compile testing of most Arm CPUFreq drivers but left the
+existing default values unchanged so that many drivers are enabled by
+default whenever COMPILE_TEST is selected.
+
+This specifically results in the S3C64XX CPUFreq driver being enabled
+and initialised during boot of non-S3C64XX platforms with the following
+error logged:
+
+ cpufreq: Unable to obtain ARMCLK: -2
+
+Commit d4f610a9bafd ("cpufreq: Do not enable by default during compile
+testing") recently fixed most of the default values, but two entries
+were missed and two could use a more specific default condition.
+
+Fix the default values for drivers that can be compile tested and that
+should be enabled by default when not compile testing.
+
+Fixes: 3f66425a4fc8 ("cpufreq: Enable COMPILE_TEST on Arm drivers")
+Cc: Rob Herring (Arm) <robh@kernel.org>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/Kconfig.arm | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
+index d4d625ded285f..0d46402e30942 100644
+--- a/drivers/cpufreq/Kconfig.arm
++++ b/drivers/cpufreq/Kconfig.arm
+@@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ
+ config ARM_BRCMSTB_AVS_CPUFREQ
+ tristate "Broadcom STB AVS CPUfreq driver"
+ depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || COMPILE_TEST
+- default ARCH_BRCMSTB
++ default y if ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ
+ help
+ Some Broadcom STB SoCs use a co-processor running proprietary firmware
+ ("AVS") to handle voltage and frequency scaling. This driver provides
+@@ -88,7 +88,7 @@ config ARM_HIGHBANK_CPUFREQ
+ tristate "Calxeda Highbank-based"
+ depends on ARCH_HIGHBANK || COMPILE_TEST
+ depends on CPUFREQ_DT && REGULATOR && PL320_MBOX
+- default m
++ default m if ARCH_HIGHBANK
+ help
+ This adds the CPUFreq driver for Calxeda Highbank SoC
+ based boards.
+@@ -133,7 +133,7 @@ config ARM_MEDIATEK_CPUFREQ
+ config ARM_MEDIATEK_CPUFREQ_HW
+ tristate "MediaTek CPUFreq HW driver"
+ depends on ARCH_MEDIATEK || COMPILE_TEST
+- default m
++ default m if ARCH_MEDIATEK
+ help
+ Support for the CPUFreq HW driver.
+ Some MediaTek chipsets have a HW engine to offload the steps
+@@ -256,7 +256,7 @@ config ARM_TEGRA194_CPUFREQ
+ tristate "Tegra194 CPUFreq support"
+ depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST)
+ depends on TEGRA_BPMP
+- default ARCH_TEGRA
++ default ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC
+ help
+ This adds CPU frequency driver support for Tegra194 SOCs.
+
+--
+2.39.5
+
--- /dev/null
+From 4f9719687b9613b4a71c00b5c3a0e812d7de8d68 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 23:03:53 +0800
+Subject: cpufreq: scmi: Fix null-ptr-deref in scmi_cpufreq_get_rate()
+
+From: Henry Martin <bsdhenrymartin@gmail.com>
+
+[ Upstream commit 484d3f15cc6cbaa52541d6259778e715b2c83c54 ]
+
+cpufreq_cpu_get_raw() can return NULL when the target CPU is not present
+in the policy->cpus mask. scmi_cpufreq_get_rate() does not check for
+this case, which results in a NULL pointer dereference.
+
+Add NULL check after cpufreq_cpu_get_raw() to prevent this issue.
+
+Fixes: 99d6bdf33877 ("cpufreq: add support for CPU DVFS based on SCMI message protocol")
+Signed-off-by: Henry Martin <bsdhenrymartin@gmail.com>
+Acked-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/scmi-cpufreq.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
+index 914bf2c940a03..9c6eb1238f1be 100644
+--- a/drivers/cpufreq/scmi-cpufreq.c
++++ b/drivers/cpufreq/scmi-cpufreq.c
+@@ -37,11 +37,17 @@ static struct cpufreq_driver scmi_cpufreq_driver;
+
+ static unsigned int scmi_cpufreq_get_rate(unsigned int cpu)
+ {
+- struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu);
+- struct scmi_data *priv = policy->driver_data;
++ struct cpufreq_policy *policy;
++ struct scmi_data *priv;
+ unsigned long rate;
+ int ret;
+
++ policy = cpufreq_cpu_get_raw(cpu);
++ if (unlikely(!policy))
++ return 0;
++
++ priv = policy->driver_data;
++
+ ret = perf_ops->freq_get(ph, priv->domain_id, &rate, false);
+ if (ret)
+ return 0;
+--
+2.39.5
+
--- /dev/null
+From b09d7e9afd73a36a06a08bb4f4f8223a3a892c96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 23:03:54 +0800
+Subject: cpufreq: scpi: Fix null-ptr-deref in scpi_cpufreq_get_rate()
+
+From: Henry Martin <bsdhenrymartin@gmail.com>
+
+[ Upstream commit 73b24dc731731edf762f9454552cb3a5b7224949 ]
+
+cpufreq_cpu_get_raw() can return NULL when the target CPU is not present
+in the policy->cpus mask. scpi_cpufreq_get_rate() does not check for
+this case, which results in a NULL pointer dereference.
+
+Fixes: 343a8d17fa8d ("cpufreq: scpi: remove arm_big_little dependency")
+Signed-off-by: Henry Martin <bsdhenrymartin@gmail.com>
+Acked-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/scpi-cpufreq.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
+index 1f97b949763fa..9118856e17365 100644
+--- a/drivers/cpufreq/scpi-cpufreq.c
++++ b/drivers/cpufreq/scpi-cpufreq.c
+@@ -29,9 +29,16 @@ static struct scpi_ops *scpi_ops;
+
+ static unsigned int scpi_cpufreq_get_rate(unsigned int cpu)
+ {
+- struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu);
+- struct scpi_data *priv = policy->driver_data;
+- unsigned long rate = clk_get_rate(priv->clk);
++ struct cpufreq_policy *policy;
++ struct scpi_data *priv;
++ unsigned long rate;
++
++ policy = cpufreq_cpu_get_raw(cpu);
++ if (unlikely(!policy))
++ return 0;
++
++ priv = policy->driver_data;
++ rate = clk_get_rate(priv->clk);
+
+ return rate / 1000;
+ }
+--
+2.39.5
+
--- /dev/null
+From e4740cd400b0d34137fa688dcc5e93d9a8344fe9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Mar 2025 15:55:57 +0000
+Subject: cpufreq: sun50i: prevent out-of-bounds access
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Andre Przywara <andre.przywara@arm.com>
+
+[ Upstream commit 14c8a418159e541d70dbf8fc71225d1623beaf0f ]
+
+A KASAN-enabled kernel reports an out-of-bounds access when handling the
+nvmem cell in the sun50i cpufreq driver:
+==================================================================
+BUG: KASAN: slab-out-of-bounds in sun50i_cpufreq_nvmem_probe+0x180/0x3d4
+Read of size 4 at addr ffff000006bf31e0 by task kworker/u16:1/38
+
+This is because the DT specifies the nvmem cell as covering only two
+bytes, but we use a u32 pointer to read the value. DTs for other SoCs
+indeed specify 4 bytes, so we cannot just shorten the variable to a u16.
+
+Fortunately nvmem_cell_read() can return the length of the nvmem cell,
+in bytes, so we can use that information to access only the valid
+portion of the data.
+To cover multiple cell sizes, use memcpy() to copy the information into
+a zeroed u32 buffer, then also make sure we always read the data in
+little-endian fashion, as this is how the data is stored in the SID
+efuses.
+
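+A worked example of the copy (the cell contents are illustrative):
+
+  /* a 2-byte NVMEM cell containing { 0x32, 0x07 }: */
+  u32 speedbin = 0;                    /* zeroed destination buffer    */
+  memcpy(&speedbin, speedbin_ptr, 2);  /* copy only 'len' bytes        */
+  speedbin = le32_to_cpu(speedbin);    /* 0x732 on any host byte order */
+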
+Fixes: 6cc4bcceff9a ("cpufreq: sun50i: Refactor speed bin decoding")
+Reported-by: Jernej Skrabec <jernej.skrabec@gmail.com>
+Signed-off-by: Andre Przywara <andre.przywara@arm.com>
+Reviewed-by: Jernej Škrabec <jernej.skrabec@gmail.com>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/sun50i-cpufreq-nvmem.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
+index 47d6840b34899..744312a44279c 100644
+--- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c
++++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
+@@ -194,7 +194,9 @@ static int sun50i_cpufreq_get_efuse(void)
+ struct nvmem_cell *speedbin_nvmem;
+ const struct of_device_id *match;
+ struct device *cpu_dev;
+- u32 *speedbin;
++ void *speedbin_ptr;
++ u32 speedbin = 0;
++ size_t len;
+ int ret;
+
+ cpu_dev = get_cpu_device(0);
+@@ -217,14 +219,18 @@ static int sun50i_cpufreq_get_efuse(void)
+ return dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem),
+ "Could not get nvmem cell\n");
+
+- speedbin = nvmem_cell_read(speedbin_nvmem, NULL);
++ speedbin_ptr = nvmem_cell_read(speedbin_nvmem, &len);
+ nvmem_cell_put(speedbin_nvmem);
+- if (IS_ERR(speedbin))
+- return PTR_ERR(speedbin);
++ if (IS_ERR(speedbin_ptr))
++ return PTR_ERR(speedbin_ptr);
+
+- ret = opp_data->efuse_xlate(*speedbin);
++ if (len <= 4)
++ memcpy(&speedbin, speedbin_ptr, len);
++ speedbin = le32_to_cpu(speedbin);
+
+- kfree(speedbin);
++ ret = opp_data->efuse_xlate(speedbin);
++
++ kfree(speedbin_ptr);
+
+ return ret;
+ };
+--
+2.39.5
+
--- /dev/null
+From 9ccc66c19f49c381dbe33757538d6032e3b93543 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Apr 2025 17:15:42 +0200
+Subject: dma/contiguous: avoid warning about unused size_bytes
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit d7b98ae5221007d3f202746903d4c21c7caf7ea9 ]
+
+When building with W=1, this variable is unused for configs with
+CONFIG_CMA_SIZE_SEL_PERCENTAGE=y:
+
+kernel/dma/contiguous.c:67:26: error: 'size_bytes' defined but not used [-Werror=unused-const-variable=]
+
+Change this to a macro to avoid the warning.
+
+Fixes: c64be2bb1c6e ("drivers: add Contiguous Memory Allocator")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
+Link: https://lore.kernel.org/r/20250409151557.3890443-1-arnd@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/dma/contiguous.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
+index 055da410ac71d..8df0dfaaca18e 100644
+--- a/kernel/dma/contiguous.c
++++ b/kernel/dma/contiguous.c
+@@ -64,8 +64,7 @@ struct cma *dma_contiguous_default_area;
+ * Users, who want to set the size of global CMA area for their system
+ * should use cma= kernel parameter.
+ */
+-static const phys_addr_t size_bytes __initconst =
+- (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M;
++#define size_bytes ((phys_addr_t)CMA_SIZE_MBYTES * SZ_1M)
+ static phys_addr_t size_cmdline __initdata = -1;
+ static phys_addr_t base_cmdline __initdata;
+ static phys_addr_t limit_cmdline __initdata;
+--
+2.39.5
+
--- /dev/null
+From 724521f1225d0508218a2eebae156a33d278a691 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Apr 2025 22:13:00 +0200
+Subject: drm/meson: use unsigned long long / Hz for frequency types
+
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+[ Upstream commit 1017560164b6bbcbc93579266926e6e96675262a ]
+
+Christian reports that 4K output using YUV420 encoding fails with the
+following error:
+ Fatal Error, invalid HDMI vclk freq 593406
+
+Modetest shows the following:
+ 3840x2160 59.94 3840 4016 4104 4400 2160 2168 2178 2250 593407 flags: xxxx, xxxx,
+ drm calculated value -------------------------------------^
+
+This indicates that there's a (1kHz) mismatch between the clock
+calculated by the drm framework and the meson driver.
+
+Relevant function call stack:
+(drm framework)
+ -> meson_encoder_hdmi_atomic_enable()
+ -> meson_encoder_hdmi_set_vclk()
+ -> meson_vclk_setup()
+
+The video clock requested by the drm framework is 593407kHz. This is
+passed by meson_encoder_hdmi_atomic_enable() to
+meson_encoder_hdmi_set_vclk() and the following formula is applied:
+- the frequency is halved (which would be 296703.5kHz) and rounded down
+ to the next full integer, which is 296703kHz
+- TMDS clock is calculated (296703kHz * 10)
+- video encoder clock is calculated - this needs to match a table from
+ meson_vclk.c and so it doubles the previously halved value again
+ (resulting in 593406kHz)
+- meson_vclk_setup() can't find (either directly, or by deriving it from
+ 594000kHz * 1000 / 1001 and rounding to the closest integer value -
+ which is 593407kHz as originally requested by the drm framework) a
+ matching clock in it's internal table and errors out with "invalid
+ HDMI vclk freq"
+
+Fix the division precision by switching the whole meson driver to use
+unsigned long long (64-bit) Hz values for clock frequencies instead of
+unsigned int (32-bit) kHz values, avoiding the rounding error.
+
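+The rounding error as plain arithmetic, using the mode from the report:
+
+  /* kHz (old): 593407 / 2 = 296703        (0.5 kHz lost)
+   *            296703 * 2 = 593406        != 593407 -> "invalid HDMI vclk"
+   * Hz  (new): 593407000 / 2 = 296703500
+   *            296703500 * 2 = 593407000  -> exact
+   */
+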
+Fixes: e5fab2ec9ca4 ("drm/meson: vclk: add support for YUV420 setup")
+Reported-by: Christian Hewitt <christianshewitt@gmail.com>
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://lore.kernel.org/r/20250421201300.778955-3-martin.blumenstingl@googlemail.com
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/meson/meson_drv.c | 2 +-
+ drivers/gpu/drm/meson/meson_drv.h | 2 +-
+ drivers/gpu/drm/meson/meson_encoder_hdmi.c | 29 +--
+ drivers/gpu/drm/meson/meson_vclk.c | 195 +++++++++++----------
+ drivers/gpu/drm/meson/meson_vclk.h | 13 +-
+ 5 files changed, 126 insertions(+), 115 deletions(-)
+
+diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
+index 81d2ee37e7732..49ff9f1f16d32 100644
+--- a/drivers/gpu/drm/meson/meson_drv.c
++++ b/drivers/gpu/drm/meson/meson_drv.c
+@@ -169,7 +169,7 @@ static const struct meson_drm_soc_attr meson_drm_soc_attrs[] = {
+ /* S805X/S805Y HDMI PLL won't lock for HDMI PHY freq > 1,65GHz */
+ {
+ .limits = {
+- .max_hdmi_phy_freq = 1650000,
++ .max_hdmi_phy_freq = 1650000000,
+ },
+ .attrs = (const struct soc_device_attribute []) {
+ { .soc_id = "GXL (S805*)", },
+diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
+index 3f9345c14f31c..be4b0e4df6e13 100644
+--- a/drivers/gpu/drm/meson/meson_drv.h
++++ b/drivers/gpu/drm/meson/meson_drv.h
+@@ -37,7 +37,7 @@ struct meson_drm_match_data {
+ };
+
+ struct meson_drm_soc_limits {
+- unsigned int max_hdmi_phy_freq;
++ unsigned long long max_hdmi_phy_freq;
+ };
+
+ struct meson_drm {
+diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+index 0593a1cde906f..ce8cea5d3a56b 100644
+--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
++++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+@@ -70,12 +70,12 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi,
+ {
+ struct meson_drm *priv = encoder_hdmi->priv;
+ int vic = drm_match_cea_mode(mode);
+- unsigned int phy_freq;
+- unsigned int vclk_freq;
+- unsigned int venc_freq;
+- unsigned int hdmi_freq;
++ unsigned long long phy_freq;
++ unsigned long long vclk_freq;
++ unsigned long long venc_freq;
++ unsigned long long hdmi_freq;
+
+- vclk_freq = mode->clock;
++ vclk_freq = mode->clock * 1000;
+
+ /* For 420, pixel clock is half unlike venc clock */
+ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24)
+@@ -107,7 +107,8 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi,
+ if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+ venc_freq /= 2;
+
+- dev_dbg(priv->dev, "vclk:%d phy=%d venc=%d hdmi=%d enci=%d\n",
++ dev_dbg(priv->dev,
++ "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n",
+ phy_freq, vclk_freq, venc_freq, hdmi_freq,
+ priv->venc.hdmi_use_enci);
+
+@@ -122,10 +123,11 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri
+ struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge);
+ struct meson_drm *priv = encoder_hdmi->priv;
+ bool is_hdmi2_sink = display_info->hdmi.scdc.supported;
+- unsigned int phy_freq;
+- unsigned int vclk_freq;
+- unsigned int venc_freq;
+- unsigned int hdmi_freq;
++ unsigned long long clock = mode->clock * 1000;
++ unsigned long long phy_freq;
++ unsigned long long vclk_freq;
++ unsigned long long venc_freq;
++ unsigned long long hdmi_freq;
+ int vic = drm_match_cea_mode(mode);
+ enum drm_mode_status status;
+
+@@ -144,12 +146,12 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri
+ if (status != MODE_OK)
+ return status;
+
+- return meson_vclk_dmt_supported_freq(priv, mode->clock);
++ return meson_vclk_dmt_supported_freq(priv, clock);
+ /* Check against supported VIC modes */
+ } else if (!meson_venc_hdmi_supported_vic(vic))
+ return MODE_BAD;
+
+- vclk_freq = mode->clock;
++ vclk_freq = clock;
+
+ /* For 420, pixel clock is half unlike venc clock */
+ if (drm_mode_is_420_only(display_info, mode) ||
+@@ -179,7 +181,8 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri
+ if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+ venc_freq /= 2;
+
+- dev_dbg(priv->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n",
++ dev_dbg(priv->dev,
++ "%s: vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz\n",
+ __func__, phy_freq, vclk_freq, venc_freq, hdmi_freq);
+
+ return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq);
+diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c
+index 2a82119eb58ed..3325580d885d0 100644
+--- a/drivers/gpu/drm/meson/meson_vclk.c
++++ b/drivers/gpu/drm/meson/meson_vclk.c
+@@ -110,7 +110,10 @@
+ #define HDMI_PLL_LOCK BIT(31)
+ #define HDMI_PLL_LOCK_G12A (3 << 30)
+
+-#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST(_freq * 1000, 1001)
++#define PIXEL_FREQ_1000_1001(_freq) \
++ DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL)
++#define PHY_FREQ_1000_1001(_freq) \
++ (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10)
+
+ /* VID PLL Dividers */
+ enum {
+@@ -360,11 +363,11 @@ enum {
+ };
+
+ struct meson_vclk_params {
+- unsigned int pll_freq;
+- unsigned int phy_freq;
+- unsigned int vclk_freq;
+- unsigned int venc_freq;
+- unsigned int pixel_freq;
++ unsigned long long pll_freq;
++ unsigned long long phy_freq;
++ unsigned long long vclk_freq;
++ unsigned long long venc_freq;
++ unsigned long long pixel_freq;
+ unsigned int pll_od1;
+ unsigned int pll_od2;
+ unsigned int pll_od3;
+@@ -372,11 +375,11 @@ struct meson_vclk_params {
+ unsigned int vclk_div;
+ } params[] = {
+ [MESON_VCLK_HDMI_ENCI_54000] = {
+- .pll_freq = 4320000,
+- .phy_freq = 270000,
+- .vclk_freq = 54000,
+- .venc_freq = 54000,
+- .pixel_freq = 54000,
++ .pll_freq = 4320000000,
++ .phy_freq = 270000000,
++ .vclk_freq = 54000000,
++ .venc_freq = 54000000,
++ .pixel_freq = 54000000,
+ .pll_od1 = 4,
+ .pll_od2 = 4,
+ .pll_od3 = 1,
+@@ -384,11 +387,11 @@ struct meson_vclk_params {
+ .vclk_div = 1,
+ },
+ [MESON_VCLK_HDMI_DDR_54000] = {
+- .pll_freq = 4320000,
+- .phy_freq = 270000,
+- .vclk_freq = 54000,
+- .venc_freq = 54000,
+- .pixel_freq = 27000,
++ .pll_freq = 4320000000,
++ .phy_freq = 270000000,
++ .vclk_freq = 54000000,
++ .venc_freq = 54000000,
++ .pixel_freq = 27000000,
+ .pll_od1 = 4,
+ .pll_od2 = 4,
+ .pll_od3 = 1,
+@@ -396,11 +399,11 @@ struct meson_vclk_params {
+ .vclk_div = 1,
+ },
+ [MESON_VCLK_HDMI_DDR_148500] = {
+- .pll_freq = 2970000,
+- .phy_freq = 742500,
+- .vclk_freq = 148500,
+- .venc_freq = 148500,
+- .pixel_freq = 74250,
++ .pll_freq = 2970000000,
++ .phy_freq = 742500000,
++ .vclk_freq = 148500000,
++ .venc_freq = 148500000,
++ .pixel_freq = 74250000,
+ .pll_od1 = 4,
+ .pll_od2 = 1,
+ .pll_od3 = 1,
+@@ -408,11 +411,11 @@ struct meson_vclk_params {
+ .vclk_div = 1,
+ },
+ [MESON_VCLK_HDMI_74250] = {
+- .pll_freq = 2970000,
+- .phy_freq = 742500,
+- .vclk_freq = 74250,
+- .venc_freq = 74250,
+- .pixel_freq = 74250,
++ .pll_freq = 2970000000,
++ .phy_freq = 742500000,
++ .vclk_freq = 74250000,
++ .venc_freq = 74250000,
++ .pixel_freq = 74250000,
+ .pll_od1 = 2,
+ .pll_od2 = 2,
+ .pll_od3 = 2,
+@@ -420,11 +423,11 @@ struct meson_vclk_params {
+ .vclk_div = 1,
+ },
+ [MESON_VCLK_HDMI_148500] = {
+- .pll_freq = 2970000,
+- .phy_freq = 1485000,
+- .vclk_freq = 148500,
+- .venc_freq = 148500,
+- .pixel_freq = 148500,
++ .pll_freq = 2970000000,
++ .phy_freq = 1485000000,
++ .vclk_freq = 148500000,
++ .venc_freq = 148500000,
++ .pixel_freq = 148500000,
+ .pll_od1 = 1,
+ .pll_od2 = 2,
+ .pll_od3 = 2,
+@@ -432,11 +435,11 @@ struct meson_vclk_params {
+ .vclk_div = 1,
+ },
+ [MESON_VCLK_HDMI_297000] = {
+- .pll_freq = 5940000,
+- .phy_freq = 2970000,
+- .venc_freq = 297000,
+- .vclk_freq = 297000,
+- .pixel_freq = 297000,
++ .pll_freq = 5940000000,
++ .phy_freq = 2970000000,
++ .venc_freq = 297000000,
++ .vclk_freq = 297000000,
++ .pixel_freq = 297000000,
+ .pll_od1 = 2,
+ .pll_od2 = 1,
+ .pll_od3 = 1,
+@@ -444,11 +447,11 @@ struct meson_vclk_params {
+ .vclk_div = 2,
+ },
+ [MESON_VCLK_HDMI_594000] = {
+- .pll_freq = 5940000,
+- .phy_freq = 5940000,
+- .venc_freq = 594000,
+- .vclk_freq = 594000,
+- .pixel_freq = 594000,
++ .pll_freq = 5940000000,
++ .phy_freq = 5940000000,
++ .venc_freq = 594000000,
++ .vclk_freq = 594000000,
++ .pixel_freq = 594000000,
+ .pll_od1 = 1,
+ .pll_od2 = 1,
+ .pll_od3 = 2,
+@@ -456,11 +459,11 @@ struct meson_vclk_params {
+ .vclk_div = 1,
+ },
+ [MESON_VCLK_HDMI_594000_YUV420] = {
+- .pll_freq = 5940000,
+- .phy_freq = 2970000,
+- .venc_freq = 594000,
+- .vclk_freq = 594000,
+- .pixel_freq = 297000,
++ .pll_freq = 5940000000,
++ .phy_freq = 2970000000,
++ .venc_freq = 594000000,
++ .vclk_freq = 594000000,
++ .pixel_freq = 297000000,
+ .pll_od1 = 2,
+ .pll_od2 = 1,
+ .pll_od3 = 1,
+@@ -617,16 +620,16 @@ static void meson_hdmi_pll_set_params(struct meson_drm *priv, unsigned int m,
+ 3 << 20, pll_od_to_reg(od3) << 20);
+ }
+
+-#define XTAL_FREQ 24000
++#define XTAL_FREQ (24 * 1000 * 1000)
+
+ static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv,
+- unsigned int pll_freq)
++ unsigned long long pll_freq)
+ {
+ /* The GXBB PLL has a /2 pre-multiplier */
+ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB))
+- pll_freq /= 2;
++ pll_freq = DIV_ROUND_DOWN_ULL(pll_freq, 2);
+
+- return pll_freq / XTAL_FREQ;
++ return DIV_ROUND_DOWN_ULL(pll_freq, XTAL_FREQ);
+ }
+
+ #define HDMI_FRAC_MAX_GXBB 4096
+@@ -635,12 +638,13 @@ static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv,
+
+ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv,
+ unsigned int m,
+- unsigned int pll_freq)
++ unsigned long long pll_freq)
+ {
+- unsigned int parent_freq = XTAL_FREQ;
++ unsigned long long parent_freq = XTAL_FREQ;
+ unsigned int frac_max = HDMI_FRAC_MAX_GXL;
+ unsigned int frac_m;
+ unsigned int frac;
++ u32 remainder;
+
+ /* The GXBB PLL has a /2 pre-multiplier and a larger FRAC width */
+ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) {
+@@ -652,11 +656,11 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv,
+ frac_max = HDMI_FRAC_MAX_G12A;
+
+ /* We can have a perfect match !*/
+- if (pll_freq / m == parent_freq &&
+- pll_freq % m == 0)
++ if (div_u64_rem(pll_freq, m, &remainder) == parent_freq &&
++ remainder == 0)
+ return 0;
+
+- frac = div_u64((u64)pll_freq * (u64)frac_max, parent_freq);
++ frac = mul_u64_u64_div_u64(pll_freq, frac_max, parent_freq);
+ frac_m = m * frac_max;
+ if (frac_m > frac)
+ return frac_max;
+@@ -666,7 +670,7 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv,
+ }
+
+ static bool meson_hdmi_pll_validate_params(struct meson_drm *priv,
+- unsigned int m,
++ unsigned long long m,
+ unsigned int frac)
+ {
+ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) {
+@@ -694,7 +698,7 @@ static bool meson_hdmi_pll_validate_params(struct meson_drm *priv,
+ }
+
+ static bool meson_hdmi_pll_find_params(struct meson_drm *priv,
+- unsigned int freq,
++ unsigned long long freq,
+ unsigned int *m,
+ unsigned int *frac,
+ unsigned int *od)
+@@ -706,7 +710,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv,
+ continue;
+ *frac = meson_hdmi_pll_get_frac(priv, *m, freq * *od);
+
+- DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d\n",
++ DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d\n",
+ freq, *m, *frac, *od);
+
+ if (meson_hdmi_pll_validate_params(priv, *m, *frac))
+@@ -718,7 +722,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv,
+
+ /* pll_freq is the frequency after the OD dividers */
+ enum drm_mode_status
+-meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq)
++meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq)
+ {
+ unsigned int od, m, frac;
+
+@@ -741,7 +745,7 @@ EXPORT_SYMBOL_GPL(meson_vclk_dmt_supported_freq);
+
+ /* pll_freq is the frequency after the OD dividers */
+ static void meson_hdmi_pll_generic_set(struct meson_drm *priv,
+- unsigned int pll_freq)
++ unsigned long long pll_freq)
+ {
+ unsigned int od, m, frac, od1, od2, od3;
+
+@@ -756,7 +760,7 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv,
+ od1 = od / od2;
+ }
+
+- DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d/%d/%d\n",
++ DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d/%d/%d\n",
+ pll_freq, m, frac, od1, od2, od3);
+
+ meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3);
+@@ -764,17 +768,18 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv,
+ return;
+ }
+
+- DRM_ERROR("Fatal, unable to find parameters for PLL freq %d\n",
++ DRM_ERROR("Fatal, unable to find parameters for PLL freq %lluHz\n",
+ pll_freq);
+ }
+
+ enum drm_mode_status
+-meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq,
+- unsigned int vclk_freq)
++meson_vclk_vic_supported_freq(struct meson_drm *priv,
++ unsigned long long phy_freq,
++ unsigned long long vclk_freq)
+ {
+ int i;
+
+- DRM_DEBUG_DRIVER("phy_freq = %d vclk_freq = %d\n",
++ DRM_DEBUG_DRIVER("phy_freq = %lluHz vclk_freq = %lluHz\n",
+ phy_freq, vclk_freq);
+
+ /* Check against soc revision/package limits */
+@@ -785,19 +790,19 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq,
+ }
+
+ for (i = 0 ; params[i].pixel_freq ; ++i) {
+- DRM_DEBUG_DRIVER("i = %d pixel_freq = %d alt = %d\n",
++ DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n",
+ i, params[i].pixel_freq,
+- FREQ_1000_1001(params[i].pixel_freq));
+- DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n",
++ PIXEL_FREQ_1000_1001(params[i].pixel_freq));
++ DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n",
+ i, params[i].phy_freq,
+- FREQ_1000_1001(params[i].phy_freq/10)*10);
++ PHY_FREQ_1000_1001(params[i].phy_freq));
+ /* Match strict frequency */
+ if (phy_freq == params[i].phy_freq &&
+ vclk_freq == params[i].vclk_freq)
+ return MODE_OK;
+ /* Match 1000/1001 variant */
+- if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/10)*10) &&
+- vclk_freq == FREQ_1000_1001(params[i].vclk_freq))
++ if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) &&
++ vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq))
+ return MODE_OK;
+ }
+
+@@ -805,8 +810,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq,
+ }
+ EXPORT_SYMBOL_GPL(meson_vclk_vic_supported_freq);
+
+-static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq,
+- unsigned int od1, unsigned int od2, unsigned int od3,
++static void meson_vclk_set(struct meson_drm *priv,
++ unsigned long long pll_base_freq, unsigned int od1,
++ unsigned int od2, unsigned int od3,
+ unsigned int vid_pll_div, unsigned int vclk_div,
+ unsigned int hdmi_tx_div, unsigned int venc_div,
+ bool hdmi_use_enci, bool vic_alternate_clock)
+@@ -826,15 +832,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq,
+ meson_hdmi_pll_generic_set(priv, pll_base_freq);
+ } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) {
+ switch (pll_base_freq) {
+- case 2970000:
++ case 2970000000:
+ m = 0x3d;
+ frac = vic_alternate_clock ? 0xd02 : 0xe00;
+ break;
+- case 4320000:
++ case 4320000000:
+ m = vic_alternate_clock ? 0x59 : 0x5a;
+ frac = vic_alternate_clock ? 0xe8f : 0;
+ break;
+- case 5940000:
++ case 5940000000:
+ m = 0x7b;
+ frac = vic_alternate_clock ? 0xa05 : 0xc00;
+ break;
+@@ -844,15 +850,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq,
+ } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) ||
+ meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL)) {
+ switch (pll_base_freq) {
+- case 2970000:
++ case 2970000000:
+ m = 0x7b;
+ frac = vic_alternate_clock ? 0x281 : 0x300;
+ break;
+- case 4320000:
++ case 4320000000:
+ m = vic_alternate_clock ? 0xb3 : 0xb4;
+ frac = vic_alternate_clock ? 0x347 : 0;
+ break;
+- case 5940000:
++ case 5940000000:
+ m = 0xf7;
+ frac = vic_alternate_clock ? 0x102 : 0x200;
+ break;
+@@ -861,15 +867,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq,
+ meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3);
+ } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) {
+ switch (pll_base_freq) {
+- case 2970000:
++ case 2970000000:
+ m = 0x7b;
+ frac = vic_alternate_clock ? 0x140b4 : 0x18000;
+ break;
+- case 4320000:
++ case 4320000000:
+ m = vic_alternate_clock ? 0xb3 : 0xb4;
+ frac = vic_alternate_clock ? 0x1a3ee : 0;
+ break;
+- case 5940000:
++ case 5940000000:
+ m = 0xf7;
+ frac = vic_alternate_clock ? 0x8148 : 0x10000;
+ break;
+@@ -1025,14 +1031,14 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq,
+ }
+
+ void meson_vclk_setup(struct meson_drm *priv, unsigned int target,
+- unsigned int phy_freq, unsigned int vclk_freq,
+- unsigned int venc_freq, unsigned int dac_freq,
++ unsigned long long phy_freq, unsigned long long vclk_freq,
++ unsigned long long venc_freq, unsigned long long dac_freq,
+ bool hdmi_use_enci)
+ {
+ bool vic_alternate_clock = false;
+- unsigned int freq;
+- unsigned int hdmi_tx_div;
+- unsigned int venc_div;
++ unsigned long long freq;
++ unsigned long long hdmi_tx_div;
++ unsigned long long venc_div;
+
+ if (target == MESON_VCLK_TARGET_CVBS) {
+ meson_venci_cvbs_clock_config(priv);
+@@ -1052,27 +1058,27 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target,
+ return;
+ }
+
+- hdmi_tx_div = vclk_freq / dac_freq;
++ hdmi_tx_div = DIV_ROUND_DOWN_ULL(vclk_freq, dac_freq);
+
+ if (hdmi_tx_div == 0) {
+- pr_err("Fatal Error, invalid HDMI-TX freq %d\n",
++ pr_err("Fatal Error, invalid HDMI-TX freq %lluHz\n",
+ dac_freq);
+ return;
+ }
+
+- venc_div = vclk_freq / venc_freq;
++ venc_div = DIV_ROUND_DOWN_ULL(vclk_freq, venc_freq);
+
+ if (venc_div == 0) {
+- pr_err("Fatal Error, invalid HDMI venc freq %d\n",
++ pr_err("Fatal Error, invalid HDMI venc freq %lluHz\n",
+ venc_freq);
+ return;
+ }
+
+ for (freq = 0 ; params[freq].pixel_freq ; ++freq) {
+ if ((phy_freq == params[freq].phy_freq ||
+- phy_freq == FREQ_1000_1001(params[freq].phy_freq/10)*10) &&
++ phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) &&
+ (vclk_freq == params[freq].vclk_freq ||
+- vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) {
++ vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) {
+ if (vclk_freq != params[freq].vclk_freq)
+ vic_alternate_clock = true;
+ else
+@@ -1098,7 +1104,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target,
+ }
+
+ if (!params[freq].pixel_freq) {
+- pr_err("Fatal Error, invalid HDMI vclk freq %d\n", vclk_freq);
++ pr_err("Fatal Error, invalid HDMI vclk freq %lluHz\n",
++ vclk_freq);
+ return;
+ }
+
+diff --git a/drivers/gpu/drm/meson/meson_vclk.h b/drivers/gpu/drm/meson/meson_vclk.h
+index 60617aaf18dd1..7ac55744e5749 100644
+--- a/drivers/gpu/drm/meson/meson_vclk.h
++++ b/drivers/gpu/drm/meson/meson_vclk.h
+@@ -20,17 +20,18 @@ enum {
+ };
+
+ /* 27MHz is the CVBS Pixel Clock */
+-#define MESON_VCLK_CVBS 27000
++#define MESON_VCLK_CVBS (27 * 1000 * 1000)
+
+ enum drm_mode_status
+-meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq);
++meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq);
+ enum drm_mode_status
+-meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq,
+- unsigned int vclk_freq);
++meson_vclk_vic_supported_freq(struct meson_drm *priv,
++ unsigned long long phy_freq,
++ unsigned long long vclk_freq);
+
+ void meson_vclk_setup(struct meson_drm *priv, unsigned int target,
+- unsigned int phy_freq, unsigned int vclk_freq,
+- unsigned int venc_freq, unsigned int dac_freq,
++ unsigned long long phy_freq, unsigned long long vclk_freq,
++ unsigned long long venc_freq, unsigned long long dac_freq,
+ bool hdmi_use_enci);
+
+ #endif /* __MESON_VCLK_H */
+--
+2.39.5
+
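+For the 1000/1001 handling introduced above, the split between the two
+macros can be sanity-checked with a small host-side sketch (plain C;
+the DIV_ROUND_*_ULL helpers are reimplemented here, and the values are
+the 594MHz entry from params[]):
+
+   #include <stdio.h>
+
+   /* host-side stand-ins for the kernel's 64-bit division helpers */
+   #define DIV_ROUND_CLOSEST_ULL(x, d) (((x) + (d) / 2) / (d))
+   #define DIV_ROUND_DOWN_ULL(x, d)    ((x) / (d))
+
+   #define PIXEL_FREQ_1000_1001(f) \
+           DIV_ROUND_CLOSEST_ULL((f) * 1000ULL, 1001ULL)
+   #define PHY_FREQ_1000_1001(f) \
+           (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL((f), 10ULL)) * 10)
+
+   int main(void)
+   {
+           unsigned long long vclk = 594000000ULL;  /* 4k60 pixel clock */
+           unsigned long long phy  = 5940000000ULL; /* 10x TMDS rate    */
+
+           /* prints 593406593, the 59.94Hz variant of the 594MHz clock */
+           printf("pixel alt = %llu\n", PIXEL_FREQ_1000_1001(vclk));
+           /* prints 5934065930; rounding the PHY rate after dividing by
+            * 10 keeps it an exact 10x multiple of the pixel rate
+            */
+           printf("phy alt   = %llu\n", PHY_FREQ_1000_1001(phy));
+           return 0;
+   }
+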
--- /dev/null
+From 256a362e4674a14e838047c10ce2f4286c807e46 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 02:30:34 +0100
+Subject: fix a couple of races in MNT_TREE_BENEATH handling by do_move_mount()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit 0d039eac6e5950f9d1ecc9e410c2fd1feaeab3b6 ]
+
+Normally do_lock_mount(path, _) is locking a mountpoint pinned by
+*path and at the time when matching unlock_mount() unlocks that
+location it is still pinned by the same thing.
+
+Unfortunately, for the 'beneath' case it's no longer that simple -
+the object being locked is not the one *path points to. It's the
+mountpoint of path->mnt. The thing is, without sufficient locking
+->mnt_parent may change under us and none of the locks are held
+at that point. The rules are
+ * mount_lock stabilizes m->mnt_parent for any mount m.
+ * namespace_sem stabilizes m->mnt_parent, provided that
+m is mounted.
+ * if either of the above holds and refcount of m is positive,
+we are guaranteed the same for refcount of m->mnt_parent.
+
+namespace_sem nests inside inode_lock(), so do_lock_mount() has
+to take inode_lock() before grabbing namespace_sem. It does
+recheck that path->mnt is still mounted in the same place after
+getting namespace_sem, and it does take care to pin the dentry.
+It is needed, since otherwise we might end up with racing mount --move
+(or umount) happening while we were getting locks; in that case
+dentry would no longer be a mountpoint and could've been evicted
+on memory pressure along with its inode - not something you want
+when grabbing lock on that inode.
+
+However, pinning a dentry is not enough - the matching mount is
+also pinned only by the fact that path->mnt is mounted on top of it
+and at that point we are not holding any locks whatsoever, so
+the same kind of races could end up with all references to
+that mount gone just as we are about to enter inode_lock().
+If that happens, we are left with filesystem being shut down while
+we are holding a dentry reference on it; results are not pretty.
+
+What we need to do is grab both dentry and mount at the same time;
+that makes inode_lock() safe *and* avoids the problem with fs getting
+shut down under us. After taking namespace_sem we verify that
+path->mnt is still mounted (which stabilizes its ->mnt_parent) and
+check that it's still mounted at the same place. From that point
+on to the matching namespace_unlock() we are guaranteed that
+mount/dentry pair we'd grabbed are also pinned by being the mountpoint
+of path->mnt, so we can quietly drop both the dentry reference (as
+the current code does) and mnt one - it's OK to do under namespace_sem,
+since we are not dropping the final refs.
+
+That solves the problem on do_lock_mount() side; unlock_mount()
+also has one, since dentry is guaranteed to stay pinned only until
+the namespace_unlock(). That's easy to fix - just have inode_unlock()
+done earlier, while it's still pinned by mp->m_dentry.
+
+Fixes: 6ac392815628 "fs: allow to mount beneath top mount" # v6.5+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 69 ++++++++++++++++++++++++++------------------------
+ 1 file changed, 36 insertions(+), 33 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index d401486fe95d1..280a6ebc46d93 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2640,56 +2640,62 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath)
+ struct vfsmount *mnt = path->mnt;
+ struct dentry *dentry;
+ struct mountpoint *mp = ERR_PTR(-ENOENT);
++ struct path under = {};
+
+ for (;;) {
+- struct mount *m;
++ struct mount *m = real_mount(mnt);
+
+ if (beneath) {
+- m = real_mount(mnt);
++ path_put(&under);
+ read_seqlock_excl(&mount_lock);
+- dentry = dget(m->mnt_mountpoint);
++ under.mnt = mntget(&m->mnt_parent->mnt);
++ under.dentry = dget(m->mnt_mountpoint);
+ read_sequnlock_excl(&mount_lock);
++ dentry = under.dentry;
+ } else {
+ dentry = path->dentry;
+ }
+
+ inode_lock(dentry->d_inode);
+- if (unlikely(cant_mount(dentry))) {
+- inode_unlock(dentry->d_inode);
+- goto out;
+- }
+-
+ namespace_lock();
+
+- if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) {
++ if (unlikely(cant_mount(dentry) || !is_mounted(mnt)))
++ break; // not to be mounted on
++
++ if (beneath && unlikely(m->mnt_mountpoint != dentry ||
++ &m->mnt_parent->mnt != under.mnt)) {
+ namespace_unlock();
+ inode_unlock(dentry->d_inode);
+- goto out;
++ continue; // got moved
+ }
+
+ mnt = lookup_mnt(path);
+- if (likely(!mnt))
++ if (unlikely(mnt)) {
++ namespace_unlock();
++ inode_unlock(dentry->d_inode);
++ path_put(path);
++ path->mnt = mnt;
++ path->dentry = dget(mnt->mnt_root);
++ continue; // got overmounted
++ }
++ mp = get_mountpoint(dentry);
++ if (IS_ERR(mp))
+ break;
+-
+- namespace_unlock();
+- inode_unlock(dentry->d_inode);
+- if (beneath)
+- dput(dentry);
+- path_put(path);
+- path->mnt = mnt;
+- path->dentry = dget(mnt->mnt_root);
+- }
+-
+- mp = get_mountpoint(dentry);
+- if (IS_ERR(mp)) {
+- namespace_unlock();
+- inode_unlock(dentry->d_inode);
++ if (beneath) {
++ /*
++ * @under duplicates the references that will stay
++ * at least until namespace_unlock(), so the path_put()
++ * below is safe (and OK to do under namespace_lock -
++ * we are not dropping the final references here).
++ */
++ path_put(&under);
++ }
++ return mp;
+ }
+-
+-out:
++ namespace_unlock();
++ inode_unlock(dentry->d_inode);
+ if (beneath)
+- dput(dentry);
+-
++ path_put(&under);
+ return mp;
+ }
+
+@@ -2700,14 +2706,11 @@ static inline struct mountpoint *lock_mount(struct path *path)
+
+ static void unlock_mount(struct mountpoint *where)
+ {
+- struct dentry *dentry = where->m_dentry;
+-
++ inode_unlock(where->m_dentry->d_inode);
+ read_seqlock_excl(&mount_lock);
+ put_mountpoint(where);
+ read_sequnlock_excl(&mount_lock);
+-
+ namespace_unlock();
+- inode_unlock(dentry->d_inode);
+ }
+
+ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
+--
+2.39.5
+
--- /dev/null
+From 04df2853e177fb661b6fab3c606541ac28d30881 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Apr 2025 15:22:47 +0200
+Subject: fs/xattr: Fix handling of AT_FDCWD in setxattrat(2) and getxattrat(2)
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit f520bed25d17bb31c2d2d72b0a785b593a4e3179 ]
+
+Currently, setxattrat(2) and getxattrat(2) wrongly handle calls of the
+form setxattrat(AT_FDCWD, NULL, AT_EMPTY_PATH, ...) and fail with an
+-EBADF error instead of operating on the CWD. Fix it.
+
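+For reference, the call shape that used to fail looks roughly like the
+sketch below. The syscall number (463 in the unified syscall table) and
+the struct layout are assumptions taken from the syscall introduction
+referenced in the Fixes tag; check the local headers before relying on
+them:
+
+   #define _GNU_SOURCE
+   #include <fcntl.h>              /* AT_FDCWD, AT_EMPTY_PATH */
+   #include <stdio.h>
+   #include <unistd.h>
+   #include <sys/syscall.h>
+
+   /* assumed layout of struct xattr_args from the *at xattr series */
+   struct xattr_args {
+           unsigned long long value;       /* pointer to value buffer */
+           unsigned int size;
+           unsigned int flags;
+   };
+
+   int main(void)
+   {
+           char v[] = "1";
+           struct xattr_args args = {
+                   .value = (unsigned long long)(unsigned long)v,
+                   .size = sizeof(v),
+           };
+
+           /* NULL path + AT_EMPTY_PATH + AT_FDCWD targets the CWD
+            * itself; before this fix the call failed with -EBADF
+            */
+           long ret = syscall(463, AT_FDCWD, NULL, AT_EMPTY_PATH,
+                              "user.test", &args, sizeof(args));
+           if (ret)
+                   perror("setxattrat");
+           return ret ? 1 : 0;
+   }
+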
+Fixes: 6140be90ec70 ("fs/xattr: add *at family syscalls")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/20250424132246.16822-2-jack@suse.cz
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xattr.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/xattr.c b/fs/xattr.c
+index 02bee149ad967..fabb2a04501ee 100644
+--- a/fs/xattr.c
++++ b/fs/xattr.c
+@@ -703,7 +703,7 @@ static int path_setxattrat(int dfd, const char __user *pathname,
+ return error;
+
+ filename = getname_maybe_null(pathname, at_flags);
+- if (!filename) {
++ if (!filename && dfd >= 0) {
+ CLASS(fd, f)(dfd);
+ if (fd_empty(f))
+ error = -EBADF;
+@@ -847,7 +847,7 @@ static ssize_t path_getxattrat(int dfd, const char __user *pathname,
+ return error;
+
+ filename = getname_maybe_null(pathname, at_flags);
+- if (!filename) {
++ if (!filename && dfd >= 0) {
+ CLASS(fd, f)(dfd);
+ if (fd_empty(f))
+ return -EBADF;
+--
+2.39.5
+
--- /dev/null
+From 56dda222aa207c6abc0196697b72a678cda1716a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Apr 2025 12:38:20 -0700
+Subject: iommu/amd: Return an error if vCPU affinity is set for non-vCPU IRTE
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 07172206a26dcf3f0bf7c3ecaadd4242b008ea54 ]
+
+Return -EINVAL instead of success if amd_ir_set_vcpu_affinity() is
+invoked without use_vapic; lying to KVM about whether or not the IRTE was
+configured to post IRQs is all kinds of bad.
+
+Fixes: d98de49a53e4 ("iommu/amd: Enable vAPIC interrupt remapping mode by default")
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-ID: <20250404193923.1413163-6-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/amd/iommu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
+index cd5116d8c3b28..b3a01b7757ee1 100644
+--- a/drivers/iommu/amd/iommu.c
++++ b/drivers/iommu/amd/iommu.c
+@@ -3850,7 +3850,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
+ * we should not modify the IRTE
+ */
+ if (!dev_data || !dev_data->use_vapic)
+- return 0;
++ return -EINVAL;
+
+ ir_data->cfg = irqd_cfg(data);
+ pi_data->ir_data = ir_data;
+--
+2.39.5
+
--- /dev/null
+From 8a8de2577691defaa57683565d97aa2291292f65 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Apr 2025 20:15:41 +0800
+Subject: LoongArch: Make do_xyz() exception handlers more robust
+
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+[ Upstream commit cc73cc6bcdb5f959670e3ff9abdc62461452ddff ]
+
+Currently, interrupts need to be disabled before single-step mode is
+set; this requires that CSR_PRMD_PIE be cleared in save_local_irqflag(),
+which is called by setup_singlestep(). This is reasonable.
+
+But in the first kprobe breakpoint exception, if the irq is enabled at
+the beginning of do_bp(), it will not be disabled at the end of do_bp()
+because CSR_PRMD_PIE has already been cleared in save_local_irqflag().
+In that case, the exception context may be corrupted when restoring the
+exception after do_bp() in handle_bp(), which is not reasonable.
+
+In order to restore the exception safely in handle_bp(), the irq must
+be disabled at the end of do_bp(), so add a local variable to record
+the original interrupt status of the parent context, then use it as the
+check condition to enable and disable the irq in do_bp().
+
+While at it, do the similar thing for other do_xyz() exception handlers
+to make them more robust.
+
+Fixes: 6d4cc40fb5f5 ("LoongArch: Add kprobes support")
+Suggested-by: Jinyang He <hejinyang@loongson.cn>
+Suggested-by: Huacai Chen <chenhuacai@loongson.cn>
+Co-developed-by: Tianyang Zhang <zhangtianyang@loongson.cn>
+Signed-off-by: Tianyang Zhang <zhangtianyang@loongson.cn>
+Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/loongarch/kernel/traps.c | 20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
+index 2ec3106c0da3d..47fc2de6d1501 100644
+--- a/arch/loongarch/kernel/traps.c
++++ b/arch/loongarch/kernel/traps.c
+@@ -553,9 +553,10 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs)
+ die_if_kernel("Kernel ale access", regs);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
+ #else
++ bool pie = regs_irqs_disabled(regs);
+ unsigned int *pc;
+
+- if (regs->csr_prmd & CSR_PRMD_PIE)
++ if (!pie)
+ local_irq_enable();
+
+ perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
+@@ -582,7 +583,7 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs)
+ die_if_kernel("Kernel ale access", regs);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
+ out:
+- if (regs->csr_prmd & CSR_PRMD_PIE)
++ if (!pie)
+ local_irq_disable();
+ #endif
+ irqentry_exit(regs, state);
+@@ -621,12 +622,13 @@ static void bug_handler(struct pt_regs *regs)
+ asmlinkage void noinstr do_bce(struct pt_regs *regs)
+ {
+ bool user = user_mode(regs);
++ bool pie = regs_irqs_disabled(regs);
+ unsigned long era = exception_era(regs);
+ u64 badv = 0, lower = 0, upper = ULONG_MAX;
+ union loongarch_instruction insn;
+ irqentry_state_t state = irqentry_enter(regs);
+
+- if (regs->csr_prmd & CSR_PRMD_PIE)
++ if (!pie)
+ local_irq_enable();
+
+ current->thread.trap_nr = read_csr_excode();
+@@ -692,7 +694,7 @@ asmlinkage void noinstr do_bce(struct pt_regs *regs)
+ force_sig_bnderr((void __user *)badv, (void __user *)lower, (void __user *)upper);
+
+ out:
+- if (regs->csr_prmd & CSR_PRMD_PIE)
++ if (!pie)
+ local_irq_disable();
+
+ irqentry_exit(regs, state);
+@@ -710,11 +712,12 @@ asmlinkage void noinstr do_bce(struct pt_regs *regs)
+ asmlinkage void noinstr do_bp(struct pt_regs *regs)
+ {
+ bool user = user_mode(regs);
++ bool pie = regs_irqs_disabled(regs);
+ unsigned int opcode, bcode;
+ unsigned long era = exception_era(regs);
+ irqentry_state_t state = irqentry_enter(regs);
+
+- if (regs->csr_prmd & CSR_PRMD_PIE)
++ if (!pie)
+ local_irq_enable();
+
+ if (__get_inst(&opcode, (u32 *)era, user))
+@@ -780,7 +783,7 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
+ }
+
+ out:
+- if (regs->csr_prmd & CSR_PRMD_PIE)
++ if (!pie)
+ local_irq_disable();
+
+ irqentry_exit(regs, state);
+@@ -1015,6 +1018,7 @@ static void init_restore_lbt(void)
+
+ asmlinkage void noinstr do_lbt(struct pt_regs *regs)
+ {
++ bool pie = regs_irqs_disabled(regs);
+ irqentry_state_t state = irqentry_enter(regs);
+
+ /*
+@@ -1024,7 +1028,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs)
+ * (including the user using 'MOVGR2GCSR' to turn on TM, which
+ * will not trigger the BTE), we need to check PRMD first.
+ */
+- if (regs->csr_prmd & CSR_PRMD_PIE)
++ if (!pie)
+ local_irq_enable();
+
+ if (!cpu_has_lbt) {
+@@ -1038,7 +1042,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs)
+ preempt_enable();
+
+ out:
+- if (regs->csr_prmd & CSR_PRMD_PIE)
++ if (!pie)
+ local_irq_disable();
+
+ irqentry_exit(regs, state);
+--
+2.39.5
+
--- /dev/null
+From 61a82ce754843638c828e0a2df75e08826586e0c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Apr 2025 20:15:41 +0800
+Subject: LoongArch: Make regs_irqs_disabled() more clear
+
+From: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+[ Upstream commit bb0511d59db9b3e40c8d51f0d151ccd0fd44071d ]
+
+In the current code, the definition of regs_irqs_disabled() is actually
+"!(regs->csr_prmd & CSR_CRMD_IE)" because arch_irqs_disabled_flags() is
+defined as "!(flags & CSR_CRMD_IE)", it looks a little strange.
+
+Define regs_irqs_disabled() as !(regs->csr_prmd & CSR_PRMD_PIE) directly
+to make it clearer; there is no functional change.
+
+While at it, the return value of regs_irqs_disabled() is true or false,
+so change its type to reflect that and also make it always inline.
+
+Fixes: 803b0fc5c3f2 ("LoongArch: Add process management")
+Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/loongarch/include/asm/ptrace.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
+index f3ddaed9ef7f0..a5b63c84f8541 100644
+--- a/arch/loongarch/include/asm/ptrace.h
++++ b/arch/loongarch/include/asm/ptrace.h
+@@ -33,9 +33,9 @@ struct pt_regs {
+ unsigned long __last[];
+ } __aligned(8);
+
+-static inline int regs_irqs_disabled(struct pt_regs *regs)
++static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
+ {
+- return arch_irqs_disabled_flags(regs->csr_prmd);
++ return !(regs->csr_prmd & CSR_PRMD_PIE);
+ }
+
+ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+--
+2.39.5
+
--- /dev/null
+From baeab4292eb3bbe2b1985c049105f5a53d987d2f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Apr 2025 20:15:22 +0800
+Subject: LoongArch: Select ARCH_USE_MEMTEST
+
+From: Yuli Wang <wangyuli@uniontech.com>
+
+[ Upstream commit fb8e9f59d6f292c3d9fea6c155c22ea5fc3053ab ]
+
+As of commit dce44566192e ("mm/memtest: add ARCH_USE_MEMTEST"),
+architectures must select ARCH_USE_MEMTEST to enable CONFIG_MEMTEST.
+
+Commit 628c3bb40e9a ("LoongArch: Add boot and setup routines") added
+support for early_memtest but did not select ARCH_USE_MEMTEST.
+
+Fixes: 628c3bb40e9a ("LoongArch: Add boot and setup routines")
+Tested-by: Erpeng Xu <xuerpeng@uniontech.com>
+Tested-by: Yuli Wang <wangyuli@uniontech.com>
+Signed-off-by: Yuli Wang <wangyuli@uniontech.com>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/loongarch/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
+index bdb989c49c094..b744bd73f08ee 100644
+--- a/arch/loongarch/Kconfig
++++ b/arch/loongarch/Kconfig
+@@ -71,6 +71,7 @@ config LOONGARCH
+ select ARCH_SUPPORTS_RT
+ select ARCH_USE_BUILTIN_BSWAP
+ select ARCH_USE_CMPXCHG_LOCKREF
++ select ARCH_USE_MEMTEST
+ select ARCH_USE_QUEUED_RWLOCKS
+ select ARCH_USE_QUEUED_SPINLOCKS
+ select ARCH_WANT_DEFAULT_BPF_JIT
+--
+2.39.5
+
--- /dev/null
+From c848b90f5f89d68c3a1ca4f364eae1c1723ef77b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 06:47:24 +0200
+Subject: net: dp83822: Fix OF_MDIO config check
+
+From: Johannes Schneider <johannes.schneider@leica-geosystems.com>
+
+[ Upstream commit 607b310ada5ef4c738f9dffc758a62a9d309b084 ]
+
+When CONFIG_OF_MDIO is set to be a module, the code block is not
+compiled. Use the IS_ENABLED() macro, which checks for both built-in
+and modular builds.
+
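+The distinction matters because a tristate Kconfig symbol expands to
+one of two macros, and a plain #ifdef only sees the built-in one. A
+minimal host-side sketch of the IS_ENABLED() machinery (simplified from
+include/linux/kconfig.h, pretending OF_MDIO=m):
+
+   #include <stdio.h>
+
+   #define CONFIG_OF_MDIO_MODULE 1  /* what OF_MDIO=m generates;
+                                     * OF_MDIO=y would instead define
+                                     * CONFIG_OF_MDIO
+                                     */
+
+   #define __ARG_PLACEHOLDER_1 0,
+   #define __take_second_arg(_ignored, val, ...) val
+   #define __is_defined(x) ___is_defined(x)
+   #define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
+   #define ____is_defined(junk) __take_second_arg(junk 1, 0)
+   #define IS_BUILTIN(option) __is_defined(option)
+   #define IS_MODULE(option) __is_defined(option##_MODULE)
+   #define IS_ENABLED(option) (IS_BUILTIN(option) || IS_MODULE(option))
+
+   int main(void)
+   {
+   #ifdef CONFIG_OF_MDIO
+           puts("#ifdef sees it");         /* not printed for =m */
+   #endif
+   #if IS_ENABLED(CONFIG_OF_MDIO)
+           puts("IS_ENABLED sees it");     /* printed for =y and =m */
+   #endif
+           return 0;
+   }
+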
+Fixes: 5dc39fd5ef35 ("net: phy: DP83822: Add ability to advertise Fiber connection")
+Signed-off-by: Johannes Schneider <johannes.schneider@leica-geosystems.com>
+Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
+Link: https://patch.msgid.link/20250423044724.1284492-1-johannes.schneider@leica-geosystems.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/dp83822.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
+index 3662f3905d5ad..89094aa6dbbdb 100644
+--- a/drivers/net/phy/dp83822.c
++++ b/drivers/net/phy/dp83822.c
+@@ -730,7 +730,7 @@ static int dp83822_phy_reset(struct phy_device *phydev)
+ return phydev->drv->config_init(phydev);
+ }
+
+-#ifdef CONFIG_OF_MDIO
++#if IS_ENABLED(CONFIG_OF_MDIO)
+ static const u32 tx_amplitude_100base_tx_gain[] = {
+ 80, 82, 83, 85, 87, 88, 90, 92,
+ 93, 95, 97, 98, 100, 102, 103, 105,
+--
+2.39.5
+
--- /dev/null
+From 1d7ada5ce471966bd32c227dc6a731c71e00d1a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Apr 2025 04:10:20 +0100
+Subject: net: dsa: mt7530: sync driver-specific behavior of MT7531 variants
+
+From: Daniel Golle <daniel@makrotopia.org>
+
+[ Upstream commit 497041d763016c2e8314d2f6a329a9b77c3797ca ]
+
+MT7531 standalone and MMIO variants found in MT7988 and EN7581 share
+most basic properties. Despite that, assisted_learning_on_cpu_port and
+mtu_enforcement_ingress were only applied for MT7531 but not for MT7988
+or EN7581, causing the expected issues on MMIO devices.
+
+Apply both settings equally also for MT7988 and EN7581 by moving both
+assignments from mt7531_setup() to mt7531_setup_common().
+
+This fixes unwanted flooding of packets due to unknown unicast
+during DA lookup, as well as issues with heterogeneous MTU settings.
+
+Fixes: 7f54cc9772ce ("net: dsa: mt7530: split-off common parts from mt7531_setup")
+Signed-off-by: Daniel Golle <daniel@makrotopia.org>
+Reviewed-by: Chester A. Unal <chester.a.unal@arinc9.com>
+Link: https://patch.msgid.link/89ed7ec6d4fa0395ac53ad2809742bb1ce61ed12.1745290867.git.daniel@makrotopia.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/mt7530.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
+index 5883eb93efb11..22513f3d56db1 100644
+--- a/drivers/net/dsa/mt7530.c
++++ b/drivers/net/dsa/mt7530.c
+@@ -2541,6 +2541,9 @@ mt7531_setup_common(struct dsa_switch *ds)
+ struct mt7530_priv *priv = ds->priv;
+ int ret, i;
+
++ ds->assisted_learning_on_cpu_port = true;
++ ds->mtu_enforcement_ingress = true;
++
+ mt753x_trap_frames(priv);
+
+ /* Enable and reset MIB counters */
+@@ -2688,9 +2691,6 @@ mt7531_setup(struct dsa_switch *ds)
+ if (ret)
+ return ret;
+
+- ds->assisted_learning_on_cpu_port = true;
+- ds->mtu_enforcement_ingress = true;
+-
+ return 0;
+ }
+
+--
+2.39.5
+
--- /dev/null
+From d852e27e977f491ed5d44ac00b2b012822732602 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Apr 2025 15:00:05 +0300
+Subject: net: enetc: fix frame corruption on bpf_xdp_adjust_head/tail() and
+ XDP_PASS
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 020f0c8b3d396ec8190948f86063e1c45133f839 ]
+
+Vlatko Markovikj reported that XDP programs attached to ENETC do not
+work well if they use bpf_xdp_adjust_head() or bpf_xdp_adjust_tail(),
+combined with the XDP_PASS verdict. A typical use case is to add or
+remove a VLAN tag.
+
+The resulting sk_buff passed to the stack is corrupted, because the
+algorithm used by the driver for XDP_PASS is to unwind the current
+buffer pointer in the RX ring and to re-process the current frame with
+enetc_build_skb() as if XDP hadn't run. That is incorrect because XDP
+may have modified the geometry of the buffer, which we then are
+completely unaware of. We are looking at a modified buffer with the
+original geometry.
+
+The initial reaction, both from me and from Vlatko, was to shop around
+the kernel for code to steal that would calculate a delta between the
+old and the new XDP buffer geometry, and apply that to the sk_buff too.
+We noticed that veth and generic xdp have such code.
+
+The headroom adjustment is pretty uncontroversial, but what turned out
+severely problematic is the tailroom.
+
+veth has this snippet:
+
+ __skb_put(skb, off); /* positive on grow, negative on shrink */
+
+which on first sight looks decent enough, except __skb_put() takes an
+"unsigned int" for the second argument, and the arithmetic seems to only
+work correctly by coincidence. Second issue, __skb_put() contains a
+SKB_LINEAR_ASSERT(). It's not a great pattern to make more widespread.
+The skb may still be nonlinear at that point - it only becomes linear
+later when resetting skb->data_len to zero.
+
+To avoid the above, bpf_prog_run_generic_xdp() does this instead:
+
+ skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
+ skb->len += off; /* positive on grow, negative on shrink */
+
+which is more open-coded, uses lower-level functions and is in general a
+bit too much to spread around in driver code.
+
+Then there is the snippet:
+
+ if (xdp_buff_has_frags(xdp))
+ skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+ else
+ skb->data_len = 0;
+
+One would have expected __pskb_trim() to be the function of choice for
+this task. But it's not used in veth/xdpgeneric because the extraneous
+fragments were _already_ freed by bpf_xdp_adjust_tail() ->
+bpf_xdp_frags_shrink_tail() -> ... -> __xdp_return() - the backing
+memory for the skb frags and the xdp frags is the same, but they don't
+keep individual references.
+
+In fact, that is the biggest reason why this snippet cannot be reused
+as-is, because ENETC temporarily constructs an skb with the original len
+and the original number of frags. Because the extraneous frags are
+already freed by bpf_xdp_adjust_tail() and returned to the page
+allocator, it means the entire approach of using enetc_build_skb() is
+questionable for XDP_PASS. To avoid that, one would need to elevate the
+page refcount of all frags before calling bpf_prog_run_xdp() and drop it
+after XDP_PASS.
+
+There are other things that are missing in ENETC's handling of XDP_PASS,
+like for example updating skb_shinfo(skb)->meta_len.
+
+These are all handled correctly and cleanly in commit 539c1fba1ac7
+("xdp: add generic xdp_build_skb_from_buff()"), added to net-next in
+Dec 2024, and in addition might even be quicker that way. I have a very
+strong preference towards backporting that commit for "stable", and that
+is what is used to fix the handling bugs. It is way too messy to go
+this deep into the guts of an sk_buff from the code of a device driver.
+
+Fixes: d1b15102dd16 ("net: enetc: add support for XDP_DROP and XDP_PASS")
+Reported-by: Vlatko Markovikj <vlatko.markovikj@etas.com>
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Reviewed-by: Wei Fang <wei.fang@nxp.com>
+Link: https://patch.msgid.link/20250417120005.3288549-4-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/enetc/enetc.c | 26 +++++++++++---------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
+index 74721995cb1f9..3ee52f4b11660 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc.c
+@@ -1878,11 +1878,10 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
+
+ while (likely(rx_frm_cnt < work_limit)) {
+ union enetc_rx_bd *rxbd, *orig_rxbd;
+- int orig_i, orig_cleaned_cnt;
+ struct xdp_buff xdp_buff;
+ struct sk_buff *skb;
++ int orig_i, err;
+ u32 bd_status;
+- int err;
+
+ rxbd = enetc_rxbd(rx_ring, i);
+ bd_status = le32_to_cpu(rxbd->r.lstatus);
+@@ -1897,7 +1896,6 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
+ break;
+
+ orig_rxbd = rxbd;
+- orig_cleaned_cnt = cleaned_cnt;
+ orig_i = i;
+
+ enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i,
+@@ -1925,15 +1923,21 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
+ rx_ring->stats.xdp_drops++;
+ break;
+ case XDP_PASS:
+- rxbd = orig_rxbd;
+- cleaned_cnt = orig_cleaned_cnt;
+- i = orig_i;
+-
+- skb = enetc_build_skb(rx_ring, bd_status, &rxbd,
+- &i, &cleaned_cnt,
+- ENETC_RXB_DMA_SIZE_XDP);
+- if (unlikely(!skb))
++ skb = xdp_build_skb_from_buff(&xdp_buff);
++ /* Probably under memory pressure, stop NAPI */
++ if (unlikely(!skb)) {
++ enetc_xdp_drop(rx_ring, orig_i, i);
++ rx_ring->stats.xdp_drops++;
+ goto out;
++ }
++
++ enetc_get_offloads(rx_ring, orig_rxbd, skb);
++
++ /* These buffers are about to be owned by the stack.
++ * Update our buffer cache (the rx_swbd array elements)
++ * with their other page halves.
++ */
++ enetc_bulk_flip_buff(rx_ring, orig_i, i);
+
+ napi_gro_receive(napi, skb);
+ break;
+--
+2.39.5
+
--- /dev/null
+From 4cb401e4bea56e9115b729dcb195ba6117deb103 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Apr 2025 15:00:04 +0300
+Subject: net: enetc: refactor bulk flipping of RX buffers to separate function
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 1d587faa5be7e9785b682cc5f58ba8f4100c13ea ]
+
+This small snippet of code ensures that we do something with the array
+of RX software buffer descriptor elements after passing the skb to the
+stack. In this case, we see if the other half of the page is reusable,
+and if so, we "turn around" the buffers, making them directly usable by
+enetc_refill_rx_ring() without going to enetc_new_page().
+
+We will need to perform this kind of buffer flipping from a new code
+path, i.e. from XDP_PASS. Currently, enetc_build_skb() does it there
+buffer by buffer, but in a subsequent change we will stop using
+enetc_build_skb() for XDP_PASS.
+
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Reviewed-by: Wei Fang <wei.fang@nxp.com>
+Link: https://patch.msgid.link/20250417120005.3288549-3-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 020f0c8b3d39 ("net: enetc: fix frame corruption on bpf_xdp_adjust_head/tail() and XDP_PASS")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/enetc/enetc.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
+index 9b333254c73ec..74721995cb1f9 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc.c
+@@ -1850,6 +1850,16 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
+ }
+ }
+
++static void enetc_bulk_flip_buff(struct enetc_bdr *rx_ring, int rx_ring_first,
++ int rx_ring_last)
++{
++ while (rx_ring_first != rx_ring_last) {
++ enetc_flip_rx_buff(rx_ring,
++ &rx_ring->rx_swbd[rx_ring_first]);
++ enetc_bdr_idx_inc(rx_ring, &rx_ring_first);
++ }
++}
++
+ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
+ struct napi_struct *napi, int work_limit,
+ struct bpf_prog *prog)
+@@ -1965,11 +1975,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
+ enetc_xdp_drop(rx_ring, orig_i, i);
+ rx_ring->stats.xdp_redirect_failures++;
+ } else {
+- while (orig_i != i) {
+- enetc_flip_rx_buff(rx_ring,
+- &rx_ring->rx_swbd[orig_i]);
+- enetc_bdr_idx_inc(rx_ring, &orig_i);
+- }
++ enetc_bulk_flip_buff(rx_ring, orig_i, i);
+ xdp_redirect_frm_cnt++;
+ rx_ring->stats.xdp_redirect++;
+ }
+--
+2.39.5
+
--- /dev/null
+From cd67d49f4c04421eebcf156855b6fa70006e0b62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Apr 2025 15:00:03 +0300
+Subject: net: enetc: register XDP RX queues with frag_size
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 2768b2e2f7d25ae8984ebdcde8ec1014b6fdcd89 ]
+
+At the time when bpf_xdp_adjust_tail() gained support for non-linear
+buffers, ENETC was already generating this kind of geometry on RX, due
+to its use of 2K half page buffers. Frames larger than 1472 bytes
+(without FCS) are stored as multi-buffer, presenting a need for multi
+buffer support to work properly even in standard MTU circumstances.
+
+Allow bpf_xdp_frags_increase_tail() to know the allocation size of paged
+data, so it can safely permit growing the tailroom of the buffer from
+XDP programs.
+
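+The consumer of that allocation size is the frag tail-growth path in
+net/core/filter.c; paraphrased (not the verbatim helper), it behaves
+roughly like:
+
+   static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
+   {
+           struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+           skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
+           struct xdp_rxq_info *rxq = xdp->rxq;
+           unsigned int tailroom;
+
+           /* an RX queue registered with frag_size == 0 cannot grow */
+           if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
+                   return -EOPNOTSUPP;
+
+           tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+           if (unlikely(offset > tailroom))
+                   return -EINVAL;
+
+           memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
+           skb_frag_size_add(frag, offset);
+           sinfo->xdp_frags_size += offset;
+           return 0;
+   }
+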
+Fixes: bf25146a5595 ("bpf: add frags support to the bpf_xdp_adjust_tail() API")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Reviewed-by: Wei Fang <wei.fang@nxp.com>
+Link: https://patch.msgid.link/20250417120005.3288549-2-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/enetc/enetc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
+index 2106861463e40..9b333254c73ec 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc.c
+@@ -3362,7 +3362,8 @@ static int enetc_int_vector_init(struct enetc_ndev_priv *priv, int i,
+ bdr->buffer_offset = ENETC_RXB_PAD;
+ priv->rx_ring[i] = bdr;
+
+- err = xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0);
++ err = __xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0,
++ ENETC_RXB_DMA_SIZE_XDP);
+ if (err)
+ goto free_vector;
+
+--
+2.39.5
+
--- /dev/null
+From 9d1b1f49920b24beca21927e8e14b8a1ad97d77b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Apr 2025 17:41:07 +0100
+Subject: net: ethernet: mtk_eth_soc: net: revise NETSYSv3 hardware
+ configuration
+
+From: Bo-Cun Chen <bc-bocun.chen@mediatek.com>
+
+[ Upstream commit 491ef1117c56476f199b481f8c68820fe4c3a7c2 ]
+
+Change hardware configuration for the NETSYSv3.
+ - Enable PSE dummy page mechanism for the GDM1/2/3
+ - Enable PSE drop mechanism when the WDMA Rx ring full
+ - Enable PSE no-drop mechanism for packets from the WDMA Tx
+ - Correct PSE free drop threshold
+ - Correct PSE CDMA high threshold
+
+Fixes: 1953f134a1a8b ("net: ethernet: mtk_eth_soc: add NETSYS_V3 version support")
+Signed-off-by: Bo-Cun Chen <bc-bocun.chen@mediatek.com>
+Signed-off-by: Daniel Golle <daniel@makrotopia.org>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/b71f8fd9d4bb69c646c4d558f9331dd965068606.1744907886.git.daniel@makrotopia.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c | 24 +++++++++++++++++----
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 10 ++++++++-
+ 2 files changed, 29 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 0cd1ecacfd29f..477b8732b8609 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -3997,11 +3997,27 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset)
+ mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP);
+
+ if (mtk_is_netsys_v3_or_greater(eth)) {
+- /* PSE should not drop port1, port8 and port9 packets */
+- mtk_w32(eth, 0x00000302, PSE_DROP_CFG);
++ /* PSE dummy page mechanism */
++ mtk_w32(eth, PSE_DUMMY_WORK_GDM(1) | PSE_DUMMY_WORK_GDM(2) |
++ PSE_DUMMY_WORK_GDM(3) | DUMMY_PAGE_THR, PSE_DUMY_REQ);
++
++ /* PSE free buffer drop threshold */
++ mtk_w32(eth, 0x00600009, PSE_IQ_REV(8));
++
++ /* PSE should not drop port8, port9 and port13 packets from
++ * WDMA Tx
++ */
++ mtk_w32(eth, 0x00002300, PSE_DROP_CFG);
++
++ /* PSE should drop packets to port8, port9 and port13 on WDMA Rx
++ * ring full
++ */
++ mtk_w32(eth, 0x00002300, PSE_PPE_DROP(0));
++ mtk_w32(eth, 0x00002300, PSE_PPE_DROP(1));
++ mtk_w32(eth, 0x00002300, PSE_PPE_DROP(2));
+
+ /* GDM and CDM Threshold */
+- mtk_w32(eth, 0x00000707, MTK_CDMW0_THRES);
++ mtk_w32(eth, 0x08000707, MTK_CDMW0_THRES);
+ mtk_w32(eth, 0x00000077, MTK_CDMW1_THRES);
+
+ /* Disable GDM1 RX CRC stripping */
+@@ -4018,7 +4034,7 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset)
+ mtk_w32(eth, 0x00000300, PSE_DROP_CFG);
+
+ /* PSE should drop packets to port 8/9 on WDMA Rx ring full */
+- mtk_w32(eth, 0x00000300, PSE_PPE0_DROP);
++ mtk_w32(eth, 0x00000300, PSE_PPE_DROP(0));
+
+ /* PSE Free Queue Flow Control */
+ mtk_w32(eth, 0x01fa01f4, PSE_FQFC_CFG2);
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+index 8d7b6818d8601..0570623e569d5 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -151,7 +151,15 @@
+ #define PSE_FQFC_CFG1 0x100
+ #define PSE_FQFC_CFG2 0x104
+ #define PSE_DROP_CFG 0x108
+-#define PSE_PPE0_DROP 0x110
++#define PSE_PPE_DROP(x) (0x110 + ((x) * 0x4))
++
++/* PSE Last FreeQ Page Request Control */
++#define PSE_DUMY_REQ 0x10C
++/* PSE_DUMY_REQ is not a typo but actually called like that also in
++ * MediaTek's datasheet
++ */
++#define PSE_DUMMY_WORK_GDM(x) BIT(16 + (x))
++#define DUMMY_PAGE_THR 0x1
+
+ /* PSE Input Queue Reservation Register*/
+ #define PSE_IQ_REV(x) (0x140 + (((x) - 1) << 2))
+--
+2.39.5
+
--- /dev/null
+From 253b7459844f66fe63791b44af7764e1c9d06f6e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Apr 2025 18:07:16 +0200
+Subject: net: lwtunnel: disable BHs when required
+
+From: Justin Iurman <justin.iurman@uliege.be>
+
+[ Upstream commit c03a49f3093a4903c8a93c8b5c9a297b5343b169 ]
+
+In lwtunnel_{output|xmit}(), dev_xmit_recursion() may be called in
+preemptible scope for PREEMPT kernels. This patch disables BHs before
+calling dev_xmit_recursion(). BHs are re-enabled only at the end, since
+we must ensure the same CPU is used for both dev_xmit_recursion_inc()
+and dev_xmit_recursion_dec() (and any other recursion levels in some
+cases) in order to maintain valid per-cpu counters.
+
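+For context, the recursion helpers operate on a per-CPU counter, along
+the lines of the following sketch (paraphrased; the exact home of these
+helpers has moved between kernel versions):
+
+   static inline bool dev_xmit_recursion(void)
+   {
+           return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
+                           XMIT_RECURSION_LIMIT);
+   }
+
+   static inline void dev_xmit_recursion_inc(void)
+   {
+           __this_cpu_inc(softnet_data.xmit.recursion);
+   }
+
+   static inline void dev_xmit_recursion_dec(void)
+   {
+           __this_cpu_dec(softnet_data.xmit.recursion);
+   }
+
+A task that is preempted and migrated between the inc and the dec would
+update two different CPUs' counters, which is why the BH-disabled region
+must cover the whole path.
+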
+Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
+Closes: https://lore.kernel.org/netdev/CAADnVQJFWn3dBFJtY+ci6oN1pDFL=TzCmNbRgey7MdYxt_AP2g@mail.gmail.com/
+Reported-by: Eduard Zingerman <eddyz87@gmail.com>
+Closes: https://lore.kernel.org/netdev/m2h62qwf34.fsf@gmail.com/
+Fixes: 986ffb3a57c5 ("net: lwtunnel: fix recursion loops")
+Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250416160716.8823-1-justin.iurman@uliege.be
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/lwtunnel.c | 26 ++++++++++++++++++++------
+ 1 file changed, 20 insertions(+), 6 deletions(-)
+
+diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
+index 4417a18b3e951..f63586c9ce021 100644
+--- a/net/core/lwtunnel.c
++++ b/net/core/lwtunnel.c
+@@ -332,6 +332,8 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ struct dst_entry *dst;
+ int ret;
+
++ local_bh_disable();
++
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
+ __func__);
+@@ -347,8 +349,10 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ lwtstate = dst->lwtstate;
+
+ if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+- lwtstate->type > LWTUNNEL_ENCAP_MAX)
+- return 0;
++ lwtstate->type > LWTUNNEL_ENCAP_MAX) {
++ ret = 0;
++ goto out;
++ }
+
+ ret = -EOPNOTSUPP;
+ rcu_read_lock();
+@@ -363,11 +367,13 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ if (ret == -EOPNOTSUPP)
+ goto drop;
+
+- return ret;
++ goto out;
+
+ drop:
+ kfree_skb(skb);
+
++out:
++ local_bh_enable();
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(lwtunnel_output);
+@@ -379,6 +385,8 @@ int lwtunnel_xmit(struct sk_buff *skb)
+ struct dst_entry *dst;
+ int ret;
+
++ local_bh_disable();
++
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
+ __func__);
+@@ -395,8 +403,10 @@ int lwtunnel_xmit(struct sk_buff *skb)
+ lwtstate = dst->lwtstate;
+
+ if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+- lwtstate->type > LWTUNNEL_ENCAP_MAX)
+- return 0;
++ lwtstate->type > LWTUNNEL_ENCAP_MAX) {
++ ret = 0;
++ goto out;
++ }
+
+ ret = -EOPNOTSUPP;
+ rcu_read_lock();
+@@ -411,11 +421,13 @@ int lwtunnel_xmit(struct sk_buff *skb)
+ if (ret == -EOPNOTSUPP)
+ goto drop;
+
+- return ret;
++ goto out;
+
+ drop:
+ kfree_skb(skb);
+
++out:
++ local_bh_enable();
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(lwtunnel_xmit);
+@@ -427,6 +439,8 @@ int lwtunnel_input(struct sk_buff *skb)
+ struct dst_entry *dst;
+ int ret;
+
++ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
++
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
+ __func__);
+--
+2.39.5
+
--- /dev/null
+From a6d591ccb91e2f8826e30037710ae902df1ba116 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Apr 2025 10:38:13 +0800
+Subject: net/mlx5: Fix null-ptr-deref in mlx5_create_{inner_,}ttc_table()
+
+From: Henry Martin <bsdhenrymartin@gmail.com>
+
+[ Upstream commit 91037037ee3d611ce17f39d75f79c7de394b122a ]
+
+Add a NULL check for the mlx5_get_flow_namespace() return value in
+mlx5_create_inner_ttc_table() and mlx5_create_ttc_table() to prevent a
+NULL pointer dereference.
+
+Fixes: 137f3d50ad2a ("net/mlx5: Support matching on l4_type for ttc_table")
+Signed-off-by: Henry Martin <bsdhenrymartin@gmail.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/20250418023814.71789-2-bsdhenrymartin@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
+index 9f13cea164465..510879e1ba30e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
+@@ -636,6 +636,11 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ }
+
+ ns = mlx5_get_flow_namespace(dev, params->ns_type);
++ if (!ns) {
++ kvfree(ttc);
++ return ERR_PTR(-EOPNOTSUPP);
++ }
++
+ groups = use_l4_type ? &inner_ttc_groups[TTC_GROUPS_USE_L4_TYPE] :
+ &inner_ttc_groups[TTC_GROUPS_DEFAULT];
+
+@@ -709,6 +714,11 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ }
+
+ ns = mlx5_get_flow_namespace(dev, params->ns_type);
++ if (!ns) {
++ kvfree(ttc);
++ return ERR_PTR(-EOPNOTSUPP);
++ }
++
+ groups = use_l4_type ? &ttc_groups[TTC_GROUPS_USE_L4_TYPE] :
+ &ttc_groups[TTC_GROUPS_DEFAULT];
+
+--
+2.39.5
+
--- /dev/null
+From 5ab456f8dc68de9dfebb90b6fd338f174547ce5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Apr 2025 10:38:14 +0800
+Subject: net/mlx5: Move ttc allocation after switch case to prevent leaks
+
+From: Henry Martin <bsdhenrymartin@gmail.com>
+
+[ Upstream commit fa8fd315127ca48c65e7e6692a84ffcf3d07168e ]
+
+Relocate the memory allocation for the ttc table to after the switch
+statement that validates params->ns_type in both
+mlx5_create_inner_ttc_table() and mlx5_create_ttc_table(). This ensures
+memory is only allocated after the input has been validated, eliminating
+a potential memory leak when an invalid ns_type is passed.
+
+Fixes: 137f3d50ad2a ("net/mlx5: Support matching on l4_type for ttc_table")
+Signed-off-by: Henry Martin <bsdhenrymartin@gmail.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Link: https://patch.msgid.link/20250418023814.71789-3-bsdhenrymartin@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
+index 510879e1ba30e..43b2216bc0a22 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
+@@ -618,10 +618,6 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ bool use_l4_type;
+ int err;
+
+- ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+- if (!ttc)
+- return ERR_PTR(-ENOMEM);
+-
+ switch (params->ns_type) {
+ case MLX5_FLOW_NAMESPACE_PORT_SEL:
+ use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) &&
+@@ -635,6 +631,10 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ return ERR_PTR(-EINVAL);
+ }
+
++ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
++ if (!ttc)
++ return ERR_PTR(-ENOMEM);
++
+ ns = mlx5_get_flow_namespace(dev, params->ns_type);
+ if (!ns) {
+ kvfree(ttc);
+@@ -696,10 +696,6 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ bool use_l4_type;
+ int err;
+
+- ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+- if (!ttc)
+- return ERR_PTR(-ENOMEM);
+-
+ switch (params->ns_type) {
+ case MLX5_FLOW_NAMESPACE_PORT_SEL:
+ use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) &&
+@@ -713,6 +709,10 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ return ERR_PTR(-EINVAL);
+ }
+
++ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
++ if (!ttc)
++ return ERR_PTR(-ENOMEM);
++
+ ns = mlx5_get_flow_namespace(dev, params->ns_type);
+ if (!ns) {
+ kvfree(ttc);
+--
+2.39.5
+
--- /dev/null
+From 6b4d070dfde490b314ddc432e693be55c749d9ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Feb 2025 15:14:10 +0100
+Subject: net: phy: Add helper for getting tx amplitude gain
+
+From: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
+
+[ Upstream commit 961ee5aeea048aa292f28d61f3a96a48554e91af ]
+
+Add a helper which returns the tx amplitude gain defined in the device
+tree. Modifying it can be necessary to compensate for losses on the PCB
+and connector, so that the voltages measured on the RJ45 pins are
+conforming.
+
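+A usage sketch (mirroring how the dp83822 driver consumes the helper
+later in this series; configure_tx_swing() is a hypothetical consumer):
+
+    u32 gain;
+    int ret;
+
+    ret = phy_get_tx_amplitude_gain(phydev, dev,
+                                    ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+                                    &gain);
+    if (!ret)
+        /* gain holds the percentage read from the device tree */
+        configure_tx_swing(phydev, gain);
+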
+Signed-off-by: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://patch.msgid.link/20250214-dp83822-tx-swing-v5-2-02ca72620599@liebherr.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 607b310ada5e ("net: dp83822: Fix OF_MDIO config check")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phy_device.c | 53 ++++++++++++++++++++++++------------
+ include/linux/phy.h | 4 +++
+ 2 files changed, 39 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 92161af788afd..2a01887c5617e 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -3123,19 +3123,12 @@ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause)
+ EXPORT_SYMBOL(phy_get_pause);
+
+ #if IS_ENABLED(CONFIG_OF_MDIO)
+-static int phy_get_int_delay_property(struct device *dev, const char *name)
++static int phy_get_u32_property(struct device *dev, const char *name, u32 *val)
+ {
+- s32 int_delay;
+- int ret;
+-
+- ret = device_property_read_u32(dev, name, &int_delay);
+- if (ret)
+- return ret;
+-
+- return int_delay;
++ return device_property_read_u32(dev, name, val);
+ }
+ #else
+-static int phy_get_int_delay_property(struct device *dev, const char *name)
++static int phy_get_u32_property(struct device *dev, const char *name, u32 *val)
+ {
+ return -EINVAL;
+ }
+@@ -3160,12 +3153,12 @@ static int phy_get_int_delay_property(struct device *dev, const char *name)
+ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
+ const int *delay_values, int size, bool is_rx)
+ {
+- s32 delay;
+- int i;
++ int i, ret;
++ u32 delay;
+
+ if (is_rx) {
+- delay = phy_get_int_delay_property(dev, "rx-internal-delay-ps");
+- if (delay < 0 && size == 0) {
++ ret = phy_get_u32_property(dev, "rx-internal-delay-ps", &delay);
++ if (ret < 0 && size == 0) {
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
+ return 1;
+@@ -3174,8 +3167,8 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
+ }
+
+ } else {
+- delay = phy_get_int_delay_property(dev, "tx-internal-delay-ps");
+- if (delay < 0 && size == 0) {
++ ret = phy_get_u32_property(dev, "tx-internal-delay-ps", &delay);
++ if (ret < 0 && size == 0) {
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
+ return 1;
+@@ -3184,8 +3177,8 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
+ }
+ }
+
+- if (delay < 0)
+- return delay;
++ if (ret < 0)
++ return ret;
+
+ if (size == 0)
+ return delay;
+@@ -3220,6 +3213,30 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
+ }
+ EXPORT_SYMBOL(phy_get_internal_delay);
+
++/**
++ * phy_get_tx_amplitude_gain - stores tx amplitude gain in @val
++ * @phydev: phy_device struct
++ * @dev: pointer to the devices device struct
++ * @linkmode: linkmode for which the tx amplitude gain should be retrieved
++ * @val: tx amplitude gain
++ *
++ * Returns: 0 on success, < 0 on failure
++ */
++int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev,
++ enum ethtool_link_mode_bit_indices linkmode,
++ u32 *val)
++{
++ switch (linkmode) {
++ case ETHTOOL_LINK_MODE_100baseT_Full_BIT:
++ return phy_get_u32_property(dev,
++ "tx-amplitude-100base-tx-percent",
++ val);
++ default:
++ return -EINVAL;
++ }
++}
++EXPORT_SYMBOL_GPL(phy_get_tx_amplitude_gain);
++
+ static int phy_led_set_brightness(struct led_classdev *led_cdev,
+ enum led_brightness value)
+ {
+diff --git a/include/linux/phy.h b/include/linux/phy.h
+index 19f076a71f946..7c9da26145d30 100644
+--- a/include/linux/phy.h
++++ b/include/linux/phy.h
+@@ -2114,6 +2114,10 @@ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause);
+ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev,
+ const int *delay_values, int size, bool is_rx);
+
++int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev,
++ enum ethtool_link_mode_bit_indices linkmode,
++ u32 *val);
++
+ void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv,
+ bool *tx_pause, bool *rx_pause);
+
+--
+2.39.5
+
--- /dev/null
+From b3295298245f8c2cae8ae72dd445d21eeb63fdca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Feb 2025 15:14:11 +0100
+Subject: net: phy: dp83822: Add support for changing the transmit amplitude
+ voltage
+
+From: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
+
+[ Upstream commit 4f3735e82d8a2e80ee39731832536b1e34697c71 ]
+
+Add support for changing the transmit amplitude voltage in 100BASE-TX
+mode. Modifying it can be necessary to compensate for losses on the PCB
+and connector, so that the voltages measured on the RJ45 pins are
+conforming.
+
+Signed-off-by: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://patch.msgid.link/20250214-dp83822-tx-swing-v5-3-02ca72620599@liebherr.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 607b310ada5e ("net: dp83822: Fix OF_MDIO config check")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/dp83822.c | 38 ++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 38 insertions(+)
+
+diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
+index 6599feca1967d..3662f3905d5ad 100644
+--- a/drivers/net/phy/dp83822.c
++++ b/drivers/net/phy/dp83822.c
+@@ -31,6 +31,7 @@
+ #define MII_DP83822_RCSR 0x17
+ #define MII_DP83822_RESET_CTRL 0x1f
+ #define MII_DP83822_MLEDCR 0x25
++#define MII_DP83822_LDCTRL 0x403
+ #define MII_DP83822_LEDCFG1 0x460
+ #define MII_DP83822_IOCTRL1 0x462
+ #define MII_DP83822_IOCTRL2 0x463
+@@ -123,6 +124,9 @@
+ #define DP83822_IOCTRL1_GPIO1_CTRL GENMASK(2, 0)
+ #define DP83822_IOCTRL1_GPIO1_CTRL_LED_1 BIT(0)
+
++/* LDCTRL bits */
++#define DP83822_100BASE_TX_LINE_DRIVER_SWING GENMASK(7, 4)
++
+ /* IOCTRL2 bits */
+ #define DP83822_IOCTRL2_GPIO2_CLK_SRC GENMASK(6, 4)
+ #define DP83822_IOCTRL2_GPIO2_CTRL GENMASK(2, 0)
+@@ -197,6 +201,7 @@ struct dp83822_private {
+ bool set_gpio2_clk_out;
+ u32 gpio2_clk_out;
+ bool led_pin_enable[DP83822_MAX_LED_PINS];
++ int tx_amplitude_100base_tx_index;
+ };
+
+ static int dp83822_config_wol(struct phy_device *phydev,
+@@ -522,6 +527,12 @@ static int dp83822_config_init(struct phy_device *phydev)
+ FIELD_PREP(DP83822_IOCTRL2_GPIO2_CLK_SRC,
+ dp83822->gpio2_clk_out));
+
++ if (dp83822->tx_amplitude_100base_tx_index >= 0)
++ phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_LDCTRL,
++ DP83822_100BASE_TX_LINE_DRIVER_SWING,
++ FIELD_PREP(DP83822_100BASE_TX_LINE_DRIVER_SWING,
++ dp83822->tx_amplitude_100base_tx_index));
++
+ err = dp83822_config_init_leds(phydev);
+ if (err)
+ return err;
+@@ -720,6 +731,11 @@ static int dp83822_phy_reset(struct phy_device *phydev)
+ }
+
+ #ifdef CONFIG_OF_MDIO
++static const u32 tx_amplitude_100base_tx_gain[] = {
++ 80, 82, 83, 85, 87, 88, 90, 92,
++ 93, 95, 97, 98, 100, 102, 103, 105,
++};
++
+ static int dp83822_of_init_leds(struct phy_device *phydev)
+ {
+ struct device_node *node = phydev->mdio.dev.of_node;
+@@ -780,6 +796,8 @@ static int dp83822_of_init(struct phy_device *phydev)
+ struct dp83822_private *dp83822 = phydev->priv;
+ struct device *dev = &phydev->mdio.dev;
+ const char *of_val;
++ int i, ret;
++ u32 val;
+
+ /* Signal detection for the PHY is only enabled if the FX_EN and the
+ * SD_EN pins are strapped. Signal detection can only enabled if FX_EN
+@@ -815,6 +833,26 @@ static int dp83822_of_init(struct phy_device *phydev)
+ dp83822->set_gpio2_clk_out = true;
+ }
+
++ dp83822->tx_amplitude_100base_tx_index = -1;
++ ret = phy_get_tx_amplitude_gain(phydev, dev,
++ ETHTOOL_LINK_MODE_100baseT_Full_BIT,
++ &val);
++ if (!ret) {
++ for (i = 0; i < ARRAY_SIZE(tx_amplitude_100base_tx_gain); i++) {
++ if (tx_amplitude_100base_tx_gain[i] == val) {
++ dp83822->tx_amplitude_100base_tx_index = i;
++ break;
++ }
++ }
++
++ if (dp83822->tx_amplitude_100base_tx_index < 0) {
++ phydev_err(phydev,
++ "Invalid value for tx-amplitude-100base-tx-percent property (%u)\n",
++ val);
++ return -EINVAL;
++ }
++ }
++
+ return dp83822_of_init_leds(phydev);
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 94665d32a7069e1111905b58dd2ab0a671a5d54b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Apr 2025 11:25:56 +0800
+Subject: net: phy: leds: fix memory leak
+
+From: Qingfang Deng <qingfang.deng@siflower.com.cn>
+
+[ Upstream commit b7f0ee992adf601aa00c252418266177eb7ac2bc ]
+
+A network restart test on a router led to an out-of-memory condition,
+which was traced to a memory leak in the PHY LED trigger code.
+
+The root cause is misuse of the devm API. The registration function
+(phy_led_triggers_register) is called from phy_attach_direct, not
+phy_probe, and the unregister function (phy_led_triggers_unregister)
+is called from phy_detach, not phy_remove. This means the register and
+unregister functions can be called multiple times for the same PHY
+device, but devm-allocated memory is not freed until the driver is
+unbound.
+
+This also prevents kmemleak from detecting the leak, as the devm API
+internally stores the allocated pointer.
+
+Fix this by replacing devm_kzalloc/devm_kcalloc with standard
+kzalloc/kcalloc, and add the corresponding kfree calls in the unregister
+path.
+
+Fixes: 3928ee6485a3 ("net: phy: leds: Add support for "link" trigger")
+Fixes: 2e0bc452f472 ("net: phy: leds: add support for led triggers on phy link state change")
+Signed-off-by: Hao Guan <hao.guan@siflower.com.cn>
+Signed-off-by: Qingfang Deng <qingfang.deng@siflower.com.cn>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://patch.msgid.link/20250417032557.2929427-1-dqfext@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phy_led_triggers.c | 23 +++++++++++++----------
+ 1 file changed, 13 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c
+index f550576eb9dae..6f9d8da76c4df 100644
+--- a/drivers/net/phy/phy_led_triggers.c
++++ b/drivers/net/phy/phy_led_triggers.c
+@@ -91,9 +91,8 @@ int phy_led_triggers_register(struct phy_device *phy)
+ if (!phy->phy_num_led_triggers)
+ return 0;
+
+- phy->led_link_trigger = devm_kzalloc(&phy->mdio.dev,
+- sizeof(*phy->led_link_trigger),
+- GFP_KERNEL);
++ phy->led_link_trigger = kzalloc(sizeof(*phy->led_link_trigger),
++ GFP_KERNEL);
+ if (!phy->led_link_trigger) {
+ err = -ENOMEM;
+ goto out_clear;
+@@ -103,10 +102,9 @@ int phy_led_triggers_register(struct phy_device *phy)
+ if (err)
+ goto out_free_link;
+
+- phy->phy_led_triggers = devm_kcalloc(&phy->mdio.dev,
+- phy->phy_num_led_triggers,
+- sizeof(struct phy_led_trigger),
+- GFP_KERNEL);
++ phy->phy_led_triggers = kcalloc(phy->phy_num_led_triggers,
++ sizeof(struct phy_led_trigger),
++ GFP_KERNEL);
+ if (!phy->phy_led_triggers) {
+ err = -ENOMEM;
+ goto out_unreg_link;
+@@ -127,11 +125,11 @@ int phy_led_triggers_register(struct phy_device *phy)
+ out_unreg:
+ while (i--)
+ phy_led_trigger_unregister(&phy->phy_led_triggers[i]);
+- devm_kfree(&phy->mdio.dev, phy->phy_led_triggers);
++ kfree(phy->phy_led_triggers);
+ out_unreg_link:
+ phy_led_trigger_unregister(phy->led_link_trigger);
+ out_free_link:
+- devm_kfree(&phy->mdio.dev, phy->led_link_trigger);
++ kfree(phy->led_link_trigger);
+ phy->led_link_trigger = NULL;
+ out_clear:
+ phy->phy_num_led_triggers = 0;
+@@ -145,8 +143,13 @@ void phy_led_triggers_unregister(struct phy_device *phy)
+
+ for (i = 0; i < phy->phy_num_led_triggers; i++)
+ phy_led_trigger_unregister(&phy->phy_led_triggers[i]);
++ kfree(phy->phy_led_triggers);
++ phy->phy_led_triggers = NULL;
+
+- if (phy->led_link_trigger)
++ if (phy->led_link_trigger) {
+ phy_led_trigger_unregister(phy->led_link_trigger);
++ kfree(phy->led_link_trigger);
++ phy->led_link_trigger = NULL;
++ }
+ }
+ EXPORT_SYMBOL_GPL(phy_led_triggers_unregister);
+--
+2.39.5
+
--- /dev/null
+From 3537813107fb2288ffff0c3d424080425a5066f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Apr 2025 17:16:01 +0100
+Subject: net: phylink: fix suspend/resume with WoL enabled and link down
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit 4c8925cb9db158c812e1e11f3e74b945df7c9801 ]
+
+When WoL is enabled, we update the software state in phylink to
+indicate that the link is down, and prevent the resolver from
+bringing the link back up.
+
+On resume, we attempt to bring the overall state into consistency
+by calling the .mac_link_down() method, but this is wrong if the
+link was already down, as phylink strictly orders the .mac_link_up()
+and .mac_link_down() methods - and this would break that ordering.
+
+Fixes: f97493657c63 ("net: phylink: add suspend/resume support")
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Tested-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Link: https://patch.msgid.link/E1u55Qf-0016RN-PA@rmk-PC.armlinux.org.uk
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phylink.c | 38 ++++++++++++++++++++++----------------
+ 1 file changed, 22 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
+index b74b1c3365000..306275fbe4c98 100644
+--- a/drivers/net/phy/phylink.c
++++ b/drivers/net/phy/phylink.c
+@@ -82,6 +82,7 @@ struct phylink {
+ unsigned int pcs_state;
+
+ bool link_failed;
++ bool suspend_link_up;
+ bool major_config_failed;
+ bool mac_supports_eee_ops;
+ bool mac_supports_eee;
+@@ -2645,14 +2646,16 @@ void phylink_suspend(struct phylink *pl, bool mac_wol)
+ /* Stop the resolver bringing the link up */
+ __set_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state);
+
+- /* Disable the carrier, to prevent transmit timeouts,
+- * but one would hope all packets have been sent. This
+- * also means phylink_resolve() will do nothing.
+- */
+- if (pl->netdev)
+- netif_carrier_off(pl->netdev);
+- else
++ pl->suspend_link_up = phylink_link_is_up(pl);
++ if (pl->suspend_link_up) {
++ /* Disable the carrier, to prevent transmit timeouts,
++ * but one would hope all packets have been sent. This
++ * also means phylink_resolve() will do nothing.
++ */
++ if (pl->netdev)
++ netif_carrier_off(pl->netdev);
+ pl->old_link_state = false;
++ }
+
+ /* We do not call mac_link_down() here as we want the
+ * link to remain up to receive the WoL packets.
+@@ -2678,15 +2681,18 @@ void phylink_resume(struct phylink *pl)
+ if (test_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state)) {
+ /* Wake-on-Lan enabled, MAC handling */
+
+- /* Call mac_link_down() so we keep the overall state balanced.
+- * Do this under the state_mutex lock for consistency. This
+- * will cause a "Link Down" message to be printed during
+- * resume, which is harmless - the true link state will be
+- * printed when we run a resolve.
+- */
+- mutex_lock(&pl->state_mutex);
+- phylink_link_down(pl);
+- mutex_unlock(&pl->state_mutex);
++ if (pl->suspend_link_up) {
++ /* Call mac_link_down() so we keep the overall state
++ * balanced. Do this under the state_mutex lock for
++ * consistency. This will cause a "Link Down" message
++ * to be printed during resume, which is harmless -
++ * the true link state will be printed when we run a
++ * resolve.
++ */
++ mutex_lock(&pl->state_mutex);
++ phylink_link_down(pl);
++ mutex_unlock(&pl->state_mutex);
++ }
+
+ /* Re-apply the link parameters so that all the settings get
+ * restored to the MAC.
+--
+2.39.5
+
--- /dev/null
+From 14dc0f586efca1f4a718bac4f126d308c4ccc971 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Mar 2025 16:40:08 +0000
+Subject: net: phylink: force link down on major_config failure
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit f1ae32a709e0b525d7963207eb3a4747626f4818 ]
+
+If we fail to configure the MAC or PCS according to the desired mode,
+do not allow the network link to come up until we have successfully
+configured the MAC and PCS. This improves phylink's behaviour when an
+error occurs.
+
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Link: https://patch.msgid.link/E1twkqO-0006FI-Gm@rmk-PC.armlinux.org.uk
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 4c8925cb9db1 ("net: phylink: fix suspend/resume with WoL enabled and link down")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phylink.c | 42 +++++++++++++++++++++++++++++++--------
+ 1 file changed, 34 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
+index b00a315de0601..b74b1c3365000 100644
+--- a/drivers/net/phy/phylink.c
++++ b/drivers/net/phy/phylink.c
+@@ -82,6 +82,7 @@ struct phylink {
+ unsigned int pcs_state;
+
+ bool link_failed;
++ bool major_config_failed;
+ bool mac_supports_eee_ops;
+ bool mac_supports_eee;
+ bool phy_enable_tx_lpi;
+@@ -1360,12 +1361,16 @@ static void phylink_major_config(struct phylink *pl, bool restart,
+ phylink_an_mode_str(pl->req_link_an_mode),
+ phy_modes(state->interface));
+
++ pl->major_config_failed = false;
++
+ if (pl->mac_ops->mac_select_pcs) {
+ pcs = pl->mac_ops->mac_select_pcs(pl->config, state->interface);
+ if (IS_ERR(pcs)) {
+ phylink_err(pl,
+ "mac_select_pcs unexpectedly failed: %pe\n",
+ pcs);
++
++ pl->major_config_failed = true;
+ return;
+ }
+
+@@ -1387,6 +1392,7 @@ static void phylink_major_config(struct phylink *pl, bool restart,
+ if (err < 0) {
+ phylink_err(pl, "mac_prepare failed: %pe\n",
+ ERR_PTR(err));
++ pl->major_config_failed = true;
+ return;
+ }
+ }
+@@ -1410,8 +1416,15 @@ static void phylink_major_config(struct phylink *pl, bool restart,
+
+ phylink_mac_config(pl, state);
+
+- if (pl->pcs)
+- phylink_pcs_post_config(pl->pcs, state->interface);
++ if (pl->pcs) {
++ err = phylink_pcs_post_config(pl->pcs, state->interface);
++ if (err < 0) {
++ phylink_err(pl, "pcs_post_config failed: %pe\n",
++ ERR_PTR(err));
++
++ pl->major_config_failed = true;
++ }
++ }
+
+ if (pl->pcs_state == PCS_STATE_STARTING || pcs_changed)
+ phylink_pcs_enable(pl->pcs);
+@@ -1422,11 +1435,12 @@ static void phylink_major_config(struct phylink *pl, bool restart,
+
+ err = phylink_pcs_config(pl->pcs, neg_mode, state,
+ !!(pl->link_config.pause & MLO_PAUSE_AN));
+- if (err < 0)
+- phylink_err(pl, "pcs_config failed: %pe\n",
+- ERR_PTR(err));
+- else if (err > 0)
++ if (err < 0) {
++ phylink_err(pl, "pcs_config failed: %pe\n", ERR_PTR(err));
++ pl->major_config_failed = true;
++ } else if (err > 0) {
+ restart = true;
++ }
+
+ if (restart)
+ phylink_pcs_an_restart(pl);
+@@ -1434,16 +1448,22 @@ static void phylink_major_config(struct phylink *pl, bool restart,
+ if (pl->mac_ops->mac_finish) {
+ err = pl->mac_ops->mac_finish(pl->config, pl->act_link_an_mode,
+ state->interface);
+- if (err < 0)
++ if (err < 0) {
+ phylink_err(pl, "mac_finish failed: %pe\n",
+ ERR_PTR(err));
++
++ pl->major_config_failed = true;
++ }
+ }
+
+ if (pl->phydev && pl->phy_ib_mode) {
+ err = phy_config_inband(pl->phydev, pl->phy_ib_mode);
+- if (err < 0)
++ if (err < 0) {
+ phylink_err(pl, "phy_config_inband: %pe\n",
+ ERR_PTR(err));
++
++ pl->major_config_failed = true;
++ }
+ }
+
+ if (pl->sfp_bus) {
+@@ -1795,6 +1815,12 @@ static void phylink_resolve(struct work_struct *w)
+ }
+ }
+
++ /* If configuration of the interface failed, force the link down
++ * until we get a successful configuration.
++ */
++ if (pl->major_config_failed)
++ link_state.link = false;
++
+ if (link_state.link != cur_link_state) {
+ pl->old_link_state = link_state.link;
+ if (!link_state.link)
+--
+2.39.5
+
--- /dev/null
+From 5329aaa3189956b271142095681cd8661cb03de8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 09:12:09 +0200
+Subject: net: stmmac: fix dwmac1000 ptp timestamp status offset
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alexis Lothoré <alexis.lothore@bootlin.com>
+
+[ Upstream commit 73fa4597bdc035437fbcd84d6be32bd39f1f2149 ]
+
+When a PTP interrupt occurs, the driver accesses the wrong offset to
+learn about the number of available snapshots in the FIFO for dwmac1000:
+it should be accessing bits 29..25, while it currently reads bits 19..16
+(which describe the auxiliary triggers that generated the timestamps).
+As a consequence, it does not correctly compute the number of available
+snapshots, and so may fail to generate the corresponding clock events
+when the bogus value ends up being 0.
+
+Fix clock events generation by reading the correct bits in the timestamp
+register for dwmac1000.
+
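+For reference, the snapshot count is derived from the timestamp status
+register roughly as follows (a sketch; the register macro name is
+illustrative, only the mask/shift pair comes from the change below):
+
+    u32 ts_status = readl(ioaddr + GMAC3_X_TIMESTAMP_STATUS);
+    u32 num_snapshot = (ts_status & GMAC3_X_ATSNS) >> GMAC3_X_ATSNS_SHIFT;
+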
+Fixes: 477c3e1f6363 ("net: stmmac: Introduce dwmac1000 timestamping operations")
+Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com>
+Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
+Link: https://patch.msgid.link/20250423-stmmac_ts-v2-1-e2cf2bbd61b1@bootlin.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+index 600fea8f712fd..2d5bf1de5d2e4 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+@@ -331,8 +331,8 @@ enum rtc_control {
+
+ /* PTP and timestamping registers */
+
+-#define GMAC3_X_ATSNS GENMASK(19, 16)
+-#define GMAC3_X_ATSNS_SHIFT 16
++#define GMAC3_X_ATSNS GENMASK(29, 25)
++#define GMAC3_X_ATSNS_SHIFT 25
+
+ #define GMAC_PTP_TCR_ATSFC BIT(24)
+ #define GMAC_PTP_TCR_ATSEN0 BIT(25)
+--
+2.39.5
+
--- /dev/null
+From 9af54610d5957845e8e5fbf542a7ce000899f7bd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 09:12:10 +0200
+Subject: net: stmmac: fix multiplication overflow when reading timestamp
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alexis Lothoré <alexis.lothore@bootlin.com>
+
+[ Upstream commit 7b7491372f8ec2d8c08da18e5d629e55f41dda89 ]
+
+The current way of reading a timestamp snapshot in stmmac can lead to
+integer overflow, as the computation is done on 32 bits. The issue has
+been observed on a dwmac-socfpga platform returning chaotic timestamp
+values due to this overflow. The corresponding multiplication is done
+with a MUL instruction, which returns a 32-bit value. Explicitly casting
+the value to 64 bits replaces the MUL with a UMLAL, which computes and
+returns the result on 64 bits, and so returns the timestamps correctly.
+
+Prevent this overflow by explicitly casting the intermediate value to
+u64 to make sure that the whole computation is made on u64. While at it,
+apply the same cast on the other dwmac variant (GMAC4) method for
+snapshot retrieval.
+
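+The arithmetic problem in isolation (illustrative values; on 32-bit
+platforms NSEC_PER_SEC is a 32-bit long, so the plain multiply is
+performed on 32 bits):
+
+    u32 sec = 5;                          /* seconds from PTP_ATSR */
+    u64 bad = sec * NSEC_PER_SEC;         /* truncated 32-bit MUL */
+    u64 ok  = (u64)sec * NSEC_PER_SEC;    /* full 64-bit result */
+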
+Fixes: 477c3e1f6363 ("net: stmmac: Introduce dwmac1000 timestamping operations")
+Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com>
+Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
+Link: https://patch.msgid.link/20250423-stmmac_ts-v2-2-e2cf2bbd61b1@bootlin.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +-
+ drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+index 96bcda0856ec6..11c525b8d2698 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+@@ -560,7 +560,7 @@ void dwmac1000_get_ptptime(void __iomem *ptpaddr, u64 *ptp_time)
+ u64 ns;
+
+ ns = readl(ptpaddr + GMAC_PTP_ATNR);
+- ns += readl(ptpaddr + GMAC_PTP_ATSR) * NSEC_PER_SEC;
++ ns += (u64)readl(ptpaddr + GMAC_PTP_ATSR) * NSEC_PER_SEC;
+
+ *ptp_time = ns;
+ }
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+index 0f59aa9826040..e2840fa241f29 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+@@ -222,7 +222,7 @@ static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time)
+ u64 ns;
+
+ ns = readl(ptpaddr + PTP_ATNR);
+- ns += readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC;
++ ns += (u64)readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC;
+
+ *ptp_time = ns;
+ }
+--
+2.39.5
+
--- /dev/null
+From 58b154f582934840f1dda2382efbf973d72b60c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Apr 2025 11:47:31 -0700
+Subject: net_sched: hfsc: Fix a potential UAF in hfsc_dequeue() too
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 6ccbda44e2cc3d26fd22af54c650d6d5d801addf ]
+
+Similarly to the previous patch, we need to safeguard hfsc_dequeue()
+too. But for this one, we don't have a reliable reproducer.
+
+Fixes: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 ("Linux-2.6.12-rc2")
+Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Link: https://patch.msgid.link/20250417184732.943057-3-xiyou.wangcong@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_hfsc.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
+index e730d3f791c24..5bb4ab9941d6e 100644
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -1637,10 +1637,16 @@ hfsc_dequeue(struct Qdisc *sch)
+ if (cl->qdisc->q.qlen != 0) {
+ /* update ed */
+ next_len = qdisc_peek_len(cl->qdisc);
+- if (realtime)
+- update_ed(cl, next_len);
+- else
+- update_d(cl, next_len);
++ /* Check queue length again since some qdisc implementations
++ * (e.g., netem/codel) might empty the queue during the peek
++ * operation.
++ */
++ if (cl->qdisc->q.qlen != 0) {
++ if (realtime)
++ update_ed(cl, next_len);
++ else
++ update_d(cl, next_len);
++ }
+ } else {
+ /* the class becomes passive */
+ eltree_remove(cl);
+--
+2.39.5
+
--- /dev/null
+From ce557b8f5e5093d44724fb759ad47420f9b7cf26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Apr 2025 11:47:30 -0700
+Subject: net_sched: hfsc: Fix a UAF vulnerability in class handling
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 3df275ef0a6ae181e8428a6589ef5d5231e58b5c ]
+
+This patch fixes a Use-After-Free vulnerability in the HFSC qdisc class
+handling. The issue occurs due to a time-of-check/time-of-use condition
+in hfsc_change_class() when working with certain child qdiscs like netem
+or codel.
+
+The vulnerability works as follows:
+1. hfsc_change_class() checks if a class has packets (q.qlen != 0)
+2. It then calls qdisc_peek_len(), which for certain qdiscs (e.g.,
+ codel, netem) might drop packets and empty the queue
+3. The code continues assuming the queue is still non-empty, adding
+ the class to vttree
+4. This breaks HFSC scheduler assumptions that only non-empty classes
+ are in vttree
+5. Later, when the class is destroyed, this can lead to a Use-After-Free
+
+The fix adds a second queue length check after qdisc_peek_len() to verify
+the queue wasn't emptied.
+
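+The time-of-check/time-of-use pattern, in isolation (condensed from the
+hunk below):
+
+    if (cl->qdisc->q.qlen != 0) {           /* check */
+        len = qdisc_peek_len(cl->qdisc);    /* may drop packets */
+        /* the queue may be empty now; re-check before using the class */
+    }
+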
+Fixes: 21f4d5cc25ec ("net_sched/hfsc: fix curve activation in hfsc_change_class()")
+Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
+Reviewed-by: Konstantin Khlebnikov <koct9i@gmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Link: https://patch.msgid.link/20250417184732.943057-2-xiyou.wangcong@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_hfsc.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
+index c287bf8423b47..e730d3f791c24 100644
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -958,6 +958,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+
+ if (cl != NULL) {
+ int old_flags;
++ int len = 0;
+
+ if (parentid) {
+ if (cl->cl_parent &&
+@@ -988,9 +989,13 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ if (usc != NULL)
+ hfsc_change_usc(cl, usc, cur_time);
+
++ if (cl->qdisc->q.qlen != 0)
++ len = qdisc_peek_len(cl->qdisc);
++ /* Check queue length again since some qdisc implementations
++ * (e.g., netem/codel) might empty the queue during the peek
++ * operation.
++ */
+ if (cl->qdisc->q.qlen != 0) {
+- int len = qdisc_peek_len(cl->qdisc);
+-
+ if (cl->cl_flags & HFSC_RSC) {
+ if (old_flags & HFSC_RSC)
+ update_ed(cl, len);
+--
+2.39.5
+
--- /dev/null
+From da5a95bea16f87f0a54fc972718e28afea8bc520 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Apr 2025 10:02:50 +0200
+Subject: nvmet: fix out-of-bounds access in nvmet_enable_port
+
+From: Richard Weinberger <richard@nod.at>
+
+[ Upstream commit 3d7aa0c7b4e96cd460826d932e44710cdeb3378b ]
+
+When trying to enable a port that has no transport configured yet,
+nvmet_enable_port() uses NVMF_TRTYPE_MAX (255) to query the transports
+array, causing an out-of-bounds access:
+
+[ 106.058694] BUG: KASAN: global-out-of-bounds in nvmet_enable_port+0x42/0x1da
+[ 106.058719] Read of size 8 at addr ffffffff89dafa58 by task ln/632
+[...]
+[ 106.076026] nvmet: transport type 255 not supported
+
+Since commit 200adac75888, NVMF_TRTYPE_MAX is the default state as configured by
+nvmet_ports_make().
+Avoid this by checking for NVMF_TRTYPE_MAX before proceeding.
+
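+That is, until the user configures a transport, the port keeps the
+default set up by nvmet_ports_make():
+
+    port->disc_addr.trtype = NVMF_TRTYPE_MAX;
+
+and indexing the transports array with that value reads past its end.
+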
+Fixes: 200adac75888 ("nvme: Add PCI transport type")
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/target/core.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
+index 2e741696f3712..6ccce0ee51573 100644
+--- a/drivers/nvme/target/core.c
++++ b/drivers/nvme/target/core.c
+@@ -324,6 +324,9 @@ int nvmet_enable_port(struct nvmet_port *port)
+
+ lockdep_assert_held(&nvmet_config_sem);
+
++ if (port->disc_addr.trtype == NVMF_TRTYPE_MAX)
++ return -EINVAL;
++
+ ops = nvmet_transports[port->disc_addr.trtype];
+ if (!ops) {
+ up_write(&nvmet_config_sem);
+--
+2.39.5
+
--- /dev/null
+From c1981b6ef3fce7c8e85a3bb38f2c49d3307c92c4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Apr 2025 10:46:04 -0700
+Subject: pds_core: handle unsupported PDS_CORE_CMD_FW_CONTROL result
+
+From: Brett Creeley <brett.creeley@amd.com>
+
+[ Upstream commit 2567daad69cd1107fc0ec29b1615f110d7cf7385 ]
+
+If the FW doesn't support the PDS_CORE_CMD_FW_CONTROL command
+the driver might at the least print garbage and at the worst
+crash when the user runs the "devlink dev info" devlink command.
+
+This happens because the stack variable fw_list is not 0
+initialized which results in fw_list.num_fw_slots being a
+garbage value from the stack. Then the driver tries to access
+fw_list.fw_names[i] with i >= ARRAY_SIZE and runs off the end
+of the array.
+
+Fix this by initializing fw_list and by not failing completely
+when the devcmd fails, because other useful information is still
+printed via devlink dev info in that case.
+
+Fixes: 45d76f492938 ("pds_core: set up device and adminq")
+Signed-off-by: Brett Creeley <brett.creeley@amd.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://patch.msgid.link/20250421174606.3892-3-shannon.nelson@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amd/pds_core/devlink.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
+index 44971e71991ff..ca23cde385e67 100644
+--- a/drivers/net/ethernet/amd/pds_core/devlink.c
++++ b/drivers/net/ethernet/amd/pds_core/devlink.c
+@@ -102,7 +102,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
+ .fw_control.opcode = PDS_CORE_CMD_FW_CONTROL,
+ .fw_control.oper = PDS_CORE_FW_GET_LIST,
+ };
+- struct pds_core_fw_list_info fw_list;
++ struct pds_core_fw_list_info fw_list = {};
+ struct pdsc *pdsc = devlink_priv(dl);
+ union pds_core_dev_comp comp;
+ char buf[32];
+@@ -115,8 +115,6 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
+ if (!err)
+ memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list));
+ mutex_unlock(&pdsc->devcmd_lock);
+- if (err && err != -EIO)
+- return err;
+
+ listlen = min(fw_list.num_fw_slots, ARRAY_SIZE(fw_list.fw_names));
+ for (i = 0; i < listlen; i++) {
+--
+2.39.5
+
--- /dev/null
+From 141f3d400894f12a066e9fcebda18a45b71e6398 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Apr 2025 10:46:06 -0700
+Subject: pds_core: make wait_context part of q_info
+
+From: Shannon Nelson <shannon.nelson@amd.com>
+
+[ Upstream commit 3f77c3dfffc7063428b100c4945ca2a7a8680380 ]
+
+Make the wait_context a full part of the q_info struct rather
+than a stack variable that goes away after pdsc_adminq_post()
+is done so that the context is still available after the wait
+loop has given up.
+
+There was a case where a slow development firmware caused
+the adminq request to time out, but then later the FW finally
+finished the request and sent the interrupt. The handler tried
+to complete_all() the completion context that had been created
+on the stack in pdsc_adminq_post() but no longer existed.
+This caused bad pointer usage, kernel crashes, and much wailing
+and gnashing of teeth.
+
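+The lifetime problem in outline (simplified from the code below):
+
+    int pdsc_adminq_post(...)
+    {
+        struct pdsc_wait_context wc = { ... };  /* lives on the stack */
+
+        __pdsc_adminq_post(..., &wc);           /* queue entry keeps &wc */
+        wait_for_completion_timeout(...);       /* gives up on timeout */
+        return err;                             /* wc is gone here... */
+    }
+    /* ...but a late IRQ still calls complete_all(&wc.wait_completion) */
+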
+Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands")
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://patch.msgid.link/20250421174606.3892-5-shannon.nelson@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amd/pds_core/adminq.c | 36 +++++++++-------------
+ drivers/net/ethernet/amd/pds_core/core.c | 4 ++-
+ drivers/net/ethernet/amd/pds_core/core.h | 2 +-
+ 3 files changed, 18 insertions(+), 24 deletions(-)
+
+diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c
+index c83a0a80d5334..506f682d15c10 100644
+--- a/drivers/net/ethernet/amd/pds_core/adminq.c
++++ b/drivers/net/ethernet/amd/pds_core/adminq.c
+@@ -5,11 +5,6 @@
+
+ #include "core.h"
+
+-struct pdsc_wait_context {
+- struct pdsc_qcq *qcq;
+- struct completion wait_completion;
+-};
+-
+ static int pdsc_process_notifyq(struct pdsc_qcq *qcq)
+ {
+ union pds_core_notifyq_comp *comp;
+@@ -109,10 +104,10 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq)
+ q_info = &q->info[q->tail_idx];
+ q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
+
+- /* Copy out the completion data */
+- memcpy(q_info->dest, comp, sizeof(*comp));
+-
+- complete_all(&q_info->wc->wait_completion);
++ if (!completion_done(&q_info->completion)) {
++ memcpy(q_info->dest, comp, sizeof(*comp));
++ complete(&q_info->completion);
++ }
+
+ if (cq->tail_idx == cq->num_descs - 1)
+ cq->done_color = !cq->done_color;
+@@ -162,8 +157,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data)
+ static int __pdsc_adminq_post(struct pdsc *pdsc,
+ struct pdsc_qcq *qcq,
+ union pds_core_adminq_cmd *cmd,
+- union pds_core_adminq_comp *comp,
+- struct pdsc_wait_context *wc)
++ union pds_core_adminq_comp *comp)
+ {
+ struct pdsc_queue *q = &qcq->q;
+ struct pdsc_q_info *q_info;
+@@ -205,9 +199,9 @@ static int __pdsc_adminq_post(struct pdsc *pdsc,
+ /* Post the request */
+ index = q->head_idx;
+ q_info = &q->info[index];
+- q_info->wc = wc;
+ q_info->dest = comp;
+ memcpy(q_info->desc, cmd, sizeof(*cmd));
++ reinit_completion(&q_info->completion);
+
+ dev_dbg(pdsc->dev, "head_idx %d tail_idx %d\n",
+ q->head_idx, q->tail_idx);
+@@ -231,16 +225,13 @@ int pdsc_adminq_post(struct pdsc *pdsc,
+ union pds_core_adminq_comp *comp,
+ bool fast_poll)
+ {
+- struct pdsc_wait_context wc = {
+- .wait_completion =
+- COMPLETION_INITIALIZER_ONSTACK(wc.wait_completion),
+- };
+ unsigned long poll_interval = 1;
+ unsigned long poll_jiffies;
+ unsigned long time_limit;
+ unsigned long time_start;
+ unsigned long time_done;
+ unsigned long remaining;
++ struct completion *wc;
+ int err = 0;
+ int index;
+
+@@ -250,20 +241,19 @@ int pdsc_adminq_post(struct pdsc *pdsc,
+ return -ENXIO;
+ }
+
+- wc.qcq = &pdsc->adminqcq;
+- index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc);
++ index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp);
+ if (index < 0) {
+ err = index;
+ goto err_out;
+ }
+
++ wc = &pdsc->adminqcq.q.info[index].completion;
+ time_start = jiffies;
+ time_limit = time_start + HZ * pdsc->devcmd_timeout;
+ do {
+ /* Timeslice the actual wait to catch IO errors etc early */
+ poll_jiffies = msecs_to_jiffies(poll_interval);
+- remaining = wait_for_completion_timeout(&wc.wait_completion,
+- poll_jiffies);
++ remaining = wait_for_completion_timeout(wc, poll_jiffies);
+ if (remaining)
+ break;
+
+@@ -292,9 +282,11 @@ int pdsc_adminq_post(struct pdsc *pdsc,
+ dev_dbg(pdsc->dev, "%s: elapsed %d msecs\n",
+ __func__, jiffies_to_msecs(time_done - time_start));
+
+- /* Check the results */
+- if (time_after_eq(time_done, time_limit))
++ /* Check the results and clear an un-completed timeout */
++ if (time_after_eq(time_done, time_limit) && !completion_done(wc)) {
+ err = -ETIMEDOUT;
++ complete(wc);
++ }
+
+ dev_dbg(pdsc->dev, "read admin queue completion idx %d:\n", index);
+ dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1,
+diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
+index 4830292d5f879..3c60d4cf9d0e1 100644
+--- a/drivers/net/ethernet/amd/pds_core/core.c
++++ b/drivers/net/ethernet/amd/pds_core/core.c
+@@ -167,8 +167,10 @@ static void pdsc_q_map(struct pdsc_queue *q, void *base, dma_addr_t base_pa)
+ q->base = base;
+ q->base_pa = base_pa;
+
+- for (i = 0, cur = q->info; i < q->num_descs; i++, cur++)
++ for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) {
+ cur->desc = base + (i * q->desc_size);
++ init_completion(&cur->completion);
++ }
+ }
+
+ static void pdsc_cq_map(struct pdsc_cq *cq, void *base, dma_addr_t base_pa)
+diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
+index 543097983bf60..ec637dc4327a5 100644
+--- a/drivers/net/ethernet/amd/pds_core/core.h
++++ b/drivers/net/ethernet/amd/pds_core/core.h
+@@ -96,7 +96,7 @@ struct pdsc_q_info {
+ unsigned int bytes;
+ unsigned int nbufs;
+ struct pdsc_buf_info bufs[PDS_CORE_MAX_FRAGS];
+- struct pdsc_wait_context *wc;
++ struct completion completion;
+ void *dest;
+ };
+
+--
+2.39.5
+
--- /dev/null
+From c9086c3ea3062c49e84a07bbc9888b3fd42ab6ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Apr 2025 10:46:03 -0700
+Subject: pds_core: Prevent possible adminq overflow/stuck condition
+
+From: Brett Creeley <brett.creeley@amd.com>
+
+[ Upstream commit d9e2f070d8af60f2c8c02b2ddf0a9e90b4e9220c ]
+
+The pds_core's adminq is protected by the adminq_lock, which prevents
+more than one command from being posted onto it at any one time. This
+means the client drivers cannot simultaneously post adminq commands.
+However, the completions happen in a different context, which means
+multiple adminq commands can be posted sequentially and all be waiting
+on completion.
+
+On the FW side, the backing adminq request queue is only 16 entries
+long and the retry mechanism and/or overflow/stuck prevention is
+lacking. This can cause the adminq to get stuck, so commands are no
+longer processed and completions are no longer sent by the FW.
+
+As an initial fix, prevent more than 16 outstanding adminq commands so
+there's no way for the adminq to get stuck. This works
+because the backing adminq request queue will never have more than 16
+pending adminq commands, so it will never overflow. This is done by
+reducing the adminq depth to 16.
+
+Fixes: 45d76f492938 ("pds_core: set up device and adminq")
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Brett Creeley <brett.creeley@amd.com>
+Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://patch.msgid.link/20250421174606.3892-2-shannon.nelson@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amd/pds_core/core.c | 5 +----
+ drivers/net/ethernet/amd/pds_core/core.h | 2 +-
+ 2 files changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
+index 536635e577279..4830292d5f879 100644
+--- a/drivers/net/ethernet/amd/pds_core/core.c
++++ b/drivers/net/ethernet/amd/pds_core/core.c
+@@ -325,10 +325,7 @@ static int pdsc_core_init(struct pdsc *pdsc)
+ size_t sz;
+ int err;
+
+- /* Scale the descriptor ring length based on number of CPUs and VFs */
+- numdescs = max_t(int, PDSC_ADMINQ_MIN_LENGTH, num_online_cpus());
+- numdescs += 2 * pci_sriov_get_totalvfs(pdsc->pdev);
+- numdescs = roundup_pow_of_two(numdescs);
++ numdescs = PDSC_ADMINQ_MAX_LENGTH;
+ err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_ADMINQ, 0, "adminq",
+ PDS_CORE_QCQ_F_CORE | PDS_CORE_QCQ_F_INTR,
+ numdescs,
+diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
+index 14522d6d5f86b..543097983bf60 100644
+--- a/drivers/net/ethernet/amd/pds_core/core.h
++++ b/drivers/net/ethernet/amd/pds_core/core.h
+@@ -16,7 +16,7 @@
+
+ #define PDSC_WATCHDOG_SECS 5
+ #define PDSC_QUEUE_NAME_MAX_SZ 16
+-#define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */
++#define PDSC_ADMINQ_MAX_LENGTH 16 /* must be a power of two */
+ #define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */
+ #define PDSC_TEARDOWN_RECOVERY false
+ #define PDSC_TEARDOWN_REMOVING true
+--
+2.39.5
+
--- /dev/null
+From e8eb4c41ff207df9ebb1eb0a0876784d4c79ae92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Apr 2025 10:46:05 -0700
+Subject: pds_core: Remove unnecessary check in pds_client_adminq_cmd()
+
+From: Brett Creeley <brett.creeley@amd.com>
+
+[ Upstream commit f9559d818205a4a0b9cd87181ef46e101ea11157 ]
+
+When the pds_core driver was first created there were some race
+conditions around using the adminq, especially for client drivers.
+To reduce the possibility of a race condition there's a check
+against pf->state in pds_client_adminq_cmd(). This is problematic
+for a couple of reasons:
+
+1. The PDSC_S_INITING_DRIVER bit is set during probe, but not
+ cleared until after everything in probe is complete, which
+ includes creating the auxiliary devices. For pds_fwctl this
+ means it can't make any adminq commands until after pds_core's
+ probe is complete even though the adminq is fully up by the
+ time pds_fwctl's auxiliary device is created.
+
+2. The race conditions around using the adminq have been fixed
+ and this path is already protected against client drivers
+ calling pds_client_adminq_cmd() if the adminq isn't ready,
+ i.e. see pdsc_adminq_post() -> pdsc_adminq_inc_if_up().
+
+Fix this by removing the pf->state check in pds_client_adminq_cmd()
+because invalid accesses to pds_core's adminq is already handled by
+pdsc_adminq_post()->pdsc_adminq_inc_if_up().
+
+Fixes: 10659034c622 ("pds_core: add the aux client API")
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Brett Creeley <brett.creeley@amd.com>
+Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://patch.msgid.link/20250421174606.3892-4-shannon.nelson@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amd/pds_core/auxbus.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c
+index 2babea1109917..b76a9b7e0aed6 100644
+--- a/drivers/net/ethernet/amd/pds_core/auxbus.c
++++ b/drivers/net/ethernet/amd/pds_core/auxbus.c
+@@ -107,9 +107,6 @@ int pds_client_adminq_cmd(struct pds_auxiliary_dev *padev,
+ dev_dbg(pf->dev, "%s: %s opcode %d\n",
+ __func__, dev_name(&padev->aux_dev.dev), req->opcode);
+
+- if (pf->state)
+- return -ENXIO;
+-
+ /* Wrap the client's request */
+ cmd.client_request.opcode = PDS_AQ_CMD_CLIENT_CMD;
+ cmd.client_request.client_id = cpu_to_le16(padev->client_id);
+--
+2.39.5
+
--- /dev/null
+From fce72c6f0eb5f1b28859c409e3e799907a48b82b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 06:47:24 +0000
+Subject: perf/x86: Fix non-sampling (counting) events on certain x86 platforms
+
+From: Luo Gengkun <luogengkun@huaweicloud.com>
+
+[ Upstream commit 1a97fea9db9e9b9c4839d4232dde9f505ff5b4cc ]
+
+Perf stat doesn't work for hardware events on certain x86 platforms:
+
+ $perf stat -- sleep 1
+ Performance counter stats for 'sleep 1':
+ 16.44 msec task-clock # 0.016 CPUs utilized
+ 2 context-switches # 121.691 /sec
+ 0 cpu-migrations # 0.000 /sec
+ 54 page-faults # 3.286 K/sec
+ <not supported> cycles
+ <not supported> instructions
+ <not supported> branches
+ <not supported> branch-misses
+
+The reason is that the check in x86_pmu_hw_config() for sampling events is
+unexpectedly applied to counting events as well.
+
+It should only impact x86 platforms where limit_period is used for
+non-PEBS events. For Intel, that means only some older platforms,
+e.g., HSW, BDW and NHM.
+
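+For reference, is_sampling_event() is defined in
+include/linux/perf_event.h as:
+
+    static inline bool is_sampling_event(struct perf_event *event)
+    {
+        return event->attr.sample_period != 0;
+    }
+
+Counting events (as used by perf stat) have sample_period == 0, so the
+limit_period handling is now skipped for them.
+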
+Fixes: 88ec7eedbbd2 ("perf/x86: Fix low freqency setting issue")
+Signed-off-by: Luo Gengkun <luogengkun@huaweicloud.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ravi Bangoria <ravi.bangoria@amd.com>
+Link: https://lore.kernel.org/r/20250423064724.3716211-1-luogengkun@huaweicloud.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/events/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
+index 3a27c50080f4f..ce8d4fdf54fbb 100644
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -628,7 +628,7 @@ int x86_pmu_hw_config(struct perf_event *event)
+ if (event->attr.type == event->pmu->type)
+ event->hw.config |= x86_pmu_get_event_config(event);
+
+- if (!event->attr.freq && x86_pmu.limit_period) {
++ if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) {
+ s64 left = event->attr.sample_period;
+ x86_pmu.limit_period(event, &left);
+ if (left > event->attr.sample_period)
+--
+2.39.5
+
--- /dev/null
+From 88e1908e0a566325313af6eba4bbe9304c113bfa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Apr 2025 22:12:59 +0200
+Subject: Revert "drm/meson: vclk: fix calculation of 59.94 fractional rates"
+
+From: Christian Hewitt <christianshewitt@gmail.com>
+
+[ Upstream commit f37bb5486ea536c1d61df89feeaeff3f84f0b560 ]
+
+This reverts commit bfbc68e4d869 ("drm/meson: vclk: fix calculation of
+59.94 fractional rates").
+
+The reverted patch does make the offending YUV420 @ 59.94 phy_freq and
+vclk_freq mode match in the calculations, but it also makes all
+fractional rates unavailable for use. This was unintended and requires
+the patch to be reverted.
+
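+A worked example, assuming FREQ_1000_1001(f) rounds f * 1000 / 1001 and
+the kHz units used in this driver's parameter tables, for the nominal
+2970000 kHz phy_freq of the 4k @ 59.94 YUV420 mode:
+
+    FREQ_1000_1001(2970000 / 10) * 10     = 296703 * 10  = 2967030 kHz
+    FREQ_1000_1001(2970000 / 1000) * 1000 = 2967 * 1000  = 2967000 kHz
+
+The coarser /1000 rounding no longer matched the fractional rates
+computed elsewhere, which is why those modes became unavailable.
+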
+Fixes: bfbc68e4d869 ("drm/meson: vclk: fix calculation of 59.94 fractional rates")
+Cc: stable@vger.kernel.org
+Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://lore.kernel.org/r/20250421201300.778955-2-martin.blumenstingl@googlemail.com
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Stable-dep-of: 1017560164b6 ("drm/meson: use unsigned long long / Hz for frequency types")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/meson/meson_vclk.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c
+index 2a942dc6a6dc2..2a82119eb58ed 100644
+--- a/drivers/gpu/drm/meson/meson_vclk.c
++++ b/drivers/gpu/drm/meson/meson_vclk.c
+@@ -790,13 +790,13 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq,
+ FREQ_1000_1001(params[i].pixel_freq));
+ DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n",
+ i, params[i].phy_freq,
+- FREQ_1000_1001(params[i].phy_freq/1000)*1000);
++ FREQ_1000_1001(params[i].phy_freq/10)*10);
+ /* Match strict frequency */
+ if (phy_freq == params[i].phy_freq &&
+ vclk_freq == params[i].vclk_freq)
+ return MODE_OK;
+ /* Match 1000/1001 variant */
+- if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) &&
++ if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/10)*10) &&
+ vclk_freq == FREQ_1000_1001(params[i].vclk_freq))
+ return MODE_OK;
+ }
+@@ -1070,7 +1070,7 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target,
+
+ for (freq = 0 ; params[freq].pixel_freq ; ++freq) {
+ if ((phy_freq == params[freq].phy_freq ||
+- phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) &&
++ phy_freq == FREQ_1000_1001(params[freq].phy_freq/10)*10) &&
+ (vclk_freq == params[freq].vclk_freq ||
+ vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) {
+ if (vclk_freq != params[freq].vclk_freq)
+--
+2.39.5
+
--- /dev/null
+From aaace8c5615a952ce3dcaf0e635d1ff601bb0e9f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 19 Apr 2025 13:13:59 +0200
+Subject: riscv: Replace function-like macro by static inline function
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Björn Töpel <bjorn@rivosinc.com>
+
+[ Upstream commit 121f34341d396b666d8a90b24768b40e08ca0d61 ]
+
+The flush_icache_range() function is implemented as a "function-like
+macro with unused parameters", which can result in "unused variable"
+warnings.
+
+Replace the macro with a static inline function, as advised by
+Documentation/process/coding-style.rst.
+
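+For example, with the old macro a caller such as the uprobes fix later
+in this series:
+
+    unsigned long start = (unsigned long)dst;
+    ...
+    flush_icache_range(start, start + len);
+
+expands to a bare flush_icache_all(), leaving start (and len) unused and
+triggering "unused variable" warnings.
+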
+Fixes: 08f051eda33b ("RISC-V: Flush I$ when making a dirty page executable")
+Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
+Link: https://lore.kernel.org/r/20250419111402.1660267-1-bjorn@kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/include/asm/cacheflush.h | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
+index 8de73f91bfa37..b59ffeb668d6a 100644
+--- a/arch/riscv/include/asm/cacheflush.h
++++ b/arch/riscv/include/asm/cacheflush.h
+@@ -34,11 +34,6 @@ static inline void flush_dcache_page(struct page *page)
+ flush_dcache_folio(page_folio(page));
+ }
+
+-/*
+- * RISC-V doesn't have an instruction to flush parts of the instruction cache,
+- * so instead we just flush the whole thing.
+- */
+-#define flush_icache_range(start, end) flush_icache_all()
+ #define flush_icache_user_page(vma, pg, addr, len) \
+ do { \
+ if (vma->vm_flags & VM_EXEC) \
+@@ -78,6 +73,16 @@ void flush_icache_mm(struct mm_struct *mm, bool local);
+
+ #endif /* CONFIG_SMP */
+
++/*
++ * RISC-V doesn't have an instruction to flush parts of the instruction cache,
++ * so instead we just flush the whole thing.
++ */
++#define flush_icache_range flush_icache_range
++static inline void flush_icache_range(unsigned long start, unsigned long end)
++{
++ flush_icache_all();
++}
++
+ extern unsigned int riscv_cbom_block_size;
+ extern unsigned int riscv_cboz_block_size;
+ void riscv_init_cbo_blocksizes(void);
+--
+2.39.5
+
--- /dev/null
+From c4eb924e1ac6faf0d9bd1d1776723813e9884113 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 19 Apr 2025 13:14:00 +0200
+Subject: riscv: uprobes: Add missing fence.i after building the XOL buffer
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Björn Töpel <bjorn@rivosinc.com>
+
+[ Upstream commit 7d1d19a11cfbfd8bae1d89cc010b2cc397cd0c48 ]
+
+The XOL (execute out-of-line) buffer is used to single-step the
+replaced instruction(s) for uprobes. The RISC-V port was missing a
+proper fence.i (i$ flushing) after constructing the XOL buffer, which
+can result in incorrect execution of stale/broken instructions.
+
+This was found running the BPF selftests "test_progs:
+uprobe_autoattach, attach_probe" on the Spacemit K1/X60, where the
+uprobes tests randomly blew up.
+
+Reviewed-by: Guo Ren <guoren@kernel.org>
+Fixes: 74784081aac8 ("riscv: Add uprobes supported")
+Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
+Link: https://lore.kernel.org/r/20250419111402.1660267-2-bjorn@kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/probes/uprobes.c | 10 ++--------
+ 1 file changed, 2 insertions(+), 8 deletions(-)
+
+diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c
+index 4b3dc8beaf77d..cc15f7ca6cc17 100644
+--- a/arch/riscv/kernel/probes/uprobes.c
++++ b/arch/riscv/kernel/probes/uprobes.c
+@@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+ /* Initialize the slot */
+ void *kaddr = kmap_atomic(page);
+ void *dst = kaddr + (vaddr & ~PAGE_MASK);
++ unsigned long start = (unsigned long)dst;
+
+ memcpy(dst, src, len);
+
+@@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+ *(uprobe_opcode_t *)dst = __BUG_INSN_32;
+ }
+
++ flush_icache_range(start, start + len);
+ kunmap_atomic(kaddr);
+-
+- /*
+- * We probably need flush_icache_user_page() but it needs vma.
+- * This should work on most of architectures by default. If
+- * architecture needs to do something different it can define
+- * its own version of the function.
+- */
+- flush_dcache_page(page);
+ }
+--
+2.39.5
+
--- /dev/null
+From dcc369ccce0c356812eff5ac82f4a777f1167cfd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Apr 2025 01:51:24 -0700
+Subject: sched/eevdf: Fix se->slice being set to U64_MAX and resulting crash
+
+From: Omar Sandoval <osandov@fb.com>
+
+[ Upstream commit bbce3de72be56e4b5f68924b7da9630cc89aa1a8 ]
+
+There is a code path in dequeue_entities() that can set the slice of a
+sched_entity to U64_MAX, which sometimes results in a crash.
+
+The offending case is when dequeue_entities() is called to dequeue a
+delayed group entity, and then the entity's parent's dequeue is delayed.
+In that case:
+
+1. In the if (entity_is_task(se)) else block at the beginning of
+ dequeue_entities(), slice is set to
+ cfs_rq_min_slice(group_cfs_rq(se)). If the entity was delayed, then
+ it has no queued tasks, so cfs_rq_min_slice() returns U64_MAX.
+2. The first for_each_sched_entity() loop dequeues the entity.
+3. If the entity was its parent's only child, then the next iteration
+ tries to dequeue the parent.
+4. If the parent's dequeue needs to be delayed, then it breaks from the
+ first for_each_sched_entity() loop _without updating slice_.
+5. The second for_each_sched_entity() loop sets the parent's ->slice to
+ the saved slice, which is still U64_MAX.
+
+This throws off subsequent calculations with potentially catastrophic
+results. A manifestation we saw in production was:
+
+6. In update_entity_lag(), se->slice is used to calculate limit, which
+ ends up as a huge negative number.
+7. limit is used in se->vlag = clamp(vlag, -limit, limit). Because limit
+ is negative, vlag > limit, so se->vlag is set to the same huge
+ negative number.
+8. In place_entity(), se->vlag is scaled, which overflows and results in
+ another huge (positive or negative) number.
+9. The adjusted lag is subtracted from se->vruntime, which increases or
+ decreases se->vruntime by a huge number.
+10. pick_eevdf() calls entity_eligible()/vruntime_eligible(), which
+ incorrectly returns false because the vruntime is so far from the
+ other vruntimes on the queue, causing the
+ (vruntime - cfs_rq->min_vruntime) * load calculation to overflow.
+11. Nothing appears to be eligible, so pick_eevdf() returns NULL.
+12. pick_next_entity() tries to dereference the return value of
+ pick_eevdf() and crashes.
+
+Dumping the cfs_rq states from the core dumps with drgn showed tell-tale
+huge vruntime ranges and bogus vlag values, and I also traced se->slice
+being set to U64_MAX on live systems (which was usually "benign" since
+the rest of the runqueue needed to be in a particular state to crash).
+
+Fix it in dequeue_entities() by always setting slice from the first
+non-empty cfs_rq.
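+
+In sketch form, the delayed-dequeue exit from the first loop now
+refreshes slice before breaking out (simplified; the elided parts are
+the existing dequeue logic):
+
+  for_each_sched_entity(se) {
+      cfs_rq = cfs_rq_of(se);
+
+      if (/* se's dequeue ends up delayed */) {
+          /* ... */
+          /* take slice from the first non-empty cfs_rq */
+          slice = cfs_rq_min_slice(cfs_rq);
+          break;
+      }
+      /* ... */
+  }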
+
+Fixes: aef6987d8954 ("sched/eevdf: Propagate min_slice up the cgroup hierarchy")
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lkml.kernel.org/r/f0c2d1072be229e1bdddc73c0703919a8b00c652.1745570998.git.osandov@fb.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 89c7260103e18..3d9b68a347b76 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -7083,9 +7083,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+ h_nr_idle = task_has_idle_policy(p);
+ if (task_sleep || task_delayed || !se->sched_delayed)
+ h_nr_runnable = 1;
+- } else {
+- cfs_rq = group_cfs_rq(se);
+- slice = cfs_rq_min_slice(cfs_rq);
+ }
+
+ for_each_sched_entity(se) {
+@@ -7095,6 +7092,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+ if (p && &p->se == se)
+ return -1;
+
++ slice = cfs_rq_min_slice(cfs_rq);
+ break;
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 8dec9328aa70ff0a30a15998d666b1075188bace Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Mar 2025 11:49:33 +0300
+Subject: scsi: core: Clear flags for scsi_cmnd that did not complete
+
+From: Anastasia Kovaleva <a.kovaleva@yadro.com>
+
+[ Upstream commit 54bebe46871d4e56e05fcf55c1a37e7efa24e0a8 ]
+
+Commands that have not been completed with scsi_done() do not clear the
+SCMD_INITIALIZED flag and therefore will not be properly reinitialized.
+Thus, the next time the scsi_cmnd structure is used, the command may
+fail in scsi_cmd_runtime_exceeded() due to the old jiffies_at_alloc
+value:
+
+ kernel: sd 16:0:1:84: [sdts] tag#405 timing out command, waited 720s
+ kernel: sd 16:0:1:84: [sdts] tag#405 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=66636s
+
+Clear flags for commands that have not been completed by SCSI.
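+
+The resulting hook (after the hunk below) clears the flags
+unconditionally:
+
+  static void scsi_cleanup_rq(struct request *rq)
+  {
+      struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+      /* force re-initialization on the command's next use */
+      cmd->flags = 0;
+
+      if (rq->rq_flags & RQF_DONTPREP) {
+          scsi_mq_uninit_cmd(cmd);
+          rq->rq_flags &= ~RQF_DONTPREP;
+      }
+  }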
+
+Fixes: 4abafdc4360d ("block: remove the initialize_rq_fn blk_mq_ops method")
+Signed-off-by: Anastasia Kovaleva <a.kovaleva@yadro.com>
+Link: https://lore.kernel.org/r/20250324084933.15932-2-a.kovaleva@yadro.com
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/scsi_lib.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
+index f1cfe0bb89b20..7a31dae9aa82d 100644
+--- a/drivers/scsi/scsi_lib.c
++++ b/drivers/scsi/scsi_lib.c
+@@ -1253,8 +1253,12 @@ EXPORT_SYMBOL_GPL(scsi_alloc_request);
+ */
+ static void scsi_cleanup_rq(struct request *rq)
+ {
++ struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
++
++ cmd->flags = 0;
++
+ if (rq->rq_flags & RQF_DONTPREP) {
+- scsi_mq_uninit_cmd(blk_mq_rq_to_pdu(rq));
++ scsi_mq_uninit_cmd(cmd);
+ rq->rq_flags &= ~RQF_DONTPREP;
+ }
+ }
+--
+2.39.5
+
--- /dev/null
+From 7b11b761461730701e180af729bf71047c1c5afd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Apr 2025 14:59:09 -0500
+Subject: scsi: ufs: core: Add NULL check in
+ ufshcd_mcq_compl_pending_transfer()
+
+From: Chenyuan Yang <chenyuan0y@gmail.com>
+
+[ Upstream commit 08a966a917fe3d92150fa3cc15793ad5e57051eb ]
+
+Add a NULL check for the hwq pointer returned by ufshcd_mcq_req_to_hwq().
+
+This is similar to the fix in commit 74736103fb41 ("scsi: ufs: core: Fix
+ufshcd_abort_one racing issue").
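+
+The guard amounts to (as in the hunk below):
+
+  hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
+  if (!hwq)
+      continue; /* request already completed and freed */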
+
+Signed-off-by: Chenyuan Yang <chenyuan0y@gmail.com>
+Link: https://lore.kernel.org/r/20250412195909.315418-1-chenyuan0y@gmail.com
+Fixes: ab248643d3d6 ("scsi: ufs: core: Add error handling for MCQ mode")
+Reviewed-by: Peter Wang <peter.wang@mediatek.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/core/ufshcd.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
+index 464f13da259aa..128e35a848b7b 100644
+--- a/drivers/ufs/core/ufshcd.c
++++ b/drivers/ufs/core/ufshcd.c
+@@ -5658,6 +5658,8 @@ static void ufshcd_mcq_compl_pending_transfer(struct ufs_hba *hba,
+ continue;
+
+ hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
++ if (!hwq)
++ continue;
+
+ if (force_compl) {
+ ufshcd_mcq_compl_all_cqes_lock(hba, hwq);
+--
+2.39.5
+
--- /dev/null
+From b610b5487b811b20179fcd2adbf75f2c3e29fbe3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Apr 2025 19:13:20 -0500
+Subject: scsi: ufs: mcq: Add NULL check in ufshcd_mcq_abort()
+
+From: Chenyuan Yang <chenyuan0y@gmail.com>
+
+[ Upstream commit 4c324085062919d4e21c69e5e78456dcec0052fe ]
+
+A race can occur between the MCQ completion path and the abort handler:
+once a request completes, __blk_mq_free_request() sets rq->mq_hctx to
+NULL, meaning the subsequent ufshcd_mcq_req_to_hwq() call in
+ufshcd_mcq_abort() can return a NULL pointer. If this NULL pointer is
+dereferenced, the kernel will crash.
+
+Add a NULL check for the returned hwq pointer. If hwq is NULL, log an
+error and return FAILED, preventing a potential NULL-pointer
+dereference. As suggested by Bart, the ufshcd_cmd_inflight() check is
+removed.
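+
+The added guard, as in the hunk below:
+
+  hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
+  if (!hwq) {
+      /* rq->mq_hctx was cleared by __blk_mq_free_request() */
+      dev_err(hba->dev, "%s: skip abort. cmd at tag %d already completed.\n",
+              __func__, tag);
+      return FAILED;
+  }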
+
+This is similar to the fix in commit 74736103fb41 ("scsi: ufs: core: Fix
+ufshcd_abort_one racing issue").
+
+This was found by our static analysis tool KNighter.
+
+Signed-off-by: Chenyuan Yang <chenyuan0y@gmail.com>
+Link: https://lore.kernel.org/r/20250410001320.2219341-1-chenyuan0y@gmail.com
+Fixes: f1304d442077 ("scsi: ufs: mcq: Added ufshcd_mcq_abort()")
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Reviewed-by: Peter Wang <peter.wang@mediatek.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/core/ufs-mcq.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
+index 240ce135bbfbc..f1294c29f4849 100644
+--- a/drivers/ufs/core/ufs-mcq.c
++++ b/drivers/ufs/core/ufs-mcq.c
+@@ -677,13 +677,6 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
+ unsigned long flags;
+ int err;
+
+- if (!ufshcd_cmd_inflight(lrbp->cmd)) {
+- dev_err(hba->dev,
+- "%s: skip abort. cmd at tag %d already completed.\n",
+- __func__, tag);
+- return FAILED;
+- }
+-
+ /* Skip task abort in case previous aborts failed and report failure */
+ if (lrbp->req_abort_skip) {
+ dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n",
+@@ -692,6 +685,11 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
+ }
+
+ hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
++ if (!hwq) {
++ dev_err(hba->dev, "%s: skip abort. cmd at tag %d already completed.\n",
++ __func__, tag);
++ return FAILED;
++ }
+
+ if (ufshcd_mcq_sqe_search(hba, hwq, tag)) {
+ /*
+--
+2.39.5
+
drm-xe-rtp-drop-sentinels-from-arg-to-xe_rtp_process.patch
drm-xe-ensure-fixed_slice_mode-gets-set-after-ccs_mo.patch
lib-kconfig.ubsan-remove-default-ubsan-from-ubsan_in.patch
+ceph-fix-incorrect-flush-end-position-calculation.patch
+cpufreq-sun50i-prevent-out-of-bounds-access.patch
+dma-contiguous-avoid-warning-about-unused-size_bytes.patch
+cpufreq-apple-soc-fix-null-ptr-deref-in-apple_soc_cp.patch
+cpufreq-scmi-fix-null-ptr-deref-in-scmi_cpufreq_get_.patch
+cpufreq-scpi-fix-null-ptr-deref-in-scpi_cpufreq_get_.patch
+scsi-ufs-mcq-add-null-check-in-ufshcd_mcq_abort.patch
+virtio_pci-use-self-group-type-for-cap-commands.patch
+cpufreq-cppc-fix-invalid-return-value-in-.get-callba.patch
+cpufreq-do-not-enable-by-default-during-compile-test.patch
+cpufreq-fix-compile-test-defaults.patch
+btrfs-avoid-page_lockend-underflow-in-btrfs_punch_ho.patch
+btrfs-zoned-return-eio-on-raid1-block-group-write-po.patch
+cgroup-cpuset-v1-add-missing-support-for-cpuset_v2_m.patch
+vhost-scsi-add-better-resource-allocation-failure-ha.patch
+vhost-scsi-fix-vhost_scsi_send_bad_target.patch
+vhost-scsi-fix-vhost_scsi_send_status.patch
+net-mlx5-fix-null-ptr-deref-in-mlx5_create_-inner_-t.patch
+net-mlx5-move-ttc-allocation-after-switch-case-to-pr.patch
+scsi-core-clear-flags-for-scsi_cmnd-that-did-not-com.patch
+scsi-ufs-core-add-null-check-in-ufshcd_mcq_compl_pen.patch
+net-enetc-register-xdp-rx-queues-with-frag_size.patch
+net-enetc-refactor-bulk-flipping-of-rx-buffers-to-se.patch
+net-enetc-fix-frame-corruption-on-bpf_xdp_adjust_hea.patch
+nvmet-fix-out-of-bounds-access-in-nvmet_enable_port.patch
+net-lwtunnel-disable-bhs-when-required.patch
+net-phylink-force-link-down-on-major_config-failure.patch
+net-phylink-fix-suspend-resume-with-wol-enabled-and-.patch
+net-phy-leds-fix-memory-leak.patch
+virtio-net-refactor-napi_enable-paths.patch
+virtio-net-refactor-napi_disable-paths.patch
+virtio-net-disable-delayed-refill-when-pausing-rx.patch
+tipc-fix-null-pointer-dereference-in-tipc_mon_reinit.patch
+net-ethernet-mtk_eth_soc-net-revise-netsysv3-hardwar.patch
+fix-a-couple-of-races-in-mnt_tree_beneath-handling-b.patch
+net_sched-hfsc-fix-a-uaf-vulnerability-in-class-hand.patch
+net_sched-hfsc-fix-a-potential-uaf-in-hfsc_dequeue-t.patch
+net-dsa-mt7530-sync-driver-specific-behavior-of-mt75.patch
+pds_core-prevent-possible-adminq-overflow-stuck-cond.patch
+pds_core-handle-unsupported-pds_core_cmd_fw_control-.patch
+pds_core-remove-unnecessary-check-in-pds_client_admi.patch
+pds_core-make-wait_context-part-of-q_info.patch
+net-phy-add-helper-for-getting-tx-amplitude-gain.patch
+net-phy-dp83822-add-support-for-changing-the-transmi.patch
+net-dp83822-fix-of_mdio-config-check.patch
+net-stmmac-fix-dwmac1000-ptp-timestamp-status-offset.patch
+net-stmmac-fix-multiplication-overflow-when-reading-.patch
+block-never-reduce-ra_pages-in-blk_apply_bdi_limits.patch
+bdev-use-bdev_io_min-for-statx-block-size.patch
+block-move-blkdev_-get-put-_no_open-prototypes-out-o.patch
+block-remove-the-backing_inode-variable-in-bdev_stat.patch
+block-don-t-autoload-drivers-on-stat.patch
+iommu-amd-return-an-error-if-vcpu-affinity-is-set-fo.patch
+riscv-replace-function-like-macro-by-static-inline-f.patch
+riscv-uprobes-add-missing-fence.i-after-building-the.patch
+ublk-remove-io_cmds-list-in-ublk_queue.patch
+ublk-comment-on-ubq-canceling-handling-in-ublk_queue.patch
+ublk-implement-queue_rqs.patch
+ublk-remove-unused-cmd-argument-to-ublk_dispatch_req.patch
+ublk-call-ublk_dispatch_req-for-handling-ublk_u_io_n.patch
+splice-remove-duplicate-noinline-from-pipe_clear_now.patch
+fs-xattr-fix-handling-of-at_fdcwd-in-setxattrat-2-an.patch
+bpf-add-namespace-to-bpf-internal-symbols.patch
+revert-drm-meson-vclk-fix-calculation-of-59.94-fract.patch
+drm-meson-use-unsigned-long-long-hz-for-frequency-ty.patch
+perf-x86-fix-non-sampling-counting-events-on-certain.patch
+loongarch-select-arch_use_memtest.patch
+loongarch-make-regs_irqs_disabled-more-clear.patch
+loongarch-make-do_xyz-exception-handlers-more-robust.patch
+sched-eevdf-fix-se-slice-being-set-to-u64_max-and-re.patch
--- /dev/null
+From f4cb876304059194a706b7501c4556237d757ffe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Apr 2025 18:00:23 +0000
+Subject: splice: remove duplicate noinline from pipe_clear_nowait
+
+From: T.J. Mercier <tjmercier@google.com>
+
+[ Upstream commit e6f141b332ddd9007756751b6afd24f799488fd8 ]
+
+pipe_clear_nowait has two noinline macros, but we only need one.
+
+I checked the whole tree, and this is the only occurrence:
+
+$ grep -r "noinline .* noinline"
+fs/splice.c:static noinline void noinline pipe_clear_nowait(struct file *file)
+$
+
+Fixes: 0f99fc513ddd ("splice: clear FMODE_NOWAIT on file if splice/vmsplice is used")
+Signed-off-by: "T.J. Mercier" <tjmercier@google.com>
+Link: https://lore.kernel.org/20250423180025.2627670-1-tjmercier@google.com
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/splice.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/splice.c b/fs/splice.c
+index 23fa5561b9441..bd6e889133f5c 100644
+--- a/fs/splice.c
++++ b/fs/splice.c
+@@ -45,7 +45,7 @@
+ * here if set to avoid blocking other users of this pipe if splice is
+ * being done on it.
+ */
+-static noinline void noinline pipe_clear_nowait(struct file *file)
++static noinline void pipe_clear_nowait(struct file *file)
+ {
+ fmode_t fmode = READ_ONCE(file->f_mode);
+
+--
+2.39.5
+
--- /dev/null
+From dac4243061f6bfe69e2324cf74bd5c2d53a5e5b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Apr 2025 14:47:15 +0700
+Subject: tipc: fix NULL pointer dereference in tipc_mon_reinit_self()
+
+From: Tung Nguyen <tung.quang.nguyen@est.tech>
+
+[ Upstream commit d63527e109e811ef11abb1c2985048fdb528b4cb ]
+
+syzbot reported:
+
+tipc: Node number set to 1055423674
+Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN NOPTI
+KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
+CPU: 3 UID: 0 PID: 6017 Comm: kworker/3:5 Not tainted 6.15.0-rc1-syzkaller-00246-g900241a5cc15 #0 PREEMPT(full)
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
+Workqueue: events tipc_net_finalize_work
+RIP: 0010:tipc_mon_reinit_self+0x11c/0x210 net/tipc/monitor.c:719
+...
+RSP: 0018:ffffc9000356fb68 EFLAGS: 00010246
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003ee87cba
+RDX: 0000000000000000 RSI: ffffffff8dbc56a7 RDI: ffff88804c2cc010
+RBP: dffffc0000000000 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000007
+R13: fffffbfff2111097 R14: ffff88804ead8000 R15: ffff88804ead9010
+FS: 0000000000000000(0000) GS:ffff888097ab9000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00000000f720eb00 CR3: 000000000e182000 CR4: 0000000000352ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ tipc_net_finalize+0x10b/0x180 net/tipc/net.c:140
+ process_one_work+0x9cc/0x1b70 kernel/workqueue.c:3238
+ process_scheduled_works kernel/workqueue.c:3319 [inline]
+ worker_thread+0x6c8/0xf10 kernel/workqueue.c:3400
+ kthread+0x3c2/0x780 kernel/kthread.c:464
+ ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:153
+ ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
+ </TASK>
+...
+RIP: 0010:tipc_mon_reinit_self+0x11c/0x210 net/tipc/monitor.c:719
+...
+RSP: 0018:ffffc9000356fb68 EFLAGS: 00010246
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003ee87cba
+RDX: 0000000000000000 RSI: ffffffff8dbc56a7 RDI: ffff88804c2cc010
+RBP: dffffc0000000000 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000007
+R13: fffffbfff2111097 R14: ffff88804ead8000 R15: ffff88804ead9010
+FS: 0000000000000000(0000) GS:ffff888097ab9000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00000000f720eb00 CR3: 000000000e182000 CR4: 0000000000352ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+
+There is a race condition between the workqueue created when enabling a
+bearer and another thread created when disabling a bearer right after
+that, as follows:
+
+enabling_bearer | disabling_bearer
+--------------- | ----------------
+tipc_disc_timeout() |
+{ | bearer_disable()
+ ... | {
+ schedule_work(&tn->work); | tipc_mon_delete()
+ ... | {
+} | ...
+ | write_lock_bh(&mon->lock);
+ | mon->self = NULL;
+ | write_unlock_bh(&mon->lock);
+ | ...
+ | }
+tipc_net_finalize_work() | }
+{ |
+ ... |
+ tipc_net_finalize() |
+ { |
+ ... |
+ tipc_mon_reinit_self() |
+ { |
+ ... |
+ write_lock_bh(&mon->lock); |
+ mon->self->addr = tipc_own_addr(net); |
+ write_unlock_bh(&mon->lock); |
+ ... |
+ } |
+ ... |
+ } |
+ ... |
+} |
+
+'mon->self' is set to NULL in the disabling_bearer thread and
+dereferenced later in the enabling_bearer thread.
+
+This commit fixes the issue by validating 'mon->self' before assigning
+the node address to it.
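+
+That is, the update under mon->lock becomes (cf. the hunk below):
+
+  write_lock_bh(&mon->lock);
+  if (mon->self)
+      mon->self->addr = tipc_own_addr(net);
+  write_unlock_bh(&mon->lock);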
+
+Reported-by: syzbot+ed60da8d686dc709164c@syzkaller.appspotmail.com
+Fixes: 46cb01eeeb86 ("tipc: update mon's self addr when node addr generated")
+Signed-off-by: Tung Nguyen <tung.quang.nguyen@est.tech>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250417074826.578115-1-tung.quang.nguyen@est.tech
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tipc/monitor.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
+index e2f19627e43d5..b45c5b91bc7af 100644
+--- a/net/tipc/monitor.c
++++ b/net/tipc/monitor.c
+@@ -716,7 +716,8 @@ void tipc_mon_reinit_self(struct net *net)
+ if (!mon)
+ continue;
+ write_lock_bh(&mon->lock);
+- mon->self->addr = tipc_own_addr(net);
++ if (mon->self)
++ mon->self->addr = tipc_own_addr(net);
+ write_unlock_bh(&mon->lock);
+ }
+ }
+--
+2.39.5
+
--- /dev/null
+From a8096321d69219c81be7fbdbbec6ff992604e3c8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Apr 2025 09:37:39 +0800
+Subject: ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit d6aa0c178bf81f30ae4a780b2bca653daa2eb633 ]
+
+We call io_uring_cmd_complete_in_task() to schedule task_work for handling
+UBLK_U_IO_NEED_GET_DATA.
+
+This is not necessary, because the current context is already the ublk
+queue context, so call ublk_dispatch_req() directly for handling
+UBLK_U_IO_NEED_GET_DATA.
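+
+The handler then becomes, roughly (cf. the hunk below):
+
+  case UBLK_U_IO_NEED_GET_DATA:
+      /* ... */
+      ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
+      req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
+      /* already in the ublk queue context: dispatch inline instead
+       * of bouncing through task_work */
+      ublk_dispatch_req(ubq, req, issue_flags);
+      return -EIOCBQUEUED;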
+
+Fixes: 216c8f5ef0f2 ("ublk: replace monitor with cancelable uring_cmd")
+Tested-by: Jared Holzman <jholzman@nvidia.com>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250425013742.1079549-2-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/ublk_drv.c | 14 +++-----------
+ 1 file changed, 3 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
+index 437297022dcfa..c7761a5cfeec0 100644
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -1812,15 +1812,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
+ mutex_unlock(&ub->mutex);
+ }
+
+-static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
+- int tag)
+-{
+- struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+- struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
+-
+- ublk_queue_cmd(ubq, req);
+-}
+-
+ static inline int ublk_check_cmd_op(u32 cmd_op)
+ {
+ u32 ioc_type = _IOC_TYPE(cmd_op);
+@@ -1967,8 +1958,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
+ if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
+ goto out;
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
+- ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
+- break;
++ req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
++ ublk_dispatch_req(ubq, req, issue_flags);
++ return -EIOCBQUEUED;
+ default:
+ goto out;
+ }
+--
+2.39.5
+
--- /dev/null
+From aace6ac562119968ef4e5e594d290167f605a638 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Mar 2025 17:51:11 +0800
+Subject: ublk: comment on ubq->canceling handling in ublk_queue_rq()
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit 7e2fe01a69f6be3e284b38cfd2e4e0598a3b0a8f ]
+
+In ublk_queue_rq(), ubq->canceling has to be handled after ->fail_io and
+->force_abort are dealt with, otherwise the request may not be failed
+when deleting the disk.
+
+Add a comment on this ordering.
+
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250327095123.179113-3-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: d6aa0c178bf8 ("ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/ublk_drv.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
+index f615b9bd82f5f..fbc397efff175 100644
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -1314,6 +1314,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
+ if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))
+ return BLK_STS_IOERR;
+
++ /*
++ * ->canceling has to be handled after ->force_abort and ->fail_io
++ * is dealt with, otherwise this request may not be failed in case
++ * of recovery, and cause hang when deleting disk
++ */
+ if (unlikely(ubq->canceling)) {
+ __ublk_abort_rq(ubq, rq);
+ return BLK_STS_OK;
+--
+2.39.5
+
--- /dev/null
+From e7adb0fb5c38e61877803bbec11aa7527833e7c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Mar 2025 17:51:17 +0800
+Subject: ublk: implement ->queue_rqs()
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit d796cea7b9f33b6315362f504b15fcc26d678493 ]
+
+Implement ->queue_rqs() to improve performance in the MQ case.
+
+This way, we only need to call io_uring_cmd_complete_in_task() once for
+the whole IO batch, and both io_uring and the ublk server see the exact
+batch coming from the ublk frontend.
+
+The following IOPS improvement was observed:
+
+- tests
+
+ tools/testing/selftests/ublk/kublk add -t null -q 2 [-z]
+
+ fio/t/io_uring -p0 /dev/ublkb0
+
+- results:
+
+ more than 10% IOPS boost observed
+
+Pass all ublk selftests, especially the io dispatch order test.
+
+Cc: Uday Shankar <ushankar@purestorage.com>
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250327095123.179113-9-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: d6aa0c178bf8 ("ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/ublk_drv.c | 131 +++++++++++++++++++++++++++++++++------
+ 1 file changed, 111 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
+index fbc397efff175..e1388a9b1e2d1 100644
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -77,6 +77,20 @@ struct ublk_rq_data {
+ };
+
+ struct ublk_uring_cmd_pdu {
++ /*
++ * Store requests in same batch temporarily for queuing them to
++ * daemon context.
++ *
++ * It should have been stored to request payload, but we do want
++ * to avoid extra pre-allocation, and uring_cmd payload is always
++ * free for us
++ */
++ struct request *req_list;
++
++ /*
++ * The following two are valid in this cmd whole lifetime, and
++ * setup in ublk uring_cmd handler
++ */
+ struct ublk_queue *ubq;
+ u16 tag;
+ };
+@@ -1159,14 +1173,12 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
+ blk_mq_end_request(rq, BLK_STS_IOERR);
+ }
+
+-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd,
+- unsigned int issue_flags)
++static void ublk_dispatch_req(struct ublk_queue *ubq,
++ struct io_uring_cmd *cmd,
++ struct request *req,
++ unsigned int issue_flags)
+ {
+- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+- struct ublk_queue *ubq = pdu->ubq;
+- int tag = pdu->tag;
+- struct request *req = blk_mq_tag_to_rq(
+- ubq->dev->tag_set.tags[ubq->q_id], tag);
++ int tag = req->tag;
+ struct ublk_io *io = &ubq->ios[tag];
+ unsigned int mapped_bytes;
+
+@@ -1241,6 +1253,18 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd,
+ ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
+ }
+
++static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd,
++ unsigned int issue_flags)
++{
++ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
++ struct ublk_queue *ubq = pdu->ubq;
++ int tag = pdu->tag;
++ struct request *req = blk_mq_tag_to_rq(
++ ubq->dev->tag_set.tags[ubq->q_id], tag);
++
++ ublk_dispatch_req(ubq, cmd, req, issue_flags);
++}
++
+ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
+ {
+ struct ublk_io *io = &ubq->ios[rq->tag];
+@@ -1248,6 +1272,35 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
+ io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
+ }
+
++static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd,
++ unsigned int issue_flags)
++{
++ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
++ struct request *rq = pdu->req_list;
++ struct ublk_queue *ubq = rq->mq_hctx->driver_data;
++ struct request *next;
++
++ while (rq) {
++ struct ublk_io *io = &ubq->ios[rq->tag];
++
++ next = rq->rq_next;
++ rq->rq_next = NULL;
++ ublk_dispatch_req(ubq, io->cmd, rq, issue_flags);
++ rq = next;
++ }
++}
++
++static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l)
++{
++ struct request *rq = rq_list_peek(l);
++ struct ublk_io *io = &ubq->ios[rq->tag];
++ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd);
++
++ pdu->req_list = rq;
++ rq_list_init(l);
++ io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_list_tw_cb);
++}
++
+ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
+ {
+ struct ublk_queue *ubq = rq->mq_hctx->driver_data;
+@@ -1286,21 +1339,12 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
+ return BLK_EH_RESET_TIMER;
+ }
+
+-static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
+- const struct blk_mq_queue_data *bd)
++static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq)
+ {
+- struct ublk_queue *ubq = hctx->driver_data;
+- struct request *rq = bd->rq;
+ blk_status_t res;
+
+- if (unlikely(ubq->fail_io)) {
++ if (unlikely(ubq->fail_io))
+ return BLK_STS_TARGET;
+- }
+-
+- /* fill iod to slot in io cmd buffer */
+- res = ublk_setup_iod(ubq, rq);
+- if (unlikely(res != BLK_STS_OK))
+- return BLK_STS_IOERR;
+
+ /* With recovery feature enabled, force_abort is set in
+ * ublk_stop_dev() before calling del_gendisk(). We have to
+@@ -1314,6 +1358,29 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
+ if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))
+ return BLK_STS_IOERR;
+
++ if (unlikely(ubq->canceling))
++ return BLK_STS_IOERR;
++
++ /* fill iod to slot in io cmd buffer */
++ res = ublk_setup_iod(ubq, rq);
++ if (unlikely(res != BLK_STS_OK))
++ return BLK_STS_IOERR;
++
++ blk_mq_start_request(rq);
++ return BLK_STS_OK;
++}
++
++static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
++ const struct blk_mq_queue_data *bd)
++{
++ struct ublk_queue *ubq = hctx->driver_data;
++ struct request *rq = bd->rq;
++ blk_status_t res;
++
++ res = ublk_prep_req(ubq, rq);
++ if (res != BLK_STS_OK)
++ return res;
++
+ /*
+ * ->canceling has to be handled after ->force_abort and ->fail_io
+ * is dealt with, otherwise this request may not be failed in case
+@@ -1324,12 +1391,35 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
+ return BLK_STS_OK;
+ }
+
+- blk_mq_start_request(bd->rq);
+ ublk_queue_cmd(ubq, rq);
+-
+ return BLK_STS_OK;
+ }
+
++static void ublk_queue_rqs(struct rq_list *rqlist)
++{
++ struct rq_list requeue_list = { };
++ struct rq_list submit_list = { };
++ struct ublk_queue *ubq = NULL;
++ struct request *req;
++
++ while ((req = rq_list_pop(rqlist))) {
++ struct ublk_queue *this_q = req->mq_hctx->driver_data;
++
++ if (ubq && ubq != this_q && !rq_list_empty(&submit_list))
++ ublk_queue_cmd_list(ubq, &submit_list);
++ ubq = this_q;
++
++ if (ublk_prep_req(ubq, req) == BLK_STS_OK)
++ rq_list_add_tail(&submit_list, req);
++ else
++ rq_list_add_tail(&requeue_list, req);
++ }
++
++ if (ubq && !rq_list_empty(&submit_list))
++ ublk_queue_cmd_list(ubq, &submit_list);
++ *rqlist = requeue_list;
++}
++
+ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
+ unsigned int hctx_idx)
+ {
+@@ -1342,6 +1432,7 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
+
+ static const struct blk_mq_ops ublk_mq_ops = {
+ .queue_rq = ublk_queue_rq,
++ .queue_rqs = ublk_queue_rqs,
+ .init_hctx = ublk_init_hctx,
+ .timeout = ublk_timeout,
+ };
+--
+2.39.5
+
--- /dev/null
+From 3592feec8c9516c2ba65dd0d06f7e6f246748fc8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Mar 2025 12:14:17 -0600
+Subject: ublk: remove io_cmds list in ublk_queue
+
+From: Uday Shankar <ushankar@purestorage.com>
+
+[ Upstream commit 989bcd623a8b0c32b76d9258767d8b37e53419e6 ]
+
+The current I/O dispatch mechanism - queueing I/O by adding it to the
+io_cmds list (and poking task_work as needed), then dispatching it in
+ublk server task context by reversing io_cmds and completing the
+io_uring command associated with each one - was introduced by commit
+7d4a93176e014 ("ublk_drv: don't forward io commands in reserve order")
+to ensure that the ublk server received I/O in the same order that the
+block layer submitted it to ublk_drv. This mechanism was only needed for
+the "raw" task_work submission mechanism, since the io_uring task work
+wrapper maintains FIFO ordering (using quite a similar mechanism in
+fact). The "raw" task_work submission mechanism is no longer supported
+in ublk_drv as of commit 29dc5d06613f2 ("ublk: kill queuing request by
+task_work_add"), so the explicit llist/reversal is no longer needed - it
+just duplicates logic already present in the underlying io_uring APIs.
+Remove it.
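+
+After the removal, queueing reduces to completing the uring_cmd in
+task context directly (as in the diff below):
+
+  static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
+  {
+      struct ublk_io *io = &ubq->ios[rq->tag];
+
+      /* io_uring task_work already preserves FIFO ordering */
+      io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
+  }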
+
+Signed-off-by: Uday Shankar <ushankar@purestorage.com>
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250318-ublk_io_cmds-v1-1-c1bb74798fef@purestorage.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: d6aa0c178bf8 ("ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/ublk_drv.c | 46 ++++++++++------------------------------
+ 1 file changed, 11 insertions(+), 35 deletions(-)
+
+diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
+index 971b793dedd03..f615b9bd82f5f 100644
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -73,8 +73,6 @@
+ UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED)
+
+ struct ublk_rq_data {
+- struct llist_node node;
+-
+ struct kref ref;
+ };
+
+@@ -141,8 +139,6 @@ struct ublk_queue {
+ struct task_struct *ubq_daemon;
+ char *io_cmd_buf;
+
+- struct llist_head io_cmds;
+-
+ unsigned long io_addr; /* mapped vm address */
+ unsigned int max_io_sz;
+ bool force_abort;
+@@ -1114,7 +1110,7 @@ static void ublk_fail_rq_fn(struct kref *ref)
+ }
+
+ /*
+- * Since __ublk_rq_task_work always fails requests immediately during
++ * Since ublk_rq_task_work_cb always fails requests immediately during
+ * exiting, __ublk_fail_req() is only called from abort context during
+ * exiting. So lock is unnecessary.
+ *
+@@ -1163,11 +1159,14 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
+ blk_mq_end_request(rq, BLK_STS_IOERR);
+ }
+
+-static inline void __ublk_rq_task_work(struct request *req,
+- unsigned issue_flags)
++static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd,
++ unsigned int issue_flags)
+ {
+- struct ublk_queue *ubq = req->mq_hctx->driver_data;
+- int tag = req->tag;
++ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
++ struct ublk_queue *ubq = pdu->ubq;
++ int tag = pdu->tag;
++ struct request *req = blk_mq_tag_to_rq(
++ ubq->dev->tag_set.tags[ubq->q_id], tag);
+ struct ublk_io *io = &ubq->ios[tag];
+ unsigned int mapped_bytes;
+
+@@ -1242,34 +1241,11 @@ static inline void __ublk_rq_task_work(struct request *req,
+ ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
+ }
+
+-static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
+- unsigned issue_flags)
+-{
+- struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+- struct ublk_rq_data *data, *tmp;
+-
+- io_cmds = llist_reverse_order(io_cmds);
+- llist_for_each_entry_safe(data, tmp, io_cmds, node)
+- __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
+-}
+-
+-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
+-{
+- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+- struct ublk_queue *ubq = pdu->ubq;
+-
+- ublk_forward_io_cmds(ubq, issue_flags);
+-}
+-
+ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
+ {
+- struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
++ struct ublk_io *io = &ubq->ios[rq->tag];
+
+- if (llist_add(&data->node, &ubq->io_cmds)) {
+- struct ublk_io *io = &ubq->ios[rq->tag];
+-
+- io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
+- }
++ io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
+ }
+
+ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
+@@ -1462,7 +1438,7 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
+ struct request *rq;
+
+ /*
+- * Either we fail the request or ublk_rq_task_work_fn
++ * Either we fail the request or ublk_rq_task_work_cb
+ * will do it
+ */
+ rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
+--
+2.39.5
+
--- /dev/null
+From ef38abf68f31a1404b87783b98771524384aa22a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Mar 2025 12:04:07 -0600
+Subject: ublk: remove unused cmd argument to ublk_dispatch_req()
+
+From: Caleb Sander Mateos <csander@purestorage.com>
+
+[ Upstream commit dfbce8b798fb848a42706e2e544b78b3db22aaae ]
+
+ublk_dispatch_req() never uses its struct io_uring_cmd *cmd argument.
+Drop it so callers don't have to pass a value.
+
+Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
+Link: https://lore.kernel.org/r/20250328180411.2696494-2-csander@purestorage.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: d6aa0c178bf8 ("ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/ublk_drv.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
+index e1388a9b1e2d1..437297022dcfa 100644
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -1174,7 +1174,6 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
+ }
+
+ static void ublk_dispatch_req(struct ublk_queue *ubq,
+- struct io_uring_cmd *cmd,
+ struct request *req,
+ unsigned int issue_flags)
+ {
+@@ -1262,7 +1261,7 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd,
+ struct request *req = blk_mq_tag_to_rq(
+ ubq->dev->tag_set.tags[ubq->q_id], tag);
+
+- ublk_dispatch_req(ubq, cmd, req, issue_flags);
++ ublk_dispatch_req(ubq, req, issue_flags);
+ }
+
+ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
+@@ -1281,11 +1280,9 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd,
+ struct request *next;
+
+ while (rq) {
+- struct ublk_io *io = &ubq->ios[rq->tag];
+-
+ next = rq->rq_next;
+ rq->rq_next = NULL;
+- ublk_dispatch_req(ubq, io->cmd, rq, issue_flags);
++ ublk_dispatch_req(ubq, rq, issue_flags);
+ rq = next;
+ }
+ }
+--
+2.39.5
+
--- /dev/null
+From 9a8362f15dd715e5c29fcbfaf80861594d065289 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 13:15:10 -0600
+Subject: vhost-scsi: Add better resource allocation failure handling
+
+From: Mike Christie <michael.christie@oracle.com>
+
+[ Upstream commit 3ca51662f8186b569b8fb282242c20ccbb3993c2 ]
+
+If we can't allocate memory to map in data for a request, or can't find
+a tag for a command, we currently drop the command. This leads to the
+error handler running to clean it up. Instead of dropping the command,
+return an error telling the initiator that it queued more commands than
+we can handle. The initiator will then reduce how many commands it
+sends us and retry later.
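+
+The new completion path boils down to (from vhost_scsi_send_status()
+as added below; a later patch in this series reworks it to use an
+iov_iter):
+
+  memset(&rsp, 0, sizeof(rsp));
+  rsp.status = status; /* SAM_STAT_TASK_SET_FULL on -ENOMEM */
+  resp = vq->iov[out].iov_base;
+  if (!__copy_to_user(resp, &rsp, sizeof(rsp)))
+      vhost_add_used_and_signal(&vs->dev, vq, head, 0);
+  else
+      pr_err("Faulted on virtio_scsi_cmd_resp\n");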
+
+Signed-off-by: Mike Christie <michael.christie@oracle.com>
+Message-Id: <20241203191705.19431-4-michael.christie@oracle.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+Stable-dep-of: b18268713547 ("vhost-scsi: Fix vhost_scsi_send_bad_target()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/scsi.c | 28 +++++++++++++++++++++++++---
+ 1 file changed, 25 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
+index 7aeff435c1d87..ad7fa5bc0f5fc 100644
+--- a/drivers/vhost/scsi.c
++++ b/drivers/vhost/scsi.c
+@@ -630,7 +630,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
+
+ tag = sbitmap_get(&svq->scsi_tags);
+ if (tag < 0) {
+- pr_err("Unable to obtain tag for vhost_scsi_cmd\n");
++ pr_warn_once("Guest sent too many cmds. Returning TASK_SET_FULL.\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+@@ -929,6 +929,24 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_cmd *cmd)
+ target_submit(se_cmd);
+ }
+
++static void
++vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
++ int head, unsigned int out, u8 status)
++{
++ struct virtio_scsi_cmd_resp __user *resp;
++ struct virtio_scsi_cmd_resp rsp;
++ int ret;
++
++ memset(&rsp, 0, sizeof(rsp));
++ rsp.status = status;
++ resp = vq->iov[out].iov_base;
++ ret = __copy_to_user(resp, &rsp, sizeof(rsp));
++ if (!ret)
++ vhost_add_used_and_signal(&vs->dev, vq, head, 0);
++ else
++ pr_err("Faulted on virtio_scsi_cmd_resp\n");
++}
++
+ static void
+ vhost_scsi_send_bad_target(struct vhost_scsi *vs,
+ struct vhost_virtqueue *vq,
+@@ -1216,8 +1234,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+ exp_data_len + prot_bytes,
+ data_direction);
+ if (IS_ERR(cmd)) {
+- vq_err(vq, "vhost_scsi_get_cmd failed %ld\n",
+- PTR_ERR(cmd));
++ ret = PTR_ERR(cmd);
++ vq_err(vq, "vhost_scsi_get_tag failed %dd\n", ret);
+ goto err;
+ }
+ cmd->tvc_vhost = vs;
+@@ -1254,11 +1272,15 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+ * EINVAL: Invalid response buffer, drop the request
+ * EIO: Respond with bad target
+ * EAGAIN: Pending request
++ * ENOMEM: Could not allocate resources for request
+ */
+ if (ret == -ENXIO)
+ break;
+ else if (ret == -EIO)
+ vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
++ else if (ret == -ENOMEM)
++ vhost_scsi_send_status(vs, vq, vc.head, vc.out,
++ SAM_STAT_TASK_SET_FULL);
+ } while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
+ out:
+ mutex_unlock(&vq->mutex);
+--
+2.39.5
+
--- /dev/null
+From 417fcef3b0e556c385eac6accf84fe3ab2b13d43 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Apr 2025 23:29:47 -0700
+Subject: vhost-scsi: Fix vhost_scsi_send_bad_target()
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+[ Upstream commit b182687135474d7ed905a07cc6cb2734b359e13e ]
+
+Although the support of VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 was
+signaled by the commit 664ed90e621c ("vhost/scsi: Set
+VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits"),
+vhost_scsi_send_bad_target() still assumes the response fits in a
+single descriptor.
+
+In addition, although vhost_scsi_send_bad_target() is used by both the
+I/O queue and the control queue, the response header is always
+virtio_scsi_cmd_resp. The control queue requires
+virtio_scsi_ctrl_tmf_resp or virtio_scsi_ctrl_an_resp instead.
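+
+Condensed from the hunk below: the response header is chosen per queue
+type and copied via an iov_iter so that it may span descriptors:
+
+  union {
+      struct virtio_scsi_cmd_resp cmd;
+      struct virtio_scsi_ctrl_tmf_resp tmf;
+      struct virtio_scsi_ctrl_an_resp an;
+  } rsp;
+
+  /* rsp_size and the response field depend on the queue type */
+  iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in, rsp_size);
+  if (copy_to_iter(&rsp, rsp_size, &iov_iter) == rsp_size)
+      vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);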
+
+Fixes: 664ed90e621c ("vhost/scsi: Set VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits")
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Mike Christie <michael.christie@oracle.com>
+Message-Id: <20250403063028.16045-3-dongli.zhang@oracle.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/scsi.c | 48 ++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 37 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
+index ad7fa5bc0f5fc..7bfe5e5865fe9 100644
+--- a/drivers/vhost/scsi.c
++++ b/drivers/vhost/scsi.c
+@@ -947,23 +947,46 @@ vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
+ pr_err("Faulted on virtio_scsi_cmd_resp\n");
+ }
+
++#define TYPE_IO_CMD 0
++#define TYPE_CTRL_TMF 1
++#define TYPE_CTRL_AN 2
++
+ static void
+ vhost_scsi_send_bad_target(struct vhost_scsi *vs,
+ struct vhost_virtqueue *vq,
+- int head, unsigned out)
++ struct vhost_scsi_ctx *vc, int type)
+ {
+- struct virtio_scsi_cmd_resp __user *resp;
+- struct virtio_scsi_cmd_resp rsp;
++ union {
++ struct virtio_scsi_cmd_resp cmd;
++ struct virtio_scsi_ctrl_tmf_resp tmf;
++ struct virtio_scsi_ctrl_an_resp an;
++ } rsp;
++ struct iov_iter iov_iter;
++ size_t rsp_size;
+ int ret;
+
+ memset(&rsp, 0, sizeof(rsp));
+- rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
+- resp = vq->iov[out].iov_base;
+- ret = __copy_to_user(resp, &rsp, sizeof(rsp));
+- if (!ret)
+- vhost_add_used_and_signal(&vs->dev, vq, head, 0);
++
++ if (type == TYPE_IO_CMD) {
++ rsp_size = sizeof(struct virtio_scsi_cmd_resp);
++ rsp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET;
++ } else if (type == TYPE_CTRL_TMF) {
++ rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp);
++ rsp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
++ } else {
++ rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp);
++ rsp.an.response = VIRTIO_SCSI_S_BAD_TARGET;
++ }
++
++ iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in,
++ rsp_size);
++
++ ret = copy_to_iter(&rsp, rsp_size, &iov_iter);
++
++ if (likely(ret == rsp_size))
++ vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
+ else
+- pr_err("Faulted on virtio_scsi_cmd_resp\n");
++ pr_err("Faulted on virtio scsi type=%d\n", type);
+ }
+
+ static int
+@@ -1277,7 +1300,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+ if (ret == -ENXIO)
+ break;
+ else if (ret == -EIO)
+- vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
++ vhost_scsi_send_bad_target(vs, vq, &vc, TYPE_IO_CMD);
+ else if (ret == -ENOMEM)
+ vhost_scsi_send_status(vs, vq, vc.head, vc.out,
+ SAM_STAT_TASK_SET_FULL);
+@@ -1510,7 +1533,10 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+ if (ret == -ENXIO)
+ break;
+ else if (ret == -EIO)
+- vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
++ vhost_scsi_send_bad_target(vs, vq, &vc,
++ v_req.type == VIRTIO_SCSI_T_TMF ?
++ TYPE_CTRL_TMF :
++ TYPE_CTRL_AN);
+ } while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
+ out:
+ mutex_unlock(&vq->mutex);
+--
+2.39.5
+
--- /dev/null
+From e1346b9b2d7f0cab13c6851d22e7261b6bbd3adb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Apr 2025 23:29:48 -0700
+Subject: vhost-scsi: Fix vhost_scsi_send_status()
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+[ Upstream commit 58465d86071b61415e25fb054201f61e83d21465 ]
+
+Although the support of VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 was
+signaled by the commit 664ed90e621c ("vhost/scsi: Set
+VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits"),
+vhost_scsi_send_status() still assumes the response fits in a single
+descriptor.
+
+A similar issue in vhost_scsi_send_bad_target() was fixed in the
+previous commit. In addition, a similar issue for
+vhost_scsi_complete_cmd_work() was fixed by commit 6dd88fd59da8
+("vhost-scsi: unbreak any layout for response").
+
+Fixes: 3ca51662f818 ("vhost-scsi: Add better resource allocation failure handling")
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Mike Christie <michael.christie@oracle.com>
+Message-Id: <20250403063028.16045-4-dongli.zhang@oracle.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/scsi.c | 18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
+index 7bfe5e5865fe9..35a03306d1345 100644
+--- a/drivers/vhost/scsi.c
++++ b/drivers/vhost/scsi.c
+@@ -931,18 +931,22 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_cmd *cmd)
+
+ static void
+ vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
+- int head, unsigned int out, u8 status)
++ struct vhost_scsi_ctx *vc, u8 status)
+ {
+- struct virtio_scsi_cmd_resp __user *resp;
+ struct virtio_scsi_cmd_resp rsp;
++ struct iov_iter iov_iter;
+ int ret;
+
+ memset(&rsp, 0, sizeof(rsp));
+ rsp.status = status;
+- resp = vq->iov[out].iov_base;
+- ret = __copy_to_user(resp, &rsp, sizeof(rsp));
+- if (!ret)
+- vhost_add_used_and_signal(&vs->dev, vq, head, 0);
++
++ iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in,
++ sizeof(rsp));
++
++ ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter);
++
++ if (likely(ret == sizeof(rsp)))
++ vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
+ else
+ pr_err("Faulted on virtio_scsi_cmd_resp\n");
+ }
+@@ -1302,7 +1306,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+ else if (ret == -EIO)
+ vhost_scsi_send_bad_target(vs, vq, &vc, TYPE_IO_CMD);
+ else if (ret == -ENOMEM)
+- vhost_scsi_send_status(vs, vq, vc.head, vc.out,
++ vhost_scsi_send_status(vs, vq, &vc,
+ SAM_STAT_TASK_SET_FULL);
+ } while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
+ out:
+--
+2.39.5
+
--- /dev/null
+From 2b13d4bec85fa1b93e6f926685af7414c1db394e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Apr 2025 14:28:03 +0700
+Subject: virtio-net: disable delayed refill when pausing rx
+
+From: Bui Quang Minh <minhquangbui99@gmail.com>
+
+[ Upstream commit 4bc12818b363bd30f0f7348dd9ab077290a637ae ]
+
+When pausing rx (e.g. setting up xdp, an xsk pool, or an rx resize), we
+call napi_disable() on the receive queue's napi. The delayed
+refill_work also calls napi_disable() on the receive queue's napi. When
+napi_disable() is called on an already disabled napi, it will sleep in
+napi_disable_locked while still holding the netdev_lock. As a result, a
+later napi_enable gets stuck too, as it cannot acquire the netdev_lock.
+This leaves refill_work and the pause-then-resume tx path stuck
+altogether.
+
+This scenario is reproducible by binding an XDP socket to a virtio-net
+interface without setting up the fill ring. As a result, try_fill_recv
+will fail until the fill ring is set up and refill_work is scheduled.
+
+This commit adds virtnet_rx_(pause/resume)_all helpers and fixes up the
+rx pause path to disable future refills and cancel any in-flight
+delayed refill_work before calling napi_disable() to pause the rx.
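+
+The pause-all helper makes the ordering explicit (taken from the patch
+below):
+
+  static void virtnet_rx_pause_all(struct virtnet_info *vi)
+  {
+      int i;
+
+      /* stop future refills and wait out any in-flight one so that
+       * refill_work cannot race with the napi_disable() below */
+      disable_delayed_refill(vi);
+      cancel_delayed_work_sync(&vi->refill);
+      for (i = 0; i < vi->max_queue_pairs; i++)
+          __virtnet_rx_pause(vi, &vi->rq[i]);
+  }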
+
+Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Link: https://patch.msgid.link/20250417072806.18660-2-minhquangbui99@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 69 +++++++++++++++++++++++++++++++++-------
+ 1 file changed, 57 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 44dbb991787ed..3e4896d9537ee 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -3318,7 +3318,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
+ return NETDEV_TX_OK;
+ }
+
+-static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
++static void __virtnet_rx_pause(struct virtnet_info *vi,
++ struct receive_queue *rq)
+ {
+ bool running = netif_running(vi->dev);
+
+@@ -3328,17 +3329,63 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
+ }
+ }
+
+-static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
++static void virtnet_rx_pause_all(struct virtnet_info *vi)
++{
++ int i;
++
++ /*
++ * Make sure refill_work does not run concurrently to
++ * avoid napi_disable race which leads to deadlock.
++ */
++ disable_delayed_refill(vi);
++ cancel_delayed_work_sync(&vi->refill);
++ for (i = 0; i < vi->max_queue_pairs; i++)
++ __virtnet_rx_pause(vi, &vi->rq[i]);
++}
++
++static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
++{
++ /*
++ * Make sure refill_work does not run concurrently to
++ * avoid napi_disable race which leads to deadlock.
++ */
++ disable_delayed_refill(vi);
++ cancel_delayed_work_sync(&vi->refill);
++ __virtnet_rx_pause(vi, rq);
++}
++
++static void __virtnet_rx_resume(struct virtnet_info *vi,
++ struct receive_queue *rq,
++ bool refill)
+ {
+ bool running = netif_running(vi->dev);
+
+- if (!try_fill_recv(vi, rq, GFP_KERNEL))
++ if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
+ schedule_delayed_work(&vi->refill, 0);
+
+ if (running)
+ virtnet_napi_enable(rq);
+ }
+
++static void virtnet_rx_resume_all(struct virtnet_info *vi)
++{
++ int i;
++
++ enable_delayed_refill(vi);
++ for (i = 0; i < vi->max_queue_pairs; i++) {
++ if (i < vi->curr_queue_pairs)
++ __virtnet_rx_resume(vi, &vi->rq[i], true);
++ else
++ __virtnet_rx_resume(vi, &vi->rq[i], false);
++ }
++}
++
++static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
++{
++ enable_delayed_refill(vi);
++ __virtnet_rx_resume(vi, rq, true);
++}
++
+ static int virtnet_rx_resize(struct virtnet_info *vi,
+ struct receive_queue *rq, u32 ring_num)
+ {
+@@ -5939,12 +5986,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ if (prog)
+ bpf_prog_add(prog, vi->max_queue_pairs - 1);
+
++ virtnet_rx_pause_all(vi);
++
+ /* Make sure NAPI is not using any XDP TX queues for RX. */
+ if (netif_running(dev)) {
+- for (i = 0; i < vi->max_queue_pairs; i++) {
+- virtnet_napi_disable(&vi->rq[i]);
++ for (i = 0; i < vi->max_queue_pairs; i++)
+ virtnet_napi_tx_disable(&vi->sq[i]);
+- }
+ }
+
+ if (!prog) {
+@@ -5976,13 +6023,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ vi->xdp_enabled = false;
+ }
+
++ virtnet_rx_resume_all(vi);
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (old_prog)
+ bpf_prog_put(old_prog);
+- if (netif_running(dev)) {
+- virtnet_napi_enable(&vi->rq[i]);
++ if (netif_running(dev))
+ virtnet_napi_tx_enable(&vi->sq[i]);
+- }
+ }
+
+ return 0;
+@@ -5994,11 +6040,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
+ }
+
++ virtnet_rx_resume_all(vi);
+ if (netif_running(dev)) {
+- for (i = 0; i < vi->max_queue_pairs; i++) {
+- virtnet_napi_enable(&vi->rq[i]);
++ for (i = 0; i < vi->max_queue_pairs; i++)
+ virtnet_napi_tx_enable(&vi->sq[i]);
+- }
+ }
+ if (prog)
+ bpf_prog_sub(prog, vi->max_queue_pairs - 1);
+--
+2.39.5
+
--- /dev/null
+From 9f8c122ada67ec953e09ee06e85247754aece694 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Mar 2025 01:12:10 +0000
+Subject: virtio-net: Refactor napi_disable paths
+
+From: Joe Damato <jdamato@fastly.com>
+
+[ Upstream commit 986a93045183ae2f13e6d99d990ae8be36f6d6b0 ]
+
+Create a virtnet_napi_disable helper and refactor
+virtnet_napi_tx_disable to take a struct send_queue.
+
+Signed-off-by: Joe Damato <jdamato@fastly.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Tested-by: Lei Yang <leiyang@redhat.com>
+Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Link: https://patch.msgid.link/20250307011215.266806-3-jdamato@fastly.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 25 +++++++++++++++++--------
+ 1 file changed, 17 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index d25f68004f97e..44dbb991787ed 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -2827,12 +2827,21 @@ static void virtnet_napi_tx_enable(struct send_queue *sq)
+ virtnet_napi_do_enable(sq->vq, napi);
+ }
+
+-static void virtnet_napi_tx_disable(struct napi_struct *napi)
++static void virtnet_napi_tx_disable(struct send_queue *sq)
+ {
++ struct napi_struct *napi = &sq->napi;
++
+ if (napi->weight)
+ napi_disable(napi);
+ }
+
++static void virtnet_napi_disable(struct receive_queue *rq)
++{
++ struct napi_struct *napi = &rq->napi;
++
++ napi_disable(napi);
++}
++
+ static void refill_work(struct work_struct *work)
+ {
+ struct virtnet_info *vi =
+@@ -2843,7 +2852,7 @@ static void refill_work(struct work_struct *work)
+ for (i = 0; i < vi->curr_queue_pairs; i++) {
+ struct receive_queue *rq = &vi->rq[i];
+
+- napi_disable(&rq->napi);
++ virtnet_napi_disable(rq);
+ still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
+ virtnet_napi_enable(rq);
+
+@@ -3042,8 +3051,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
+
+ static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
+ {
+- virtnet_napi_tx_disable(&vi->sq[qp_index].napi);
+- napi_disable(&vi->rq[qp_index].napi);
++ virtnet_napi_tx_disable(&vi->sq[qp_index]);
++ virtnet_napi_disable(&vi->rq[qp_index]);
+ xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
+ }
+
+@@ -3314,7 +3323,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
+ bool running = netif_running(vi->dev);
+
+ if (running) {
+- napi_disable(&rq->napi);
++ virtnet_napi_disable(rq);
+ virtnet_cancel_dim(vi, &rq->dim);
+ }
+ }
+@@ -3356,7 +3365,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
+ qindex = sq - vi->sq;
+
+ if (running)
+- virtnet_napi_tx_disable(&sq->napi);
++ virtnet_napi_tx_disable(sq);
+
+ txq = netdev_get_tx_queue(vi->dev, qindex);
+
+@@ -5933,8 +5942,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ /* Make sure NAPI is not using any XDP TX queues for RX. */
+ if (netif_running(dev)) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+- napi_disable(&vi->rq[i].napi);
+- virtnet_napi_tx_disable(&vi->sq[i].napi);
++ virtnet_napi_disable(&vi->rq[i]);
++ virtnet_napi_tx_disable(&vi->sq[i]);
+ }
+ }
+
+--
+2.39.5
+
--- /dev/null
+From c7119abb9eb0ede2efe8a6bb34e0774b82e63bf6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Mar 2025 01:12:09 +0000
+Subject: virtio-net: Refactor napi_enable paths
+
+From: Joe Damato <jdamato@fastly.com>
+
+[ Upstream commit 2af5adf962d4611a576061501faa8fb39590407e ]
+
+Refactor virtnet_napi_enable to take a struct receive_queue and
+virtnet_napi_tx_enable to take a struct send_queue. Create a helper,
+virtnet_napi_do_enable, which contains the common logic to enable a
+NAPI.
+
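+Call sites then shrink to passing just the queue. A minimal
+before/after sketch of one call site, taken from the hunks below:
+
+    /* Before: callers pass the virtqueue and NAPI context explicitly. */
+    virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+    virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
+
+    /* After: the queue-typed wrappers call virtnet_napi_do_enable(). */
+    virtnet_napi_enable(&vi->rq[i]);
+    virtnet_napi_tx_enable(&vi->sq[i]);
+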
+Signed-off-by: Joe Damato <jdamato@fastly.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Tested-by: Lei Yang <leiyang@redhat.com>
+Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Link: https://patch.msgid.link/20250307011215.266806-2-jdamato@fastly.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 37 +++++++++++++++++++++----------------
+ 1 file changed, 21 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index d1ed544ba03ac..d25f68004f97e 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -2789,7 +2789,8 @@ static void skb_recv_done(struct virtqueue *rvq)
+ virtqueue_napi_schedule(&rq->napi, rvq);
+ }
+
+-static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
++static void virtnet_napi_do_enable(struct virtqueue *vq,
++ struct napi_struct *napi)
+ {
+ napi_enable(napi);
+
+@@ -2802,10 +2803,16 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
+ local_bh_enable();
+ }
+
+-static void virtnet_napi_tx_enable(struct virtnet_info *vi,
+- struct virtqueue *vq,
+- struct napi_struct *napi)
++static void virtnet_napi_enable(struct receive_queue *rq)
+ {
++ virtnet_napi_do_enable(rq->vq, &rq->napi);
++}
++
++static void virtnet_napi_tx_enable(struct send_queue *sq)
++{
++ struct virtnet_info *vi = sq->vq->vdev->priv;
++ struct napi_struct *napi = &sq->napi;
++
+ if (!napi->weight)
+ return;
+
+@@ -2817,7 +2824,7 @@ static void virtnet_napi_tx_enable(struct virtnet_info *vi,
+ return;
+ }
+
+- return virtnet_napi_enable(vq, napi);
++ virtnet_napi_do_enable(sq->vq, napi);
+ }
+
+ static void virtnet_napi_tx_disable(struct napi_struct *napi)
+@@ -2838,7 +2845,7 @@ static void refill_work(struct work_struct *work)
+
+ napi_disable(&rq->napi);
+ still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
+- virtnet_napi_enable(rq->vq, &rq->napi);
++ virtnet_napi_enable(rq);
+
+ /* In theory, this can happen: if we don't get any buffers in
+ * we will *never* try to fill again.
+@@ -3055,8 +3062,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
+ if (err < 0)
+ goto err_xdp_reg_mem_model;
+
+- virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
+- virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
++ virtnet_napi_enable(&vi->rq[qp_index]);
++ virtnet_napi_tx_enable(&vi->sq[qp_index]);
+
+ return 0;
+
+@@ -3320,7 +3327,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
+ schedule_delayed_work(&vi->refill, 0);
+
+ if (running)
+- virtnet_napi_enable(rq->vq, &rq->napi);
++ virtnet_napi_enable(rq);
+ }
+
+ static int virtnet_rx_resize(struct virtnet_info *vi,
+@@ -3383,7 +3390,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
+ __netif_tx_unlock_bh(txq);
+
+ if (running)
+- virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
++ virtnet_napi_tx_enable(sq);
+ }
+
+ static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
+@@ -5964,9 +5971,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ if (netif_running(dev)) {
+- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+- virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+- &vi->sq[i].napi);
++ virtnet_napi_enable(&vi->rq[i]);
++ virtnet_napi_tx_enable(&vi->sq[i]);
+ }
+ }
+
+@@ -5981,9 +5987,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+
+ if (netif_running(dev)) {
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+- virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+- &vi->sq[i].napi);
++ virtnet_napi_enable(&vi->rq[i]);
++ virtnet_napi_tx_enable(&vi->sq[i]);
+ }
+ }
+ if (prog)
+--
+2.39.5
+
--- /dev/null
+From 405ef5e1d5701da64acfa6682a6d1a8ccb6dbbbf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Mar 2025 10:14:42 -0600
+Subject: virtio_pci: Use self group type for cap commands
+
+From: Daniel Jurgens <danielj@nvidia.com>
+
+[ Upstream commit 16c22c56d4282584742022a37d4f79a46ca6094a ]
+
+Section 2.12.1.2 of v1.4 of the VirtIO spec states:
+
+The device and driver capabilities commands are currently defined for
+self group type.
+1. VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY
+2. VIRTIO_ADMIN_CMD_DEVICE_CAP_GET
+3. VIRTIO_ADMIN_CMD_DRIVER_CAP_SET
+
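+The driver, however, was issuing these capability commands with the
+SR-IOV group type. Switch them to the self group type and add its
+definition to the UAPI header. A minimal sketch of the resulting
+command setup, matching the hunks below:
+
+    cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY);
+    cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF);
+    cmd.result_sg = &result_sg;
+    ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd);
+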
+Fixes: bfcad518605d ("virtio: Manage device and driver capabilities via the admin commands")
+Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
+Reviewed-by: Parav Pandit <parav@nvidia.com>
+Message-Id: <20250304161442.90700-1-danielj@nvidia.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/virtio/virtio_pci_modern.c | 4 ++--
+ include/uapi/linux/virtio_pci.h | 1 +
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
+index 5eaade7578606..d50fe030d8253 100644
+--- a/drivers/virtio/virtio_pci_modern.c
++++ b/drivers/virtio/virtio_pci_modern.c
+@@ -247,7 +247,7 @@ virtio_pci_admin_cmd_dev_parts_objects_enable(struct virtio_device *virtio_dev)
+ sg_init_one(&data_sg, get_data, sizeof(*get_data));
+ sg_init_one(&result_sg, result, sizeof(*result));
+ cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DEVICE_CAP_GET);
+- cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV);
++ cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF);
+ cmd.data_sg = &data_sg;
+ cmd.result_sg = &result_sg;
+ ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd);
+@@ -305,7 +305,7 @@ static void virtio_pci_admin_cmd_cap_init(struct virtio_device *virtio_dev)
+
+ sg_init_one(&result_sg, data, sizeof(*data));
+ cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY);
+- cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV);
++ cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF);
+ cmd.result_sg = &result_sg;
+
+ ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd);
+diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
+index 8549d45712571..c691ac210ce2e 100644
+--- a/include/uapi/linux/virtio_pci.h
++++ b/include/uapi/linux/virtio_pci.h
+@@ -246,6 +246,7 @@ struct virtio_pci_cfg_cap {
+ #define VIRTIO_ADMIN_CMD_LIST_USE 0x1
+
+ /* Admin command group type. */
++#define VIRTIO_ADMIN_GROUP_TYPE_SELF 0x0
+ #define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1
+
+ /* Transitional device admin command. */
+--
+2.39.5
+