From: Sasha Levin Date: Sun, 27 Apr 2025 23:06:48 +0000 (-0400) Subject: Fixes for 6.14 X-Git-Tag: v5.4.293~80 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=65f401cb2d2fbf9a6914b5184fb2198bda173908;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.14 Signed-off-by: Sasha Levin --- diff --git a/queue-6.14/bdev-use-bdev_io_min-for-statx-block-size.patch b/queue-6.14/bdev-use-bdev_io_min-for-statx-block-size.patch new file mode 100644 index 0000000000..2faa42409d --- /dev/null +++ b/queue-6.14/bdev-use-bdev_io_min-for-statx-block-size.patch @@ -0,0 +1,120 @@ +From c87d8260c78fdc9dc4c5738e5f65ebf76cfb689f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 21 Feb 2025 14:38:23 -0800 +Subject: bdev: use bdev_io_min() for statx block size + +From: Luis Chamberlain + +[ Upstream commit 425fbcd62d2e1330e64d8d3bf89e554830ba997f ] + +You can use lsblk to query for a block device's block size: + +lsblk -o MIN-IO /dev/nvme0n1 +MIN-IO + 4096 + +The min-io is the minimum IO the block device prefers for optimal +performance. In turn we map this to the block device block size. +The current block size exposed even for block devices with an +LBA format of 16k is 4k. Likewise devices which support 4k LBA format +but have a larger Indirection Unit of 16k have an exposed block size +of 4k. + +This incurs read-modify-writes on direct IO against devices with a +min-io larger than the page size. To fix this, use the block device +min io, which is the minimal optimal IO the device prefers. + +With this we now get: + +lsblk -o MIN-IO /dev/nvme0n1 +MIN-IO + 16384 + +And so userspace gets the appropriate information it needs for optimal +performance. 
This is verified with blkalgn against mkfs against a +device with LBA format of 4k but an NPWG of 16k (min io size) + +mkfs.xfs -f -b size=16k /dev/nvme3n1 +blkalgn -d nvme3n1 --ops Write + + Block size : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 0 | | + 1024 -> 2047 : 0 | | + 2048 -> 4095 : 0 | | + 4096 -> 8191 : 0 | | + 8192 -> 16383 : 0 | | + 16384 -> 32767 : 66 |****************************************| + 32768 -> 65535 : 0 | | + 65536 -> 131071 : 0 | | + 131072 -> 262143 : 2 |* | +Block size: 14 - 66 +Block size: 17 - 2 + + Algn size : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 0 | | + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 0 | | + 1024 -> 2047 : 0 | | + 2048 -> 4095 : 0 | | + 4096 -> 8191 : 0 | | + 8192 -> 16383 : 0 | | + 16384 -> 32767 : 66 |****************************************| + 32768 -> 65535 : 0 | | + 65536 -> 131071 : 0 | | + 131072 -> 262143 : 2 |* | +Algn size: 14 - 66 +Algn size: 17 - 2 + +Reviewed-by: Hannes Reinecke +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20250221223823.1680616-9-mcgrof@kernel.org +Reviewed-by: John Garry +Signed-off-by: Christian Brauner +Stable-dep-of: 5f33b5226c9d ("block: don't autoload drivers on stat") +Signed-off-by: Sasha Levin +--- + block/bdev.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/block/bdev.c b/block/bdev.c +index 9d73a8fbf7f99..8453f6a795d9a 100644 +--- a/block/bdev.c ++++ b/block/bdev.c +@@ -1274,9 +1274,6 @@ void bdev_statx(struct path *path, struct kstat *stat, + struct inode *backing_inode; + struct block_device *bdev; + +- if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC))) +- return; +- + backing_inode = d_backing_inode(path->dentry); + + /* +@@ -1303,6 +1300,8 @@ 
void bdev_statx(struct path *path, struct kstat *stat, + queue_atomic_write_unit_max_bytes(bd_queue)); + } + ++ stat->blksize = bdev_io_min(bdev); ++ + blkdev_put_no_open(bdev); + } + +-- +2.39.5 + diff --git a/queue-6.14/block-don-t-autoload-drivers-on-stat.patch b/queue-6.14/block-don-t-autoload-drivers-on-stat.patch new file mode 100644 index 0000000000..a59117db7c --- /dev/null +++ b/queue-6.14/block-don-t-autoload-drivers-on-stat.patch @@ -0,0 +1,107 @@ +From b8ccd8a745fdcb09743f94aef1eba2baa7fdb49e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Apr 2025 07:37:41 +0200 +Subject: block: don't autoload drivers on stat + +From: Christoph Hellwig + +[ Upstream commit 5f33b5226c9d92359e58e91ad0bf0c1791da36a1 ] + +blkdev_get_no_open can trigger the legacy autoload of block drivers. A +simple stat of a block device has not historically done that, so disable +this behavior again. + +Fixes: 9abcfbd235f5 ("block: Add atomic write support for statx") +Signed-off-by: Christoph Hellwig +Reviewed-by: Christian Brauner +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20250423053810.1683309-4-hch@lst.de +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/bdev.c | 8 ++++---- + block/blk-cgroup.c | 2 +- + block/blk.h | 2 +- + block/fops.c | 2 +- + 4 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/block/bdev.c b/block/bdev.c +index 89235796e51a5..5aebcf437f17c 100644 +--- a/block/bdev.c ++++ b/block/bdev.c +@@ -773,13 +773,13 @@ static void blkdev_put_part(struct block_device *part) + blkdev_put_whole(whole); + } + +-struct block_device *blkdev_get_no_open(dev_t dev) ++struct block_device *blkdev_get_no_open(dev_t dev, bool autoload) + { + struct block_device *bdev; + struct inode *inode; + + inode = ilookup(blockdev_superblock, dev); +- if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { ++ if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { + blk_request_module(dev); + inode = ilookup(blockdev_superblock, 
dev); + if (inode) +@@ -1001,7 +1001,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, + if (ret) + return ERR_PTR(ret); + +- bdev = blkdev_get_no_open(dev); ++ bdev = blkdev_get_no_open(dev, true); + if (!bdev) + return ERR_PTR(-ENXIO); + +@@ -1279,7 +1279,7 @@ void bdev_statx(struct path *path, struct kstat *stat, + * use I_BDEV() here; the block device has to be looked up by i_rdev + * instead. + */ +- bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev); ++ bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false); + if (!bdev) + return; + +diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c +index 9ed93d91d754a..c94efae5bcfaf 100644 +--- a/block/blk-cgroup.c ++++ b/block/blk-cgroup.c +@@ -796,7 +796,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) + return -EINVAL; + input = skip_spaces(input); + +- bdev = blkdev_get_no_open(MKDEV(major, minor)); ++ bdev = blkdev_get_no_open(MKDEV(major, minor), true); + if (!bdev) + return -ENODEV; + if (bdev_is_partition(bdev)) { +diff --git a/block/blk.h b/block/blk.h +index c0120a3d9dc57..9dcc92c7f2b50 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -94,7 +94,7 @@ static inline void blk_wait_io(struct completion *done) + wait_for_completion_io(done); + } + +-struct block_device *blkdev_get_no_open(dev_t dev); ++struct block_device *blkdev_get_no_open(dev_t dev, bool autoload); + void blkdev_put_no_open(struct block_device *bdev); + + #define BIO_INLINE_VECS 4 +diff --git a/block/fops.c b/block/fops.c +index be9f1dbea9ce0..d23ddb2dc1138 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) + if (ret) + return ret; + +- bdev = blkdev_get_no_open(inode->i_rdev); ++ bdev = blkdev_get_no_open(inode->i_rdev, true); + if (!bdev) + return -ENXIO; + +-- +2.39.5 + diff --git a/queue-6.14/block-move-blkdev_-get-put-_no_open-prototypes-out-o.patch 
b/queue-6.14/block-move-blkdev_-get-put-_no_open-prototypes-out-o.patch new file mode 100644 index 0000000000..93877156b2 --- /dev/null +++ b/queue-6.14/block-move-blkdev_-get-put-_no_open-prototypes-out-o.patch @@ -0,0 +1,56 @@ +From f0bac2898219c7d5d4e8a1b60675d720aacec8cf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Apr 2025 07:37:39 +0200 +Subject: block: move blkdev_{get,put} _no_open prototypes out of blkdev.h + +From: Christoph Hellwig + +[ Upstream commit c63202140d4b411d27380805c4d68eb11407b7f2 ] + +These are only to be used by block internal code. Remove the comment +as we grew more users due to reworking block device node opening. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Christian Brauner +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20250423053810.1683309-2-hch@lst.de +Signed-off-by: Jens Axboe +Stable-dep-of: 5f33b5226c9d ("block: don't autoload drivers on stat") +Signed-off-by: Sasha Levin +--- + block/blk.h | 3 +++ + include/linux/blkdev.h | 4 ---- + 2 files changed, 3 insertions(+), 4 deletions(-) + +diff --git a/block/blk.h b/block/blk.h +index 9cf9a0099416d..c0120a3d9dc57 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done) + wait_for_completion_io(done); + } + ++struct block_device *blkdev_get_no_open(dev_t dev); ++void blkdev_put_no_open(struct block_device *bdev); ++ + #define BIO_INLINE_VECS 4 + struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, + gfp_t gfp_mask); +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index d37751789bf58..6aa67e9b2ec08 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -1649,10 +1649,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder, + const struct blk_holder_ops *hops); + void bd_abort_claiming(struct block_device *bdev, void *holder); + +-/* just for blk-cgroup, don't use elsewhere */ +-struct block_device *blkdev_get_no_open(dev_t dev); +-void 
blkdev_put_no_open(struct block_device *bdev); +- + struct block_device *I_BDEV(struct inode *inode); + struct block_device *file_bdev(struct file *bdev_file); + bool disk_live(struct gendisk *disk); +-- +2.39.5 + diff --git a/queue-6.14/block-never-reduce-ra_pages-in-blk_apply_bdi_limits.patch b/queue-6.14/block-never-reduce-ra_pages-in-blk_apply_bdi_limits.patch new file mode 100644 index 0000000000..6dd06b3a71 --- /dev/null +++ b/queue-6.14/block-never-reduce-ra_pages-in-blk_apply_bdi_limits.patch @@ -0,0 +1,62 @@ +From d5096485ae2f486eabca609e893c7b6971d39cc3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Apr 2025 10:25:21 +0200 +Subject: block: never reduce ra_pages in blk_apply_bdi_limits +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christoph Hellwig + +[ Upstream commit 7b720c720253e2070459420b2628a7b9ee6733b3 ] + +When the user increased the read-ahead size through sysfs this value +currently gets lost if the device is reprobed, including on a resume +from suspend. + +As there is no hardware limitation for the read-ahead size there is +no real need to reset it or track a separate hardware limitation +like for max_sectors. + +This restores the pre-atomic queue limit behavior in the sd driver as +sd did not use blk_queue_io_opt and thus never updated the read ahead +size to the value based on the optimal I/O size, but changes behavior for +all other drivers. As the new behavior seems useful and sd is the +driver for which the readahead size tweaks are most useful that seems +like a worthwhile trade off. 
+ +Fixes: 804e498e0496 ("sd: convert to the atomic queue limits API") +Reported-by: Holger Hoffstätte +Signed-off-by: Christoph Hellwig +Tested-by: Holger Hoffstätte +Reviewed-by: Hannes Reinecke +Link: https://lore.kernel.org/r/20250424082521.1967286-1-hch@lst.de +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-settings.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/block/blk-settings.c b/block/blk-settings.c +index 66721afeea546..67b119ffa1689 100644 +--- a/block/blk-settings.c ++++ b/block/blk-settings.c +@@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi, + /* + * For read-ahead of large files to be effective, we need to read ahead + * at least twice the optimal I/O size. ++ * ++ * There is no hardware limitation for the read-ahead size and the user ++ * might have increased the read-ahead size through sysfs, so don't ever ++ * decrease it. + */ +- bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); ++ bdi->ra_pages = max3(bdi->ra_pages, ++ lim->io_opt * 2 / PAGE_SIZE, ++ VM_READAHEAD_PAGES); + bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT; + } + +-- +2.39.5 + diff --git a/queue-6.14/block-remove-the-backing_inode-variable-in-bdev_stat.patch b/queue-6.14/block-remove-the-backing_inode-variable-in-bdev_stat.patch new file mode 100644 index 0000000000..e2581a3970 --- /dev/null +++ b/queue-6.14/block-remove-the-backing_inode-variable-in-bdev_stat.patch @@ -0,0 +1,53 @@ +From 9a7eeadb5a98c3a6edc41b61cc5e2122d326b89c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Apr 2025 07:37:40 +0200 +Subject: block: remove the backing_inode variable in bdev_statx + +From: Christoph Hellwig + +[ Upstream commit d13b7090b2510abaa83a25717466decca23e8226 ] + +backing_inode is only used once, so remove it and update the comment +describing the bdev lookup to be a bit more clear. 
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Christian Brauner +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20250423053810.1683309-3-hch@lst.de +Signed-off-by: Jens Axboe +Stable-dep-of: 5f33b5226c9d ("block: don't autoload drivers on stat") +Signed-off-by: Sasha Levin +--- + block/bdev.c | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +diff --git a/block/bdev.c b/block/bdev.c +index 8453f6a795d9a..89235796e51a5 100644 +--- a/block/bdev.c ++++ b/block/bdev.c +@@ -1271,18 +1271,15 @@ void sync_bdevs(bool wait) + void bdev_statx(struct path *path, struct kstat *stat, + u32 request_mask) + { +- struct inode *backing_inode; + struct block_device *bdev; + +- backing_inode = d_backing_inode(path->dentry); +- + /* +- * Note that backing_inode is the inode of a block device node file, +- * not the block device's internal inode. Therefore it is *not* valid +- * to use I_BDEV() here; the block device has to be looked up by i_rdev ++ * Note that d_backing_inode() returns the block device node inode, not ++ * the block device's internal inode. Therefore it is *not* valid to ++ * use I_BDEV() here; the block device has to be looked up by i_rdev + * instead. 
+ */ +- bdev = blkdev_get_no_open(backing_inode->i_rdev); ++ bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev); + if (!bdev) + return; + +-- +2.39.5 + diff --git a/queue-6.14/bpf-add-namespace-to-bpf-internal-symbols.patch b/queue-6.14/bpf-add-namespace-to-bpf-internal-symbols.patch new file mode 100644 index 0000000000..68bd0de059 --- /dev/null +++ b/queue-6.14/bpf-add-namespace-to-bpf-internal-symbols.patch @@ -0,0 +1,87 @@ +From d1e43c16f00ab56df38b82b77443ff24572d06c3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Apr 2025 18:45:42 -0700 +Subject: bpf: Add namespace to BPF internal symbols + +From: Alexei Starovoitov + +[ Upstream commit f88886de0927a2adf4c1b4c5c1f1d31d2023ef74 ] + +Add namespace to BPF internal symbols used by light skeleton +to prevent abuse and document with the code their allowed usage. + +Fixes: b1d18a7574d0 ("bpf: Extend sys_bpf commands for bpf_syscall programs.") +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Kumar Kartikeya Dwivedi +Link: https://lore.kernel.org/bpf/20250425014542.62385-1-alexei.starovoitov@gmail.com +Signed-off-by: Sasha Levin +--- + Documentation/bpf/bpf_devel_QA.rst | 8 ++++++++ + kernel/bpf/preload/bpf_preload_kern.c | 1 + + kernel/bpf/syscall.c | 6 +++--- + 3 files changed, 12 insertions(+), 3 deletions(-) + +diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst +index de27e1620821c..0acb4c9b8d90f 100644 +--- a/Documentation/bpf/bpf_devel_QA.rst ++++ b/Documentation/bpf/bpf_devel_QA.rst +@@ -382,6 +382,14 @@ In case of new BPF instructions, once the changes have been accepted + into the Linux kernel, please implement support into LLVM's BPF back + end. See LLVM_ section below for further information. + ++Q: What "BPF_INTERNAL" symbol namespace is for? ++----------------------------------------------- ++A: Symbols exported as BPF_INTERNAL can only be used by BPF infrastructure ++like preload kernel modules with light skeleton. 
Most symbols outside ++of BPF_INTERNAL are not expected to be used by code outside of BPF either. ++Symbols may lack the designation because they predate the namespaces, ++or due to an oversight. ++ + Stable submission + ================= + +diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c +index 0c63bc2cd895a..56a81df7a9d7c 100644 +--- a/kernel/bpf/preload/bpf_preload_kern.c ++++ b/kernel/bpf/preload/bpf_preload_kern.c +@@ -89,4 +89,5 @@ static void __exit fini(void) + } + late_initcall(load); + module_exit(fini); ++MODULE_IMPORT_NS("BPF_INTERNAL"); + MODULE_LICENSE("GPL"); +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index e1e42e918ba7f..1c2caae0d8946 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -1562,7 +1562,7 @@ struct bpf_map *bpf_map_get(u32 ufd) + + return map; + } +-EXPORT_SYMBOL(bpf_map_get); ++EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL"); + + struct bpf_map *bpf_map_get_with_uref(u32 ufd) + { +@@ -3345,7 +3345,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) + bpf_link_inc(link); + return link; + } +-EXPORT_SYMBOL(bpf_link_get_from_fd); ++EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); + + static void bpf_tracing_link_release(struct bpf_link *link) + { +@@ -5981,7 +5981,7 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size) + return ____bpf_sys_bpf(cmd, attr, size); + } + } +-EXPORT_SYMBOL(kern_sys_bpf); ++EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL"); + + static const struct bpf_func_proto bpf_sys_bpf_proto = { + .func = bpf_sys_bpf, +-- +2.39.5 + diff --git a/queue-6.14/btrfs-avoid-page_lockend-underflow-in-btrfs_punch_ho.patch b/queue-6.14/btrfs-avoid-page_lockend-underflow-in-btrfs_punch_ho.patch new file mode 100644 index 0000000000..1aeee77f5f --- /dev/null +++ b/queue-6.14/btrfs-avoid-page_lockend-underflow-in-btrfs_punch_ho.patch @@ -0,0 +1,95 @@ +From 3f9ddc4906b449a18458d225e01e88b6b949a8a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin 
+Date: Sat, 29 Mar 2025 17:46:35 +1030 +Subject: btrfs: avoid page_lockend underflow in btrfs_punch_hole_lock_range() + +From: Qu Wenruo + +[ Upstream commit bc2dbc4983afedd198490cca043798f57c93e9bf ] + +[BUG] +When running btrfs/004 with 4K fs block size and 64K page size, +sometimes fsstress workload can take 100% CPU for a while, but not long +enough to trigger a 120s hang warning. + +[CAUSE] +When such 100% CPU usage happens, btrfs_punch_hole_lock_range() is +always in the call trace. + +In one example where this problem happens, the function +btrfs_punch_hole_lock_range() got the following parameters: + + lock_start = 4096, lockend = 20469 + +Then we calculate @page_lockstart by rounding up lock_start to page +boundary, which is 64K (page size is 64K). + +For @page_lockend, we round down the value towards page boundary, which +results in 0. Then since we need to pass an inclusive end to +filemap_range_has_page(), we subtract 1 from the rounded down value, +resulting in (u64)-1. + +In the above case, the range is inside the same page, and we do not even +need to call filemap_range_has_page(), let alone call it with +(u64)-1 at the end. + +This behavior will cause btrfs_punch_hole_lock_range() to busy loop +waiting for an irrelevant range to have its pages dropped. + +[FIX] +Calculate @page_lockend by just rounding down @lockend, without +decreasing the value by one. So @page_lockend will no longer overflow. + +Then exit early if @page_lockend is no larger than @page_lockstart. +As it means either the range is inside the same page, or the two pages +are adjacent already. + +Finally only decrease @page_lockend when calling filemap_range_has_page(). 
+ +Fixes: 0528476b6ac7 ("btrfs: fix the filemap_range_has_page() call in btrfs_punch_hole_lock_range()") +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/file.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c +index 0b568c8d24cbc..a92997a583bd2 100644 +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2104,15 +2104,20 @@ static void btrfs_punch_hole_lock_range(struct inode *inode, + * will always return true. + * So here we need to do extra page alignment for + * filemap_range_has_page(). ++ * ++ * And do not decrease page_lockend right now, as it can be 0. + */ + const u64 page_lockstart = round_up(lockstart, PAGE_SIZE); +- const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1; ++ const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE); + + while (1) { + truncate_pagecache_range(inode, lockstart, lockend); + + lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, + cached_state); ++ /* The same page or adjacent pages. */ ++ if (page_lockend <= page_lockstart) ++ break; + /* + * We can't have ordered extents in the range, nor dirty/writeback + * pages, because we have locked the inode's VFS lock in exclusive +@@ -2124,7 +2129,7 @@ static void btrfs_punch_hole_lock_range(struct inode *inode, + * we do, unlock the range and retry. 
+ */ + if (!filemap_range_has_page(inode->i_mapping, page_lockstart, +- page_lockend)) ++ page_lockend - 1)) + break; + + unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, +-- +2.39.5 + diff --git a/queue-6.14/btrfs-zoned-return-eio-on-raid1-block-group-write-po.patch b/queue-6.14/btrfs-zoned-return-eio-on-raid1-block-group-write-po.patch new file mode 100644 index 0000000000..b28cb17a58 --- /dev/null +++ b/queue-6.14/btrfs-zoned-return-eio-on-raid1-block-group-write-po.patch @@ -0,0 +1,149 @@ +From 1f2722c0acfbf983fb376a2199f8c40d32c86f13 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Mar 2025 16:04:01 +0100 +Subject: btrfs: zoned: return EIO on RAID1 block group write pointer mismatch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Johannes Thumshirn + +[ Upstream commit b0c26f47992672661340dd6ea931240213016609 ] + +There was a bug report about a NULL pointer dereference in +__btrfs_add_free_space_zoned() that ultimately happens because a +conversion from the default metadata profile DUP to a RAID1 profile on two +disks. 
+ +The stack trace has the following signature: + + BTRFS error (device sdc): zoned: write pointer offset mismatch of zones in raid1 profile + BUG: kernel NULL pointer dereference, address: 0000000000000058 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI + RIP: 0010:__btrfs_add_free_space_zoned.isra.0+0x61/0x1a0 + RSP: 0018:ffffa236b6f3f6d0 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: ffff96c8132f3400 RCX: 0000000000000001 + RDX: 0000000010000000 RSI: 0000000000000000 RDI: ffff96c8132f3410 + RBP: 0000000010000000 R08: 0000000000000003 R09: 0000000000000000 + R10: 0000000000000000 R11: 00000000ffffffff R12: 0000000000000000 + R13: ffff96c758f65a40 R14: 0000000000000001 R15: 000011aac0000000 + FS: 00007fdab1cb2900(0000) GS:ffff96e60ca00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000058 CR3: 00000001a05ae000 CR4: 0000000000350ef0 + Call Trace: + + ? __die_body.cold+0x19/0x27 + ? page_fault_oops+0x15c/0x2f0 + ? exc_page_fault+0x7e/0x180 + ? asm_exc_page_fault+0x26/0x30 + ? __btrfs_add_free_space_zoned.isra.0+0x61/0x1a0 + btrfs_add_free_space_async_trimmed+0x34/0x40 + btrfs_add_new_free_space+0x107/0x120 + btrfs_make_block_group+0x104/0x2b0 + btrfs_create_chunk+0x977/0xf20 + btrfs_chunk_alloc+0x174/0x510 + ? srso_return_thunk+0x5/0x5f + btrfs_inc_block_group_ro+0x1b1/0x230 + btrfs_relocate_block_group+0x9e/0x410 + btrfs_relocate_chunk+0x3f/0x130 + btrfs_balance+0x8ac/0x12b0 + ? srso_return_thunk+0x5/0x5f + ? srso_return_thunk+0x5/0x5f + ? __kmalloc_cache_noprof+0x14c/0x3e0 + btrfs_ioctl+0x2686/0x2a80 + ? srso_return_thunk+0x5/0x5f + ? ioctl_has_perm.constprop.0.isra.0+0xd2/0x120 + __x64_sys_ioctl+0x97/0xc0 + do_syscall_64+0x82/0x160 + ? srso_return_thunk+0x5/0x5f + ? __memcg_slab_free_hook+0x11a/0x170 + ? srso_return_thunk+0x5/0x5f + ? kmem_cache_free+0x3f0/0x450 + ? srso_return_thunk+0x5/0x5f + ? 
srso_return_thunk+0x5/0x5f + ? syscall_exit_to_user_mode+0x10/0x210 + ? srso_return_thunk+0x5/0x5f + ? do_syscall_64+0x8e/0x160 + ? sysfs_emit+0xaf/0xc0 + ? srso_return_thunk+0x5/0x5f + ? srso_return_thunk+0x5/0x5f + ? seq_read_iter+0x207/0x460 + ? srso_return_thunk+0x5/0x5f + ? vfs_read+0x29c/0x370 + ? srso_return_thunk+0x5/0x5f + ? srso_return_thunk+0x5/0x5f + ? syscall_exit_to_user_mode+0x10/0x210 + ? srso_return_thunk+0x5/0x5f + ? do_syscall_64+0x8e/0x160 + ? srso_return_thunk+0x5/0x5f + ? exc_page_fault+0x7e/0x180 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + RIP: 0033:0x7fdab1e0ca6d + RSP: 002b:00007ffeb2b60c80 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 + RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fdab1e0ca6d + RDX: 00007ffeb2b60d80 RSI: 00000000c4009420 RDI: 0000000000000003 + RBP: 00007ffeb2b60cd0 R08: 0000000000000000 R09: 0000000000000013 + R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 + R13: 00007ffeb2b6343b R14: 00007ffeb2b60d80 R15: 0000000000000001 + + CR2: 0000000000000058 + ---[ end trace 0000000000000000 ]--- + +The 1st line is the most interesting here: + + BTRFS error (device sdc): zoned: write pointer offset mismatch of zones in raid1 profile + +When a RAID1 block-group is created and a write pointer mismatch between +the disks in the RAID set is detected, btrfs sets the alloc_offset to the +length of the block group marking it as full. Afterwards the code expects +that a balance operation will evacuate the data in this block-group and +repair the problems. + +But before this is possible, the new space of this block-group will be +accounted in the free space cache. But in __btrfs_add_free_space_zoned() +it is being checked if it is a initial creation of a block group and if +not a reclaim decision will be made. 
But the decision whether a block-group's +free space accounting is done for an initial creation depends on whether the +size of the added free space is the whole length of the block-group and +the allocation offset is 0. + +But as btrfs_load_block_group_zone_info() sets the allocation offset to +the zone capacity (i.e. marking the block-group as full) this +initial-creation condition is not met, and the space_info pointer in the 'struct +btrfs_block_group' has not yet been assigned. + +Fail creation of the block group and rely on manual user intervention to +re-balance the filesystem. + +Afterwards the filesystem can be unmounted, mounted in degraded mode and +the missing device can be removed after a full balance of the filesystem. + +Reported-by: 西木野羰基 +Link: https://lore.kernel.org/linux-btrfs/CAB_b4sBhDe3tscz=duVyhc9hNE+gu=B8CrgLO152uMyanR8BEA@mail.gmail.com/ +Fixes: b1934cd60695 ("btrfs: zoned: handle broken write pointer on zones") +Reviewed-by: Anand Jain +Signed-off-by: Johannes Thumshirn +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/zoned.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c +index aaf925897fdda..978a57da8b4f5 100644 +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -1659,7 +1659,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) + * stripe. 
+ */ + cache->alloc_offset = cache->zone_capacity; +- ret = 0; + } + + out: +-- +2.39.5 + diff --git a/queue-6.14/ceph-fix-incorrect-flush-end-position-calculation.patch b/queue-6.14/ceph-fix-incorrect-flush-end-position-calculation.patch new file mode 100644 index 0000000000..ec1bd78ee5 --- /dev/null +++ b/queue-6.14/ceph-fix-incorrect-flush-end-position-calculation.patch @@ -0,0 +1,43 @@ +From aa71665db9dfcfcd8024c828fccc980f056548cd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Mar 2025 10:47:11 +0000 +Subject: ceph: Fix incorrect flush end position calculation + +From: David Howells + +[ Upstream commit f452a2204614fc10e2c3b85904c4bd300c2789dc ] + +In ceph, in fill_fscrypt_truncate(), the end flush position is calculated +by: + + loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1; + +but that's using the block shift not the block size. + +Fix this to use the block size instead. + +Fixes: 5c64737d2536 ("ceph: add truncate size handling support for fscrypt") +Signed-off-by: David Howells +Reviewed-by: Viacheslav Dubeyko +Signed-off-by: Ilya Dryomov +Signed-off-by: Sasha Levin +--- + fs/ceph/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c +index 7dd6c2275085b..e3ab07797c850 100644 +--- a/fs/ceph/inode.c ++++ b/fs/ceph/inode.c +@@ -2362,7 +2362,7 @@ static int fill_fscrypt_truncate(struct inode *inode, + + /* Try to writeback the dirty pagecaches */ + if (issued & (CEPH_CAP_FILE_BUFFER)) { +- loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1; ++ loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1; + + ret = filemap_write_and_wait_range(inode->i_mapping, + orig_pos, lend); +-- +2.39.5 + diff --git a/queue-6.14/cgroup-cpuset-v1-add-missing-support-for-cpuset_v2_m.patch b/queue-6.14/cgroup-cpuset-v1-add-missing-support-for-cpuset_v2_m.patch new file mode 100644 index 0000000000..0454f3185e --- /dev/null +++ b/queue-6.14/cgroup-cpuset-v1-add-missing-support-for-cpuset_v2_m.patch @@ 
-0,0 +1,105 @@ +From 24e6f3337536d9bdf593a691bd534dc1613d5f91 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Apr 2025 21:17:51 +0000 +Subject: cgroup/cpuset-v1: Add missing support for cpuset_v2_mode +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: T.J. Mercier + +[ Upstream commit 1bf67c8fdbda21fadd564a12dbe2b13c1ea5eda7 ] + +Android has mounted the v1 cpuset controller using filesystem type +"cpuset" (not "cgroup") since 2015 [1], and depends on the resulting +behavior where the controller name is not added as a prefix for cgroupfs +files. [2] + +Later, a problem was discovered where cpu hotplug onlining did not +affect the cpuset/cpus files, which Android carried an out-of-tree patch +to address for a while. An attempt was made to upstream this patch, but +the recommendation was to use the "cpuset_v2_mode" mount option +instead. [3] + +An effort was made to do so, but this fails with "cgroup: Unknown +parameter 'cpuset_v2_mode'" because commit e1cba4b85daa ("cgroup: Add +mount flag to enable cpuset to use v2 behavior in v1 cgroup") did not +update the special cased cpuset_mount(), and only the cgroup (v1) +filesystem type was updated. 
+ +Add parameter parsing to the cpuset filesystem type so that +cpuset_v2_mode works like the cgroup filesystem type: + +$ mkdir /dev/cpuset +$ mount -t cpuset -ocpuset_v2_mode none /dev/cpuset +$ mount|grep cpuset +none on /dev/cpuset type cgroup (rw,relatime,cpuset,noprefix,cpuset_v2_mode,release_agent=/sbin/cpuset_release_agent) + +[1] https://cs.android.com/android/_/android/platform/system/core/+/b769c8d24fd7be96f8968aa4c80b669525b930d3 +[2] https://cs.android.com/android/platform/superproject/main/+/main:system/core/libprocessgroup/setup/cgroup_map_write.cpp;drc=2dac5d89a0f024a2d0cc46a80ba4ee13472f1681;l=192 +[3] https://lore.kernel.org/lkml/f795f8be-a184-408a-0b5a-553d26061385@redhat.com/T/ + +Fixes: e1cba4b85daa ("cgroup: Add mount flag to enable cpuset to use v2 behavior in v1 cgroup") +Signed-off-by: T.J. Mercier +Acked-by: Waiman Long +Reviewed-by: Kamalesh Babulal +Acked-by: Michal Koutný +Signed-off-by: Tejun Heo +Signed-off-by: Sasha Levin +--- + kernel/cgroup/cgroup.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index 81f078c059e86..68d58753c75c3 100644 +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -2339,9 +2339,37 @@ static struct file_system_type cgroup2_fs_type = { + }; + + #ifdef CONFIG_CPUSETS_V1 ++enum cpuset_param { ++ Opt_cpuset_v2_mode, ++}; ++ ++static const struct fs_parameter_spec cpuset_fs_parameters[] = { ++ fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), ++ {} ++}; ++ ++static int cpuset_parse_param(struct fs_context *fc, struct fs_parameter *param) ++{ ++ struct cgroup_fs_context *ctx = cgroup_fc2context(fc); ++ struct fs_parse_result result; ++ int opt; ++ ++ opt = fs_parse(fc, cpuset_fs_parameters, param, &result); ++ if (opt < 0) ++ return opt; ++ ++ switch (opt) { ++ case Opt_cpuset_v2_mode: ++ ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; ++ return 0; ++ } ++ return -EINVAL; ++} ++ + static const struct fs_context_operations 
cpuset_fs_context_ops = { + .get_tree = cgroup1_get_tree, + .free = cgroup_fs_context_free, ++ .parse_param = cpuset_parse_param, + }; + + /* +@@ -2378,6 +2406,7 @@ static int cpuset_init_fs_context(struct fs_context *fc) + static struct file_system_type cpuset_fs_type = { + .name = "cpuset", + .init_fs_context = cpuset_init_fs_context, ++ .parameters = cpuset_fs_parameters, + .fs_flags = FS_USERNS_MOUNT, + }; + #endif +-- +2.39.5 + diff --git a/queue-6.14/cpufreq-apple-soc-fix-null-ptr-deref-in-apple_soc_cp.patch b/queue-6.14/cpufreq-apple-soc-fix-null-ptr-deref-in-apple_soc_cp.patch new file mode 100644 index 0000000000..da31a5889d --- /dev/null +++ b/queue-6.14/cpufreq-apple-soc-fix-null-ptr-deref-in-apple_soc_cp.patch @@ -0,0 +1,49 @@ +From c9563ac07a33660df349cad5b1af3bfe43311b03 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Apr 2025 20:48:13 +0800 +Subject: cpufreq: apple-soc: Fix null-ptr-deref in + apple_soc_cpufreq_get_rate() + +From: Henry Martin + +[ Upstream commit 9992649f6786921873a9b89dafa5e04d8c5fef2b ] + +cpufreq_cpu_get_raw() can return NULL when the target CPU is not present +in the policy->cpus mask. apple_soc_cpufreq_get_rate() does not check +for this case, which results in a NULL pointer dereference. 
+ +Fixes: 6286bbb40576 ("cpufreq: apple-soc: Add new driver to control Apple SoC CPU P-states") +Signed-off-by: Henry Martin +Signed-off-by: Viresh Kumar +Signed-off-by: Sasha Levin +--- + drivers/cpufreq/apple-soc-cpufreq.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c +index 269b18c62d040..82007f6a24d2a 100644 +--- a/drivers/cpufreq/apple-soc-cpufreq.c ++++ b/drivers/cpufreq/apple-soc-cpufreq.c +@@ -134,11 +134,17 @@ static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = { + + static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu) + { +- struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); +- struct apple_cpu_priv *priv = policy->driver_data; ++ struct cpufreq_policy *policy; ++ struct apple_cpu_priv *priv; + struct cpufreq_frequency_table *p; + unsigned int pstate; + ++ policy = cpufreq_cpu_get_raw(cpu); ++ if (unlikely(!policy)) ++ return 0; ++ ++ priv = policy->driver_data; ++ + if (priv->info->cur_pstate_mask) { + u32 reg = readl_relaxed(priv->reg_base + APPLE_DVFS_STATUS); + +-- +2.39.5 + diff --git a/queue-6.14/cpufreq-cppc-fix-invalid-return-value-in-.get-callba.patch b/queue-6.14/cpufreq-cppc-fix-invalid-return-value-in-.get-callba.patch new file mode 100644 index 0000000000..5ee619baec --- /dev/null +++ b/queue-6.14/cpufreq-cppc-fix-invalid-return-value-in-.get-callba.patch @@ -0,0 +1,40 @@ +From c270997fc0d99ab1fcf8e80c185796e4bbd07695 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 13 Apr 2025 11:11:42 +0100 +Subject: cpufreq: cppc: Fix invalid return value in .get() callback + +From: Marc Zyngier + +[ Upstream commit 2b8e6b58889c672e1ae3601d9b2b070be4dc2fbc ] + +Returning a negative error code in a function with an unsigned +return type is a pretty bad idea. It is probably worse when the +justification for the change is "our static analysis tool found it". 
+ +Fixes: cf7de25878a1 ("cppc_cpufreq: Fix possible null pointer dereference") +Signed-off-by: Marc Zyngier +Cc: "Rafael J. Wysocki" +Cc: Viresh Kumar +Reviewed-by: Lifeng Zheng +Signed-off-by: Viresh Kumar +Signed-off-by: Sasha Levin +--- + drivers/cpufreq/cppc_cpufreq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c +index 8f512448382f4..ba7c16c0e4756 100644 +--- a/drivers/cpufreq/cppc_cpufreq.c ++++ b/drivers/cpufreq/cppc_cpufreq.c +@@ -749,7 +749,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) + int ret; + + if (!policy) +- return -ENODEV; ++ return 0; + + cpu_data = policy->driver_data; + +-- +2.39.5 + diff --git a/queue-6.14/cpufreq-do-not-enable-by-default-during-compile-test.patch b/queue-6.14/cpufreq-do-not-enable-by-default-during-compile-test.patch new file mode 100644 index 0000000000..2ff08c6321 --- /dev/null +++ b/queue-6.14/cpufreq-do-not-enable-by-default-during-compile-test.patch @@ -0,0 +1,98 @@ +From b47e6c7651654b1b4640eebc24502c38a820566d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 4 Apr 2025 14:40:06 +0200 +Subject: cpufreq: Do not enable by default during compile testing + +From: Krzysztof Kozlowski + +[ Upstream commit d4f610a9bafdec8e3210789aa19335367da696ea ] + +Enabling the compile test should not cause automatic enabling of all +drivers. 
+ +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Viresh Kumar +Stable-dep-of: a374f28700ab ("cpufreq: fix compile-test defaults") +Signed-off-by: Sasha Levin +--- + drivers/cpufreq/Kconfig.arm | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm +index 4f9cb943d945c..d4d625ded285f 100644 +--- a/drivers/cpufreq/Kconfig.arm ++++ b/drivers/cpufreq/Kconfig.arm +@@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ + config ARM_BRCMSTB_AVS_CPUFREQ + tristate "Broadcom STB AVS CPUfreq driver" + depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || COMPILE_TEST +- default y ++ default ARCH_BRCMSTB + help + Some Broadcom STB SoCs use a co-processor running proprietary firmware + ("AVS") to handle voltage and frequency scaling. This driver provides +@@ -181,7 +181,7 @@ config ARM_RASPBERRYPI_CPUFREQ + config ARM_S3C64XX_CPUFREQ + bool "Samsung S3C64XX" + depends on CPU_S3C6410 || COMPILE_TEST +- default y ++ default CPU_S3C6410 + help + This adds the CPUFreq driver for Samsung S3C6410 SoC. + +@@ -190,7 +190,7 @@ config ARM_S3C64XX_CPUFREQ + config ARM_S5PV210_CPUFREQ + bool "Samsung S5PV210 and S5PC110" + depends on CPU_S5PV210 || COMPILE_TEST +- default y ++ default CPU_S5PV210 + help + This adds the CPUFreq driver for Samsung S5PV210 and + S5PC110 SoCs. +@@ -214,7 +214,7 @@ config ARM_SCMI_CPUFREQ + config ARM_SPEAR_CPUFREQ + bool "SPEAr CPUFreq support" + depends on PLAT_SPEAR || COMPILE_TEST +- default y ++ default PLAT_SPEAR + help + This adds the CPUFreq driver support for SPEAr SOCs. + +@@ -233,7 +233,7 @@ config ARM_TEGRA20_CPUFREQ + tristate "Tegra20/30 CPUFreq support" + depends on ARCH_TEGRA || COMPILE_TEST + depends on CPUFREQ_DT +- default y ++ default ARCH_TEGRA + help + This adds the CPUFreq driver support for Tegra20/30 SOCs. 
+ +@@ -241,7 +241,7 @@ config ARM_TEGRA124_CPUFREQ + bool "Tegra124 CPUFreq support" + depends on ARCH_TEGRA || COMPILE_TEST + depends on CPUFREQ_DT +- default y ++ default ARCH_TEGRA + help + This adds the CPUFreq driver support for Tegra124 SOCs. + +@@ -256,14 +256,14 @@ config ARM_TEGRA194_CPUFREQ + tristate "Tegra194 CPUFreq support" + depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST) + depends on TEGRA_BPMP +- default y ++ default ARCH_TEGRA + help + This adds CPU frequency driver support for Tegra194 SOCs. + + config ARM_TI_CPUFREQ + bool "Texas Instruments CPUFreq support" + depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST +- default y ++ default ARCH_OMAP2PLUS || ARCH_K3 + help + This driver enables valid OPPs on the running platform based on + values contained within the SoC in use. Enable this in order to +-- +2.39.5 + diff --git a/queue-6.14/cpufreq-fix-compile-test-defaults.patch b/queue-6.14/cpufreq-fix-compile-test-defaults.patch new file mode 100644 index 0000000000..c04147e7ac --- /dev/null +++ b/queue-6.14/cpufreq-fix-compile-test-defaults.patch @@ -0,0 +1,80 @@ +From 3a0eaccb1e5224d271a72931c33fde40e74141db Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Apr 2025 09:28:38 +0200 +Subject: cpufreq: fix compile-test defaults + +From: Johan Hovold + +[ Upstream commit a374f28700abd20e8a7d026f89aa26f759445918 ] + +Commit 3f66425a4fc8 ("cpufreq: Enable COMPILE_TEST on Arm drivers") +enabled compile testing of most Arm CPUFreq drivers but left the +existing default values unchanged so that many drivers are enabled by +default whenever COMPILE_TEST is selected. 
+ +This specifically results in the S3C64XX CPUFreq driver being enabled +and initialised during boot of non-S3C64XX platforms with the following +error logged: + + cpufreq: Unable to obtain ARMCLK: -2 + +Commit d4f610a9bafd ("cpufreq: Do not enable by default during compile +testing") recently fixed most of the default values, but two entries +were missed and two could use a more specific default condition. + +Fix the default values for drivers that can be compile tested and that +should be enabled by default when not compile testing. + +Fixes: 3f66425a4fc8 ("cpufreq: Enable COMPILE_TEST on Arm drivers") +Cc: Rob Herring (Arm) +Signed-off-by: Johan Hovold +Reviewed-by: Krzysztof Kozlowski +Signed-off-by: Viresh Kumar +Signed-off-by: Sasha Levin +--- + drivers/cpufreq/Kconfig.arm | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm +index d4d625ded285f..0d46402e30942 100644 +--- a/drivers/cpufreq/Kconfig.arm ++++ b/drivers/cpufreq/Kconfig.arm +@@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ + config ARM_BRCMSTB_AVS_CPUFREQ + tristate "Broadcom STB AVS CPUfreq driver" + depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || COMPILE_TEST +- default ARCH_BRCMSTB ++ default y if ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ + help + Some Broadcom STB SoCs use a co-processor running proprietary firmware + ("AVS") to handle voltage and frequency scaling. This driver provides +@@ -88,7 +88,7 @@ config ARM_HIGHBANK_CPUFREQ + tristate "Calxeda Highbank-based" + depends on ARCH_HIGHBANK || COMPILE_TEST + depends on CPUFREQ_DT && REGULATOR && PL320_MBOX +- default m ++ default m if ARCH_HIGHBANK + help + This adds the CPUFreq driver for Calxeda Highbank SoC + based boards. 
+@@ -133,7 +133,7 @@ config ARM_MEDIATEK_CPUFREQ + config ARM_MEDIATEK_CPUFREQ_HW + tristate "MediaTek CPUFreq HW driver" + depends on ARCH_MEDIATEK || COMPILE_TEST +- default m ++ default m if ARCH_MEDIATEK + help + Support for the CPUFreq HW driver. + Some MediaTek chipsets have a HW engine to offload the steps +@@ -256,7 +256,7 @@ config ARM_TEGRA194_CPUFREQ + tristate "Tegra194 CPUFreq support" + depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST) + depends on TEGRA_BPMP +- default ARCH_TEGRA ++ default ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC + help + This adds CPU frequency driver support for Tegra194 SOCs. + +-- +2.39.5 + diff --git a/queue-6.14/cpufreq-scmi-fix-null-ptr-deref-in-scmi_cpufreq_get_.patch b/queue-6.14/cpufreq-scmi-fix-null-ptr-deref-in-scmi_cpufreq_get_.patch new file mode 100644 index 0000000000..d41730e08b --- /dev/null +++ b/queue-6.14/cpufreq-scmi-fix-null-ptr-deref-in-scmi_cpufreq_get_.patch @@ -0,0 +1,51 @@ +From 4f9719687b9613b4a71c00b5c3a0e812d7de8d68 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Apr 2025 23:03:53 +0800 +Subject: cpufreq: scmi: Fix null-ptr-deref in scmi_cpufreq_get_rate() + +From: Henry Martin + +[ Upstream commit 484d3f15cc6cbaa52541d6259778e715b2c83c54 ] + +cpufreq_cpu_get_raw() can return NULL when the target CPU is not present +in the policy->cpus mask. scmi_cpufreq_get_rate() does not check for +this case, which results in a NULL pointer dereference. + +Add NULL check after cpufreq_cpu_get_raw() to prevent this issue. 
+ +Fixes: 99d6bdf33877 ("cpufreq: add support for CPU DVFS based on SCMI message protocol") +Signed-off-by: Henry Martin +Acked-by: Sudeep Holla +Signed-off-by: Viresh Kumar +Signed-off-by: Sasha Levin +--- + drivers/cpufreq/scmi-cpufreq.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c +index 914bf2c940a03..9c6eb1238f1be 100644 +--- a/drivers/cpufreq/scmi-cpufreq.c ++++ b/drivers/cpufreq/scmi-cpufreq.c +@@ -37,11 +37,17 @@ static struct cpufreq_driver scmi_cpufreq_driver; + + static unsigned int scmi_cpufreq_get_rate(unsigned int cpu) + { +- struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); +- struct scmi_data *priv = policy->driver_data; ++ struct cpufreq_policy *policy; ++ struct scmi_data *priv; + unsigned long rate; + int ret; + ++ policy = cpufreq_cpu_get_raw(cpu); ++ if (unlikely(!policy)) ++ return 0; ++ ++ priv = policy->driver_data; ++ + ret = perf_ops->freq_get(ph, priv->domain_id, &rate, false); + if (ret) + return 0; +-- +2.39.5 + diff --git a/queue-6.14/cpufreq-scpi-fix-null-ptr-deref-in-scpi_cpufreq_get_.patch b/queue-6.14/cpufreq-scpi-fix-null-ptr-deref-in-scpi_cpufreq_get_.patch new file mode 100644 index 0000000000..568ecd0272 --- /dev/null +++ b/queue-6.14/cpufreq-scpi-fix-null-ptr-deref-in-scpi_cpufreq_get_.patch @@ -0,0 +1,49 @@ +From b09d7e9afd73a36a06a08bb4f4f8223a3a892c96 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Apr 2025 23:03:54 +0800 +Subject: cpufreq: scpi: Fix null-ptr-deref in scpi_cpufreq_get_rate() + +From: Henry Martin + +[ Upstream commit 73b24dc731731edf762f9454552cb3a5b7224949 ] + +cpufreq_cpu_get_raw() can return NULL when the target CPU is not present +in the policy->cpus mask. scpi_cpufreq_get_rate() does not check for +this case, which results in a NULL pointer dereference. 
+ +Fixes: 343a8d17fa8d ("cpufreq: scpi: remove arm_big_little dependency") +Signed-off-by: Henry Martin +Acked-by: Sudeep Holla +Signed-off-by: Viresh Kumar +Signed-off-by: Sasha Levin +--- + drivers/cpufreq/scpi-cpufreq.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c +index 1f97b949763fa..9118856e17365 100644 +--- a/drivers/cpufreq/scpi-cpufreq.c ++++ b/drivers/cpufreq/scpi-cpufreq.c +@@ -29,9 +29,16 @@ static struct scpi_ops *scpi_ops; + + static unsigned int scpi_cpufreq_get_rate(unsigned int cpu) + { +- struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); +- struct scpi_data *priv = policy->driver_data; +- unsigned long rate = clk_get_rate(priv->clk); ++ struct cpufreq_policy *policy; ++ struct scpi_data *priv; ++ unsigned long rate; ++ ++ policy = cpufreq_cpu_get_raw(cpu); ++ if (unlikely(!policy)) ++ return 0; ++ ++ priv = policy->driver_data; ++ rate = clk_get_rate(priv->clk); + + return rate / 1000; + } +-- +2.39.5 + diff --git a/queue-6.14/cpufreq-sun50i-prevent-out-of-bounds-access.patch b/queue-6.14/cpufreq-sun50i-prevent-out-of-bounds-access.patch new file mode 100644 index 0000000000..f3e85f6314 --- /dev/null +++ b/queue-6.14/cpufreq-sun50i-prevent-out-of-bounds-access.patch @@ -0,0 +1,81 @@ +From e4740cd400b0d34137fa688dcc5e93d9a8344fe9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Mar 2025 15:55:57 +0000 +Subject: cpufreq: sun50i: prevent out-of-bounds access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Andre Przywara + +[ Upstream commit 14c8a418159e541d70dbf8fc71225d1623beaf0f ] + +A KASAN enabled kernel reports an out-of-bounds access when handling the +nvmem cell in the sun50i cpufreq driver: +================================================================== +BUG: KASAN: slab-out-of-bounds in sun50i_cpufreq_nvmem_probe+0x180/0x3d4 +Read of size 4 at addr 
ffff000006bf31e0 by task kworker/u16:1/38 + +This is because the DT specifies the nvmem cell as covering only two +bytes, but we use a u32 pointer to read the value. DTs for other SoCs +indeed specify 4 bytes, so we cannot just shorten the variable to a u16. + +Fortunately nvmem_cell_read() allows to return the length of the nvmem +cell, in bytes, so we can use that information to only access the valid +portion of the data. +To cover multiple cell sizes, use memcpy() to copy the information into a +zeroed u32 buffer, then also make sure we always read the data in little +endian fashion, as this is how the data is stored in the SID efuses. + +Fixes: 6cc4bcceff9a ("cpufreq: sun50i: Refactor speed bin decoding") +Reported-by: Jernej Skrabec +Signed-off-by: Andre Przywara +Reviewed-by: Jernej Škrabec +Signed-off-by: Viresh Kumar +Signed-off-by: Sasha Levin +--- + drivers/cpufreq/sun50i-cpufreq-nvmem.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c +index 47d6840b34899..744312a44279c 100644 +--- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c ++++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c +@@ -194,7 +194,9 @@ static int sun50i_cpufreq_get_efuse(void) + struct nvmem_cell *speedbin_nvmem; + const struct of_device_id *match; + struct device *cpu_dev; +- u32 *speedbin; ++ void *speedbin_ptr; ++ u32 speedbin = 0; ++ size_t len; + int ret; + + cpu_dev = get_cpu_device(0); +@@ -217,14 +219,18 @@ static int sun50i_cpufreq_get_efuse(void) + return dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem), + "Could not get nvmem cell\n"); + +- speedbin = nvmem_cell_read(speedbin_nvmem, NULL); ++ speedbin_ptr = nvmem_cell_read(speedbin_nvmem, &len); + nvmem_cell_put(speedbin_nvmem); +- if (IS_ERR(speedbin)) +- return PTR_ERR(speedbin); ++ if (IS_ERR(speedbin_ptr)) ++ return PTR_ERR(speedbin_ptr); + +- ret = opp_data->efuse_xlate(*speedbin); ++ if (len <= 4) ++ memcpy(&speedbin, 
speedbin_ptr, len); ++ speedbin = le32_to_cpu(speedbin); + +- kfree(speedbin); ++ ret = opp_data->efuse_xlate(speedbin); ++ ++ kfree(speedbin_ptr); + + return ret; + }; +-- +2.39.5 + diff --git a/queue-6.14/dma-contiguous-avoid-warning-about-unused-size_bytes.patch b/queue-6.14/dma-contiguous-avoid-warning-about-unused-size_bytes.patch new file mode 100644 index 0000000000..dbe7f631b7 --- /dev/null +++ b/queue-6.14/dma-contiguous-avoid-warning-about-unused-size_bytes.patch @@ -0,0 +1,42 @@ +From 9ccc66c19f49c381dbe33757538d6032e3b93543 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Apr 2025 17:15:42 +0200 +Subject: dma/contiguous: avoid warning about unused size_bytes + +From: Arnd Bergmann + +[ Upstream commit d7b98ae5221007d3f202746903d4c21c7caf7ea9 ] + +When building with W=1, this variable is unused for configs with +CONFIG_CMA_SIZE_SEL_PERCENTAGE=y: + +kernel/dma/contiguous.c:67:26: error: 'size_bytes' defined but not used [-Werror=unused-const-variable=] + +Change this to a macro to avoid the warning. + +Fixes: c64be2bb1c6e ("drivers: add Contiguous Memory Allocator") +Signed-off-by: Arnd Bergmann +Signed-off-by: Marek Szyprowski +Link: https://lore.kernel.org/r/20250409151557.3890443-1-arnd@kernel.org +Signed-off-by: Sasha Levin +--- + kernel/dma/contiguous.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c +index 055da410ac71d..8df0dfaaca18e 100644 +--- a/kernel/dma/contiguous.c ++++ b/kernel/dma/contiguous.c +@@ -64,8 +64,7 @@ struct cma *dma_contiguous_default_area; + * Users, who want to set the size of global CMA area for their system + * should use cma= kernel parameter. 
+ */ +-static const phys_addr_t size_bytes __initconst = +- (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M; ++#define size_bytes ((phys_addr_t)CMA_SIZE_MBYTES * SZ_1M) + static phys_addr_t size_cmdline __initdata = -1; + static phys_addr_t base_cmdline __initdata; + static phys_addr_t limit_cmdline __initdata; +-- +2.39.5 + diff --git a/queue-6.14/drm-meson-use-unsigned-long-long-hz-for-frequency-ty.patch b/queue-6.14/drm-meson-use-unsigned-long-long-hz-for-frequency-ty.patch new file mode 100644 index 0000000000..5e0ed29949 --- /dev/null +++ b/queue-6.14/drm-meson-use-unsigned-long-long-hz-for-frequency-ty.patch @@ -0,0 +1,649 @@ +From 724521f1225d0508218a2eebae156a33d278a691 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Apr 2025 22:13:00 +0200 +Subject: drm/meson: use unsigned long long / Hz for frequency types + +From: Martin Blumenstingl + +[ Upstream commit 1017560164b6bbcbc93579266926e6e96675262a ] + +Christian reports that 4K output using YUV420 encoding fails with the +following error: + Fatal Error, invalid HDMI vclk freq 593406 + +Modetest shows the following: + 3840x2160 59.94 3840 4016 4104 4400 2160 2168 2178 2250 593407 flags: xxxx, xxxx, + drm calculated value -------------------------------------^ + +This indicates that there's a (1kHz) mismatch between the clock +calculated by the drm framework and the meson driver. + +Relevant function call stack: +(drm framework) + -> meson_encoder_hdmi_atomic_enable() + -> meson_encoder_hdmi_set_vclk() + -> meson_vclk_setup() + +The video clock requested by the drm framework is 593407kHz. 
This is +passed by meson_encoder_hdmi_atomic_enable() to +meson_encoder_hdmi_set_vclk() and the following formula is applied: +- the frequency is halved (which would be 296703.5kHz) and rounded down + to the next full integer, which is 296703kHz +- TMDS clock is calculated (296703kHz * 10) +- video encoder clock is calculated - this needs to match a table from + meson_vclk.c and so it doubles the previously halved value again + (resulting in 593406kHz) +- meson_vclk_setup() can't find (either directly, or by deriving it from + 594000kHz * 1000 / 1001 and rounding to the closest integer value - + which is 593407kHz as originally requested by the drm framework) a + matching clock in its internal table and errors out with "invalid + HDMI vclk freq" + +Fix the division precision by switching the whole meson driver to use +unsigned long long (64-bit) Hz values for clock frequencies instead of +unsigned int (32-bit) kHz to fix the rounding error. + +Fixes: e5fab2ec9ca4 ("drm/meson: vclk: add support for YUV420 setup") +Reported-by: Christian Hewitt +Signed-off-by: Martin Blumenstingl +Reviewed-by: Neil Armstrong +Link: https://lore.kernel.org/r/20250421201300.778955-3-martin.blumenstingl@googlemail.com +Signed-off-by: Neil Armstrong +Link: https://lore.kernel.org/r/20250421201300.778955-3-martin.blumenstingl@googlemail.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/meson/meson_drv.c | 2 +- + drivers/gpu/drm/meson/meson_drv.h | 2 +- + drivers/gpu/drm/meson/meson_encoder_hdmi.c | 29 +-- + drivers/gpu/drm/meson/meson_vclk.c | 195 +++++++++++---------- + drivers/gpu/drm/meson/meson_vclk.h | 13 +- + 5 files changed, 126 insertions(+), 115 deletions(-) + +diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c +index 81d2ee37e7732..49ff9f1f16d32 100644 +--- a/drivers/gpu/drm/meson/meson_drv.c ++++ b/drivers/gpu/drm/meson/meson_drv.c +@@ -169,7 +169,7 @@ static const struct meson_drm_soc_attr meson_drm_soc_attrs[] = { + /* S805X/S805Y HDMI PLL 
won't lock for HDMI PHY freq > 1,65GHz */ + { + .limits = { +- .max_hdmi_phy_freq = 1650000, ++ .max_hdmi_phy_freq = 1650000000, + }, + .attrs = (const struct soc_device_attribute []) { + { .soc_id = "GXL (S805*)", }, +diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h +index 3f9345c14f31c..be4b0e4df6e13 100644 +--- a/drivers/gpu/drm/meson/meson_drv.h ++++ b/drivers/gpu/drm/meson/meson_drv.h +@@ -37,7 +37,7 @@ struct meson_drm_match_data { + }; + + struct meson_drm_soc_limits { +- unsigned int max_hdmi_phy_freq; ++ unsigned long long max_hdmi_phy_freq; + }; + + struct meson_drm { +diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c +index 0593a1cde906f..ce8cea5d3a56b 100644 +--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c ++++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c +@@ -70,12 +70,12 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, + { + struct meson_drm *priv = encoder_hdmi->priv; + int vic = drm_match_cea_mode(mode); +- unsigned int phy_freq; +- unsigned int vclk_freq; +- unsigned int venc_freq; +- unsigned int hdmi_freq; ++ unsigned long long phy_freq; ++ unsigned long long vclk_freq; ++ unsigned long long venc_freq; ++ unsigned long long hdmi_freq; + +- vclk_freq = mode->clock; ++ vclk_freq = mode->clock * 1000; + + /* For 420, pixel clock is half unlike venc clock */ + if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) +@@ -107,7 +107,8 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, + if (mode->flags & DRM_MODE_FLAG_DBLCLK) + venc_freq /= 2; + +- dev_dbg(priv->dev, "vclk:%d phy=%d venc=%d hdmi=%d enci=%d\n", ++ dev_dbg(priv->dev, ++ "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", + phy_freq, vclk_freq, venc_freq, hdmi_freq, + priv->venc.hdmi_use_enci); + +@@ -122,10 +123,11 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri + struct 
meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge); + struct meson_drm *priv = encoder_hdmi->priv; + bool is_hdmi2_sink = display_info->hdmi.scdc.supported; +- unsigned int phy_freq; +- unsigned int vclk_freq; +- unsigned int venc_freq; +- unsigned int hdmi_freq; ++ unsigned long long clock = mode->clock * 1000; ++ unsigned long long phy_freq; ++ unsigned long long vclk_freq; ++ unsigned long long venc_freq; ++ unsigned long long hdmi_freq; + int vic = drm_match_cea_mode(mode); + enum drm_mode_status status; + +@@ -144,12 +146,12 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri + if (status != MODE_OK) + return status; + +- return meson_vclk_dmt_supported_freq(priv, mode->clock); ++ return meson_vclk_dmt_supported_freq(priv, clock); + /* Check against supported VIC modes */ + } else if (!meson_venc_hdmi_supported_vic(vic)) + return MODE_BAD; + +- vclk_freq = mode->clock; ++ vclk_freq = clock; + + /* For 420, pixel clock is half unlike venc clock */ + if (drm_mode_is_420_only(display_info, mode) || +@@ -179,7 +181,8 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri + if (mode->flags & DRM_MODE_FLAG_DBLCLK) + venc_freq /= 2; + +- dev_dbg(priv->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n", ++ dev_dbg(priv->dev, ++ "%s: vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz\n", + __func__, phy_freq, vclk_freq, venc_freq, hdmi_freq); + + return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq); +diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c +index 2a82119eb58ed..3325580d885d0 100644 +--- a/drivers/gpu/drm/meson/meson_vclk.c ++++ b/drivers/gpu/drm/meson/meson_vclk.c +@@ -110,7 +110,10 @@ + #define HDMI_PLL_LOCK BIT(31) + #define HDMI_PLL_LOCK_G12A (3 << 30) + +-#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST(_freq * 1000, 1001) ++#define PIXEL_FREQ_1000_1001(_freq) \ ++ DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) ++#define 
PHY_FREQ_1000_1001(_freq) \ ++ (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10) + + /* VID PLL Dividers */ + enum { +@@ -360,11 +363,11 @@ enum { + }; + + struct meson_vclk_params { +- unsigned int pll_freq; +- unsigned int phy_freq; +- unsigned int vclk_freq; +- unsigned int venc_freq; +- unsigned int pixel_freq; ++ unsigned long long pll_freq; ++ unsigned long long phy_freq; ++ unsigned long long vclk_freq; ++ unsigned long long venc_freq; ++ unsigned long long pixel_freq; + unsigned int pll_od1; + unsigned int pll_od2; + unsigned int pll_od3; +@@ -372,11 +375,11 @@ struct meson_vclk_params { + unsigned int vclk_div; + } params[] = { + [MESON_VCLK_HDMI_ENCI_54000] = { +- .pll_freq = 4320000, +- .phy_freq = 270000, +- .vclk_freq = 54000, +- .venc_freq = 54000, +- .pixel_freq = 54000, ++ .pll_freq = 4320000000, ++ .phy_freq = 270000000, ++ .vclk_freq = 54000000, ++ .venc_freq = 54000000, ++ .pixel_freq = 54000000, + .pll_od1 = 4, + .pll_od2 = 4, + .pll_od3 = 1, +@@ -384,11 +387,11 @@ struct meson_vclk_params { + .vclk_div = 1, + }, + [MESON_VCLK_HDMI_DDR_54000] = { +- .pll_freq = 4320000, +- .phy_freq = 270000, +- .vclk_freq = 54000, +- .venc_freq = 54000, +- .pixel_freq = 27000, ++ .pll_freq = 4320000000, ++ .phy_freq = 270000000, ++ .vclk_freq = 54000000, ++ .venc_freq = 54000000, ++ .pixel_freq = 27000000, + .pll_od1 = 4, + .pll_od2 = 4, + .pll_od3 = 1, +@@ -396,11 +399,11 @@ struct meson_vclk_params { + .vclk_div = 1, + }, + [MESON_VCLK_HDMI_DDR_148500] = { +- .pll_freq = 2970000, +- .phy_freq = 742500, +- .vclk_freq = 148500, +- .venc_freq = 148500, +- .pixel_freq = 74250, ++ .pll_freq = 2970000000, ++ .phy_freq = 742500000, ++ .vclk_freq = 148500000, ++ .venc_freq = 148500000, ++ .pixel_freq = 74250000, + .pll_od1 = 4, + .pll_od2 = 1, + .pll_od3 = 1, +@@ -408,11 +411,11 @@ struct meson_vclk_params { + .vclk_div = 1, + }, + [MESON_VCLK_HDMI_74250] = { +- .pll_freq = 2970000, +- .phy_freq = 742500, +- .vclk_freq = 74250, +- .venc_freq = 74250, +- 
.pixel_freq = 74250, ++ .pll_freq = 2970000000, ++ .phy_freq = 742500000, ++ .vclk_freq = 74250000, ++ .venc_freq = 74250000, ++ .pixel_freq = 74250000, + .pll_od1 = 2, + .pll_od2 = 2, + .pll_od3 = 2, +@@ -420,11 +423,11 @@ struct meson_vclk_params { + .vclk_div = 1, + }, + [MESON_VCLK_HDMI_148500] = { +- .pll_freq = 2970000, +- .phy_freq = 1485000, +- .vclk_freq = 148500, +- .venc_freq = 148500, +- .pixel_freq = 148500, ++ .pll_freq = 2970000000, ++ .phy_freq = 1485000000, ++ .vclk_freq = 148500000, ++ .venc_freq = 148500000, ++ .pixel_freq = 148500000, + .pll_od1 = 1, + .pll_od2 = 2, + .pll_od3 = 2, +@@ -432,11 +435,11 @@ struct meson_vclk_params { + .vclk_div = 1, + }, + [MESON_VCLK_HDMI_297000] = { +- .pll_freq = 5940000, +- .phy_freq = 2970000, +- .venc_freq = 297000, +- .vclk_freq = 297000, +- .pixel_freq = 297000, ++ .pll_freq = 5940000000, ++ .phy_freq = 2970000000, ++ .venc_freq = 297000000, ++ .vclk_freq = 297000000, ++ .pixel_freq = 297000000, + .pll_od1 = 2, + .pll_od2 = 1, + .pll_od3 = 1, +@@ -444,11 +447,11 @@ struct meson_vclk_params { + .vclk_div = 2, + }, + [MESON_VCLK_HDMI_594000] = { +- .pll_freq = 5940000, +- .phy_freq = 5940000, +- .venc_freq = 594000, +- .vclk_freq = 594000, +- .pixel_freq = 594000, ++ .pll_freq = 5940000000, ++ .phy_freq = 5940000000, ++ .venc_freq = 594000000, ++ .vclk_freq = 594000000, ++ .pixel_freq = 594000000, + .pll_od1 = 1, + .pll_od2 = 1, + .pll_od3 = 2, +@@ -456,11 +459,11 @@ struct meson_vclk_params { + .vclk_div = 1, + }, + [MESON_VCLK_HDMI_594000_YUV420] = { +- .pll_freq = 5940000, +- .phy_freq = 2970000, +- .venc_freq = 594000, +- .vclk_freq = 594000, +- .pixel_freq = 297000, ++ .pll_freq = 5940000000, ++ .phy_freq = 2970000000, ++ .venc_freq = 594000000, ++ .vclk_freq = 594000000, ++ .pixel_freq = 297000000, + .pll_od1 = 2, + .pll_od2 = 1, + .pll_od3 = 1, +@@ -617,16 +620,16 @@ static void meson_hdmi_pll_set_params(struct meson_drm *priv, unsigned int m, + 3 << 20, pll_od_to_reg(od3) << 20); + } + +-#define 
XTAL_FREQ 24000 ++#define XTAL_FREQ (24 * 1000 * 1000) + + static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, +- unsigned int pll_freq) ++ unsigned long long pll_freq) + { + /* The GXBB PLL has a /2 pre-multiplier */ + if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) +- pll_freq /= 2; ++ pll_freq = DIV_ROUND_DOWN_ULL(pll_freq, 2); + +- return pll_freq / XTAL_FREQ; ++ return DIV_ROUND_DOWN_ULL(pll_freq, XTAL_FREQ); + } + + #define HDMI_FRAC_MAX_GXBB 4096 +@@ -635,12 +638,13 @@ static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, + + static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, + unsigned int m, +- unsigned int pll_freq) ++ unsigned long long pll_freq) + { +- unsigned int parent_freq = XTAL_FREQ; ++ unsigned long long parent_freq = XTAL_FREQ; + unsigned int frac_max = HDMI_FRAC_MAX_GXL; + unsigned int frac_m; + unsigned int frac; ++ u32 remainder; + + /* The GXBB PLL has a /2 pre-multiplier and a larger FRAC width */ + if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { +@@ -652,11 +656,11 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, + frac_max = HDMI_FRAC_MAX_G12A; + + /* We can have a perfect match !*/ +- if (pll_freq / m == parent_freq && +- pll_freq % m == 0) ++ if (div_u64_rem(pll_freq, m, &remainder) == parent_freq && ++ remainder == 0) + return 0; + +- frac = div_u64((u64)pll_freq * (u64)frac_max, parent_freq); ++ frac = mul_u64_u64_div_u64(pll_freq, frac_max, parent_freq); + frac_m = m * frac_max; + if (frac_m > frac) + return frac_max; +@@ -666,7 +670,7 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, + } + + static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, +- unsigned int m, ++ unsigned long long m, + unsigned int frac) + { + if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { +@@ -694,7 +698,7 @@ static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, + } + + static bool meson_hdmi_pll_find_params(struct meson_drm 
*priv, +- unsigned int freq, ++ unsigned long long freq, + unsigned int *m, + unsigned int *frac, + unsigned int *od) +@@ -706,7 +710,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, + continue; + *frac = meson_hdmi_pll_get_frac(priv, *m, freq * *od); + +- DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d\n", ++ DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d\n", + freq, *m, *frac, *od); + + if (meson_hdmi_pll_validate_params(priv, *m, *frac)) +@@ -718,7 +722,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, + + /* pll_freq is the frequency after the OD dividers */ + enum drm_mode_status +-meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq) ++meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq) + { + unsigned int od, m, frac; + +@@ -741,7 +745,7 @@ EXPORT_SYMBOL_GPL(meson_vclk_dmt_supported_freq); + + /* pll_freq is the frequency after the OD dividers */ + static void meson_hdmi_pll_generic_set(struct meson_drm *priv, +- unsigned int pll_freq) ++ unsigned long long pll_freq) + { + unsigned int od, m, frac, od1, od2, od3; + +@@ -756,7 +760,7 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, + od1 = od / od2; + } + +- DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d/%d/%d\n", ++ DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d/%d/%d\n", + pll_freq, m, frac, od1, od2, od3); + + meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); +@@ -764,17 +768,18 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, + return; + } + +- DRM_ERROR("Fatal, unable to find parameters for PLL freq %d\n", ++ DRM_ERROR("Fatal, unable to find parameters for PLL freq %lluHz\n", + pll_freq); + } + + enum drm_mode_status +-meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, +- unsigned int vclk_freq) ++meson_vclk_vic_supported_freq(struct meson_drm *priv, ++ unsigned long long phy_freq, ++ unsigned long long 
vclk_freq) + { + int i; + +- DRM_DEBUG_DRIVER("phy_freq = %d vclk_freq = %d\n", ++ DRM_DEBUG_DRIVER("phy_freq = %lluHz vclk_freq = %lluHz\n", + phy_freq, vclk_freq); + + /* Check against soc revision/package limits */ +@@ -785,19 +790,19 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, + } + + for (i = 0 ; params[i].pixel_freq ; ++i) { +- DRM_DEBUG_DRIVER("i = %d pixel_freq = %d alt = %d\n", ++ DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n", + i, params[i].pixel_freq, +- FREQ_1000_1001(params[i].pixel_freq)); +- DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", ++ PIXEL_FREQ_1000_1001(params[i].pixel_freq)); ++ DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", + i, params[i].phy_freq, +- FREQ_1000_1001(params[i].phy_freq/10)*10); ++ PHY_FREQ_1000_1001(params[i].phy_freq)); + /* Match strict frequency */ + if (phy_freq == params[i].phy_freq && + vclk_freq == params[i].vclk_freq) + return MODE_OK; + /* Match 1000/1001 variant */ +- if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/10)*10) && +- vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) ++ if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) && ++ vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq)) + return MODE_OK; + } + +@@ -805,8 +810,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, + } + EXPORT_SYMBOL_GPL(meson_vclk_vic_supported_freq); + +-static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, +- unsigned int od1, unsigned int od2, unsigned int od3, ++static void meson_vclk_set(struct meson_drm *priv, ++ unsigned long long pll_base_freq, unsigned int od1, ++ unsigned int od2, unsigned int od3, + unsigned int vid_pll_div, unsigned int vclk_div, + unsigned int hdmi_tx_div, unsigned int venc_div, + bool hdmi_use_enci, bool vic_alternate_clock) +@@ -826,15 +832,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, + meson_hdmi_pll_generic_set(priv, 
pll_base_freq); + } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { + switch (pll_base_freq) { +- case 2970000: ++ case 2970000000: + m = 0x3d; + frac = vic_alternate_clock ? 0xd02 : 0xe00; + break; +- case 4320000: ++ case 4320000000: + m = vic_alternate_clock ? 0x59 : 0x5a; + frac = vic_alternate_clock ? 0xe8f : 0; + break; +- case 5940000: ++ case 5940000000: + m = 0x7b; + frac = vic_alternate_clock ? 0xa05 : 0xc00; + break; +@@ -844,15 +850,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, + } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) || + meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL)) { + switch (pll_base_freq) { +- case 2970000: ++ case 2970000000: + m = 0x7b; + frac = vic_alternate_clock ? 0x281 : 0x300; + break; +- case 4320000: ++ case 4320000000: + m = vic_alternate_clock ? 0xb3 : 0xb4; + frac = vic_alternate_clock ? 0x347 : 0; + break; +- case 5940000: ++ case 5940000000: + m = 0xf7; + frac = vic_alternate_clock ? 0x102 : 0x200; + break; +@@ -861,15 +867,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, + meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); + } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) { + switch (pll_base_freq) { +- case 2970000: ++ case 2970000000: + m = 0x7b; + frac = vic_alternate_clock ? 0x140b4 : 0x18000; + break; +- case 4320000: ++ case 4320000000: + m = vic_alternate_clock ? 0xb3 : 0xb4; + frac = vic_alternate_clock ? 0x1a3ee : 0; + break; +- case 5940000: ++ case 5940000000: + m = 0xf7; + frac = vic_alternate_clock ? 
0x8148 : 0x10000; + break; +@@ -1025,14 +1031,14 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, + } + + void meson_vclk_setup(struct meson_drm *priv, unsigned int target, +- unsigned int phy_freq, unsigned int vclk_freq, +- unsigned int venc_freq, unsigned int dac_freq, ++ unsigned long long phy_freq, unsigned long long vclk_freq, ++ unsigned long long venc_freq, unsigned long long dac_freq, + bool hdmi_use_enci) + { + bool vic_alternate_clock = false; +- unsigned int freq; +- unsigned int hdmi_tx_div; +- unsigned int venc_div; ++ unsigned long long freq; ++ unsigned long long hdmi_tx_div; ++ unsigned long long venc_div; + + if (target == MESON_VCLK_TARGET_CVBS) { + meson_venci_cvbs_clock_config(priv); +@@ -1052,27 +1058,27 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, + return; + } + +- hdmi_tx_div = vclk_freq / dac_freq; ++ hdmi_tx_div = DIV_ROUND_DOWN_ULL(vclk_freq, dac_freq); + + if (hdmi_tx_div == 0) { +- pr_err("Fatal Error, invalid HDMI-TX freq %d\n", ++ pr_err("Fatal Error, invalid HDMI-TX freq %lluHz\n", + dac_freq); + return; + } + +- venc_div = vclk_freq / venc_freq; ++ venc_div = DIV_ROUND_DOWN_ULL(vclk_freq, venc_freq); + + if (venc_div == 0) { +- pr_err("Fatal Error, invalid HDMI venc freq %d\n", ++ pr_err("Fatal Error, invalid HDMI venc freq %lluHz\n", + venc_freq); + return; + } + + for (freq = 0 ; params[freq].pixel_freq ; ++freq) { + if ((phy_freq == params[freq].phy_freq || +- phy_freq == FREQ_1000_1001(params[freq].phy_freq/10)*10) && ++ phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) && + (vclk_freq == params[freq].vclk_freq || +- vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { ++ vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) { + if (vclk_freq != params[freq].vclk_freq) + vic_alternate_clock = true; + else +@@ -1098,7 +1104,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, + } + + if (!params[freq].pixel_freq) { +- pr_err("Fatal Error, 
invalid HDMI vclk freq %d\n", vclk_freq); ++ pr_err("Fatal Error, invalid HDMI vclk freq %lluHz\n", ++ vclk_freq); + return; + } + +diff --git a/drivers/gpu/drm/meson/meson_vclk.h b/drivers/gpu/drm/meson/meson_vclk.h +index 60617aaf18dd1..7ac55744e5749 100644 +--- a/drivers/gpu/drm/meson/meson_vclk.h ++++ b/drivers/gpu/drm/meson/meson_vclk.h +@@ -20,17 +20,18 @@ enum { + }; + + /* 27MHz is the CVBS Pixel Clock */ +-#define MESON_VCLK_CVBS 27000 ++#define MESON_VCLK_CVBS (27 * 1000 * 1000) + + enum drm_mode_status +-meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq); ++meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq); + enum drm_mode_status +-meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, +- unsigned int vclk_freq); ++meson_vclk_vic_supported_freq(struct meson_drm *priv, ++ unsigned long long phy_freq, ++ unsigned long long vclk_freq); + + void meson_vclk_setup(struct meson_drm *priv, unsigned int target, +- unsigned int phy_freq, unsigned int vclk_freq, +- unsigned int venc_freq, unsigned int dac_freq, ++ unsigned long long phy_freq, unsigned long long vclk_freq, ++ unsigned long long venc_freq, unsigned long long dac_freq, + bool hdmi_use_enci); + + #endif /* __MESON_VCLK_H */ +-- +2.39.5 + diff --git a/queue-6.14/fix-a-couple-of-races-in-mnt_tree_beneath-handling-b.patch b/queue-6.14/fix-a-couple-of-races-in-mnt_tree_beneath-handling-b.patch new file mode 100644 index 0000000000..27fa483286 --- /dev/null +++ b/queue-6.14/fix-a-couple-of-races-in-mnt_tree_beneath-handling-b.patch @@ -0,0 +1,181 @@ +From 256a362e4674a14e838047c10ce2f4286c807e46 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Apr 2025 02:30:34 +0100 +Subject: fix a couple of races in MNT_TREE_BENEATH handling by do_move_mount() + +From: Al Viro + +[ Upstream commit 0d039eac6e5950f9d1ecc9e410c2fd1feaeab3b6 ] + +Normally do_lock_mount(path, _) is locking a mountpoint pinned by +*path and at the time when 
matching unlock_mount() unlocks that +location it is still pinned by the same thing. + +Unfortunately, for 'beneath' case it's no longer that simple - +the object being locked is not the one *path points to. It's the +mountpoint of path->mnt. The thing is, without sufficient locking +->mnt_parent may change under us and none of the locks are held +at that point. The rules are + * mount_lock stabilizes m->mnt_parent for any mount m. + * namespace_sem stabilizes m->mnt_parent, provided that +m is mounted. + * if either of the above holds and refcount of m is positive, +we are guaranteed the same for refcount of m->mnt_parent. + +namespace_sem nests inside inode_lock(), so do_lock_mount() has +to take inode_lock() before grabbing namespace_sem. It does +recheck that path->mnt is still mounted in the same place after +getting namespace_sem, and it does take care to pin the dentry. +It is needed, since otherwise we might end up with racing mount --move +(or umount) happening while we were getting locks; in that case +dentry would no longer be a mountpoint and could've been evicted +on memory pressure along with its inode - not something you want +when grabbing lock on that inode. + +However, pinning a dentry is not enough - the matching mount is +also pinned only by the fact that path->mnt is mounted on top it +and at that point we are not holding any locks whatsoever, so +the same kind of races could end up with all references to +that mount gone just as we are about to enter inode_lock(). +If that happens, we are left with filesystem being shut down while +we are holding a dentry reference on it; results are not pretty. + +What we need to do is grab both dentry and mount at the same time; +that makes inode_lock() safe *and* avoids the problem with fs getting +shut down under us. After taking namespace_sem we verify that +path->mnt is still mounted (which stabilizes its ->mnt_parent) and +check that it's still mounted at the same place. 
From that point +on to the matching namespace_unlock() we are guaranteed that +mount/dentry pair we'd grabbed are also pinned by being the mountpoint +of path->mnt, so we can quietly drop both the dentry reference (as +the current code does) and mnt one - it's OK to do under namespace_sem, +since we are not dropping the final refs. + +That solves the problem on do_lock_mount() side; unlock_mount() +also has one, since dentry is guaranteed to stay pinned only until +the namespace_unlock(). That's easy to fix - just have inode_unlock() +done earlier, while it's still pinned by mp->m_dentry. + +Fixes: 6ac392815628 "fs: allow to mount beneath top mount" # v6.5+ +Signed-off-by: Al Viro +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/namespace.c | 69 ++++++++++++++++++++++++++------------------------ + 1 file changed, 36 insertions(+), 33 deletions(-) + +diff --git a/fs/namespace.c b/fs/namespace.c +index d401486fe95d1..280a6ebc46d93 100644 +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -2640,56 +2640,62 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath) + struct vfsmount *mnt = path->mnt; + struct dentry *dentry; + struct mountpoint *mp = ERR_PTR(-ENOENT); ++ struct path under = {}; + + for (;;) { +- struct mount *m; ++ struct mount *m = real_mount(mnt); + + if (beneath) { +- m = real_mount(mnt); ++ path_put(&under); + read_seqlock_excl(&mount_lock); +- dentry = dget(m->mnt_mountpoint); ++ under.mnt = mntget(&m->mnt_parent->mnt); ++ under.dentry = dget(m->mnt_mountpoint); + read_sequnlock_excl(&mount_lock); ++ dentry = under.dentry; + } else { + dentry = path->dentry; + } + + inode_lock(dentry->d_inode); +- if (unlikely(cant_mount(dentry))) { +- inode_unlock(dentry->d_inode); +- goto out; +- } +- + namespace_lock(); + +- if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) { ++ if (unlikely(cant_mount(dentry) || !is_mounted(mnt))) ++ break; // not to be mounted on ++ ++ if (beneath && 
unlikely(m->mnt_mountpoint != dentry || ++ &m->mnt_parent->mnt != under.mnt)) { + namespace_unlock(); + inode_unlock(dentry->d_inode); +- goto out; ++ continue; // got moved + } + + mnt = lookup_mnt(path); +- if (likely(!mnt)) ++ if (unlikely(mnt)) { ++ namespace_unlock(); ++ inode_unlock(dentry->d_inode); ++ path_put(path); ++ path->mnt = mnt; ++ path->dentry = dget(mnt->mnt_root); ++ continue; // got overmounted ++ } ++ mp = get_mountpoint(dentry); ++ if (IS_ERR(mp)) + break; +- +- namespace_unlock(); +- inode_unlock(dentry->d_inode); +- if (beneath) +- dput(dentry); +- path_put(path); +- path->mnt = mnt; +- path->dentry = dget(mnt->mnt_root); +- } +- +- mp = get_mountpoint(dentry); +- if (IS_ERR(mp)) { +- namespace_unlock(); +- inode_unlock(dentry->d_inode); ++ if (beneath) { ++ /* ++ * @under duplicates the references that will stay ++ * at least until namespace_unlock(), so the path_put() ++ * below is safe (and OK to do under namespace_lock - ++ * we are not dropping the final references here). 
++ */ ++ path_put(&under); ++ } ++ return mp; + } +- +-out: ++ namespace_unlock(); ++ inode_unlock(dentry->d_inode); + if (beneath) +- dput(dentry); +- ++ path_put(&under); + return mp; + } + +@@ -2700,14 +2706,11 @@ static inline struct mountpoint *lock_mount(struct path *path) + + static void unlock_mount(struct mountpoint *where) + { +- struct dentry *dentry = where->m_dentry; +- ++ inode_unlock(where->m_dentry->d_inode); + read_seqlock_excl(&mount_lock); + put_mountpoint(where); + read_sequnlock_excl(&mount_lock); +- + namespace_unlock(); +- inode_unlock(dentry->d_inode); + } + + static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) +-- +2.39.5 + diff --git a/queue-6.14/fs-xattr-fix-handling-of-at_fdcwd-in-setxattrat-2-an.patch b/queue-6.14/fs-xattr-fix-handling-of-at_fdcwd-in-setxattrat-2-an.patch new file mode 100644 index 0000000000..ca8a6d9f67 --- /dev/null +++ b/queue-6.14/fs-xattr-fix-handling-of-at_fdcwd-in-setxattrat-2-an.patch @@ -0,0 +1,47 @@ +From 04df2853e177fb661b6fab3c606541ac28d30881 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Apr 2025 15:22:47 +0200 +Subject: fs/xattr: Fix handling of AT_FDCWD in setxattrat(2) and getxattrat(2) + +From: Jan Kara + +[ Upstream commit f520bed25d17bb31c2d2d72b0a785b593a4e3179 ] + +Currently, setxattrat(2) and getxattrat(2) are wrongly handling the +calls of the form setxattrat(AT_FDCWD, NULL, AT_EMPTY_PATH, ...) and +fail with -EBADF error instead of operating on CWD. Fix it. 
+ +Fixes: 6140be90ec70 ("fs/xattr: add *at family syscalls") +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/20250424132246.16822-2-jack@suse.cz +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/xattr.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/xattr.c b/fs/xattr.c +index 02bee149ad967..fabb2a04501ee 100644 +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -703,7 +703,7 @@ static int path_setxattrat(int dfd, const char __user *pathname, + return error; + + filename = getname_maybe_null(pathname, at_flags); +- if (!filename) { ++ if (!filename && dfd >= 0) { + CLASS(fd, f)(dfd); + if (fd_empty(f)) + error = -EBADF; +@@ -847,7 +847,7 @@ static ssize_t path_getxattrat(int dfd, const char __user *pathname, + return error; + + filename = getname_maybe_null(pathname, at_flags); +- if (!filename) { ++ if (!filename && dfd >= 0) { + CLASS(fd, f)(dfd); + if (fd_empty(f)) + return -EBADF; +-- +2.39.5 + diff --git a/queue-6.14/iommu-amd-return-an-error-if-vcpu-affinity-is-set-fo.patch b/queue-6.14/iommu-amd-return-an-error-if-vcpu-affinity-is-set-fo.patch new file mode 100644 index 0000000000..f9edd2cf5b --- /dev/null +++ b/queue-6.14/iommu-amd-return-an-error-if-vcpu-affinity-is-set-fo.patch @@ -0,0 +1,38 @@ +From 56dda222aa207c6abc0196697b72a678cda1716a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 4 Apr 2025 12:38:20 -0700 +Subject: iommu/amd: Return an error if vCPU affinity is set for non-vCPU IRTE + +From: Sean Christopherson + +[ Upstream commit 07172206a26dcf3f0bf7c3ecaadd4242b008ea54 ] + +Return -EINVAL instead of success if amd_ir_set_vcpu_affinity() is +invoked without use_vapic; lying to KVM about whether or not the IRTE was +configured to post IRQs is all kinds of bad. 
+ +Fixes: d98de49a53e4 ("iommu/amd: Enable vAPIC interrupt remapping mode by default") +Signed-off-by: Sean Christopherson +Message-ID: <20250404193923.1413163-6-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + drivers/iommu/amd/iommu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c +index cd5116d8c3b28..b3a01b7757ee1 100644 +--- a/drivers/iommu/amd/iommu.c ++++ b/drivers/iommu/amd/iommu.c +@@ -3850,7 +3850,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) + * we should not modify the IRTE + */ + if (!dev_data || !dev_data->use_vapic) +- return 0; ++ return -EINVAL; + + ir_data->cfg = irqd_cfg(data); + pi_data->ir_data = ir_data; +-- +2.39.5 + diff --git a/queue-6.14/loongarch-make-do_xyz-exception-handlers-more-robust.patch b/queue-6.14/loongarch-make-do_xyz-exception-handlers-more-robust.patch new file mode 100644 index 0000000000..010631d7bc --- /dev/null +++ b/queue-6.14/loongarch-make-do_xyz-exception-handlers-more-robust.patch @@ -0,0 +1,140 @@ +From 8a8de2577691defaa57683565d97aa2291292f65 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Apr 2025 20:15:41 +0800 +Subject: LoongArch: Make do_xyz() exception handlers more robust + +From: Tiezhu Yang + +[ Upstream commit cc73cc6bcdb5f959670e3ff9abdc62461452ddff ] + +Currently, interrupts need to be disabled before single-step mode is +set, it requires that CSR_PRMD_PIE be cleared in save_local_irqflag() +which is called by setup_singlestep(), this is reasonable. + +But in the first kprobe breakpoint exception, if the irq is enabled at +the beginning of do_bp(), it will not be disabled at the end of do_bp() +due to the CSR_PRMD_PIE has been cleared in save_local_irqflag(). So for +this case, it may corrupt exception context when restoring the exception +after do_bp() in handle_bp(), this is not reasonable. 
+ +In order to restore exception safely in handle_bp(), it needs to ensure +the irq is disabled at the end of do_bp(), so just add a local variable +to record the original interrupt status in the parent context, then use +it as the check condition to enable and disable irq in do_bp(). + +While at it, do the similar thing for other do_xyz() exception handlers +to make them more robust. + +Fixes: 6d4cc40fb5f5 ("LoongArch: Add kprobes support") +Suggested-by: Jinyang He +Suggested-by: Huacai Chen +Co-developed-by: Tianyang Zhang +Signed-off-by: Tianyang Zhang +Signed-off-by: Tiezhu Yang +Signed-off-by: Huacai Chen +Signed-off-by: Sasha Levin +--- + arch/loongarch/kernel/traps.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c +index 2ec3106c0da3d..47fc2de6d1501 100644 +--- a/arch/loongarch/kernel/traps.c ++++ b/arch/loongarch/kernel/traps.c +@@ -553,9 +553,10 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) + die_if_kernel("Kernel ale access", regs); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); + #else ++ bool pie = regs_irqs_disabled(regs); + unsigned int *pc; + +- if (regs->csr_prmd & CSR_PRMD_PIE) ++ if (!pie) + local_irq_enable(); + + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); +@@ -582,7 +583,7 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) + die_if_kernel("Kernel ale access", regs); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); + out: +- if (regs->csr_prmd & CSR_PRMD_PIE) ++ if (!pie) + local_irq_disable(); + #endif + irqentry_exit(regs, state); +@@ -621,12 +622,13 @@ static void bug_handler(struct pt_regs *regs) + asmlinkage void noinstr do_bce(struct pt_regs *regs) + { + bool user = user_mode(regs); ++ bool pie = regs_irqs_disabled(regs); + unsigned long era = exception_era(regs); + u64 badv = 0, lower = 0, upper = ULONG_MAX; + union 
loongarch_instruction insn; + irqentry_state_t state = irqentry_enter(regs); + +- if (regs->csr_prmd & CSR_PRMD_PIE) ++ if (!pie) + local_irq_enable(); + + current->thread.trap_nr = read_csr_excode(); +@@ -692,7 +694,7 @@ asmlinkage void noinstr do_bce(struct pt_regs *regs) + force_sig_bnderr((void __user *)badv, (void __user *)lower, (void __user *)upper); + + out: +- if (regs->csr_prmd & CSR_PRMD_PIE) ++ if (!pie) + local_irq_disable(); + + irqentry_exit(regs, state); +@@ -710,11 +712,12 @@ asmlinkage void noinstr do_bce(struct pt_regs *regs) + asmlinkage void noinstr do_bp(struct pt_regs *regs) + { + bool user = user_mode(regs); ++ bool pie = regs_irqs_disabled(regs); + unsigned int opcode, bcode; + unsigned long era = exception_era(regs); + irqentry_state_t state = irqentry_enter(regs); + +- if (regs->csr_prmd & CSR_PRMD_PIE) ++ if (!pie) + local_irq_enable(); + + if (__get_inst(&opcode, (u32 *)era, user)) +@@ -780,7 +783,7 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) + } + + out: +- if (regs->csr_prmd & CSR_PRMD_PIE) ++ if (!pie) + local_irq_disable(); + + irqentry_exit(regs, state); +@@ -1015,6 +1018,7 @@ static void init_restore_lbt(void) + + asmlinkage void noinstr do_lbt(struct pt_regs *regs) + { ++ bool pie = regs_irqs_disabled(regs); + irqentry_state_t state = irqentry_enter(regs); + + /* +@@ -1024,7 +1028,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs) + * (including the user using 'MOVGR2GCSR' to turn on TM, which + * will not trigger the BTE), we need to check PRMD first. 
+ */ +- if (regs->csr_prmd & CSR_PRMD_PIE) ++ if (!pie) + local_irq_enable(); + + if (!cpu_has_lbt) { +@@ -1038,7 +1042,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs) + preempt_enable(); + + out: +- if (regs->csr_prmd & CSR_PRMD_PIE) ++ if (!pie) + local_irq_disable(); + + irqentry_exit(regs, state); +-- +2.39.5 + diff --git a/queue-6.14/loongarch-make-regs_irqs_disabled-more-clear.patch b/queue-6.14/loongarch-make-regs_irqs_disabled-more-clear.patch new file mode 100644 index 0000000000..4a78417659 --- /dev/null +++ b/queue-6.14/loongarch-make-regs_irqs_disabled-more-clear.patch @@ -0,0 +1,46 @@ +From 61a82ce754843638c828e0a2df75e08826586e0c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Apr 2025 20:15:41 +0800 +Subject: LoongArch: Make regs_irqs_disabled() more clear + +From: Tiezhu Yang + +[ Upstream commit bb0511d59db9b3e40c8d51f0d151ccd0fd44071d ] + +In the current code, the definition of regs_irqs_disabled() is actually +"!(regs->csr_prmd & CSR_CRMD_IE)" because arch_irqs_disabled_flags() is +defined as "!(flags & CSR_CRMD_IE)", it looks a little strange. + +Define regs_irqs_disabled() as !(regs->csr_prmd & CSR_PRMD_PIE) directly +to make it more clear, no functional change. + +While at it, the return value of regs_irqs_disabled() is true or false, +so change its type to reflect that and also make it always inline. 
+ +Fixes: 803b0fc5c3f2 ("LoongArch: Add process management") +Signed-off-by: Tiezhu Yang +Signed-off-by: Huacai Chen +Signed-off-by: Sasha Levin +--- + arch/loongarch/include/asm/ptrace.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h +index f3ddaed9ef7f0..a5b63c84f8541 100644 +--- a/arch/loongarch/include/asm/ptrace.h ++++ b/arch/loongarch/include/asm/ptrace.h +@@ -33,9 +33,9 @@ struct pt_regs { + unsigned long __last[]; + } __aligned(8); + +-static inline int regs_irqs_disabled(struct pt_regs *regs) ++static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) + { +- return arch_irqs_disabled_flags(regs->csr_prmd); ++ return !(regs->csr_prmd & CSR_PRMD_PIE); + } + + static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +-- +2.39.5 + diff --git a/queue-6.14/loongarch-select-arch_use_memtest.patch b/queue-6.14/loongarch-select-arch_use_memtest.patch new file mode 100644 index 0000000000..03e488120d --- /dev/null +++ b/queue-6.14/loongarch-select-arch_use_memtest.patch @@ -0,0 +1,40 @@ +From baeab4292eb3bbe2b1985c049105f5a53d987d2f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Apr 2025 20:15:22 +0800 +Subject: LoongArch: Select ARCH_USE_MEMTEST + +From: Yuli Wang + +[ Upstream commit fb8e9f59d6f292c3d9fea6c155c22ea5fc3053ab ] + +As of commit dce44566192e ("mm/memtest: add ARCH_USE_MEMTEST"), +architectures must select ARCH_USE_MEMTEST to enable CONFIG_MEMTEST. + +Commit 628c3bb40e9a ("LoongArch: Add boot and setup routines") added +support for early_memtest but did not select ARCH_USE_MEMTEST. 
+ +Fixes: 628c3bb40e9a ("LoongArch: Add boot and setup routines") +Tested-by: Erpeng Xu +Tested-by: Yuli Wang +Signed-off-by: Yuli Wang +Signed-off-by: Huacai Chen +Signed-off-by: Sasha Levin +--- + arch/loongarch/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig +index bdb989c49c094..b744bd73f08ee 100644 +--- a/arch/loongarch/Kconfig ++++ b/arch/loongarch/Kconfig +@@ -71,6 +71,7 @@ config LOONGARCH + select ARCH_SUPPORTS_RT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF ++ select ARCH_USE_MEMTEST + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_WANT_DEFAULT_BPF_JIT +-- +2.39.5 + diff --git a/queue-6.14/net-dp83822-fix-of_mdio-config-check.patch b/queue-6.14/net-dp83822-fix-of_mdio-config-check.patch new file mode 100644 index 0000000000..9a0b4c0401 --- /dev/null +++ b/queue-6.14/net-dp83822-fix-of_mdio-config-check.patch @@ -0,0 +1,39 @@ +From c848b90f5f89d68c3a1ca4f364eae1c1723ef77b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Apr 2025 06:47:24 +0200 +Subject: net: dp83822: Fix OF_MDIO config check + +From: Johannes Schneider + +[ Upstream commit 607b310ada5ef4c738f9dffc758a62a9d309b084 ] + +When CONFIG_OF_MDIO is set to be a module the code block is not +compiled. Use the IS_ENABLED macro that checks for both built in as +well as module. 
+ +Fixes: 5dc39fd5ef35 ("net: phy: DP83822: Add ability to advertise Fiber connection") +Signed-off-by: Johannes Schneider +Reviewed-by: Maxime Chevallier +Link: https://patch.msgid.link/20250423044724.1284492-1-johannes.schneider@leica-geosystems.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/phy/dp83822.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c +index 3662f3905d5ad..89094aa6dbbdb 100644 +--- a/drivers/net/phy/dp83822.c ++++ b/drivers/net/phy/dp83822.c +@@ -730,7 +730,7 @@ static int dp83822_phy_reset(struct phy_device *phydev) + return phydev->drv->config_init(phydev); + } + +-#ifdef CONFIG_OF_MDIO ++#if IS_ENABLED(CONFIG_OF_MDIO) + static const u32 tx_amplitude_100base_tx_gain[] = { + 80, 82, 83, 85, 87, 88, 90, 92, + 93, 95, 97, 98, 100, 102, 103, 105, +-- +2.39.5 + diff --git a/queue-6.14/net-dsa-mt7530-sync-driver-specific-behavior-of-mt75.patch b/queue-6.14/net-dsa-mt7530-sync-driver-specific-behavior-of-mt75.patch new file mode 100644 index 0000000000..66cc8f0231 --- /dev/null +++ b/queue-6.14/net-dsa-mt7530-sync-driver-specific-behavior-of-mt75.patch @@ -0,0 +1,57 @@ +From 1d7ada5ce471966bd32c227dc6a731c71e00d1a1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Apr 2025 04:10:20 +0100 +Subject: net: dsa: mt7530: sync driver-specific behavior of MT7531 variants + +From: Daniel Golle + +[ Upstream commit 497041d763016c2e8314d2f6a329a9b77c3797ca ] + +MT7531 standalone and MMIO variants found in MT7988 and EN7581 share +most basic properties. Despite that, assisted_learning_on_cpu_port and +mtu_enforcement_ingress were only applied for MT7531 but not for MT7988 +or EN7581, causing the expected issues on MMIO devices. + +Apply both settings equally also for MT7988 and EN7581 by moving both +assignments from mt7531_setup() to mt7531_setup_common(). 
+ +This fixes unwanted flooding of packets due to unknown unicast +during DA lookup, as well as issues with heterogenous MTU settings. + +Fixes: 7f54cc9772ce ("net: dsa: mt7530: split-off common parts from mt7531_setup") +Signed-off-by: Daniel Golle +Reviewed-by: Chester A. Unal +Link: https://patch.msgid.link/89ed7ec6d4fa0395ac53ad2809742bb1ce61ed12.1745290867.git.daniel@makrotopia.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mt7530.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c +index 5883eb93efb11..22513f3d56db1 100644 +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -2541,6 +2541,9 @@ mt7531_setup_common(struct dsa_switch *ds) + struct mt7530_priv *priv = ds->priv; + int ret, i; + ++ ds->assisted_learning_on_cpu_port = true; ++ ds->mtu_enforcement_ingress = true; ++ + mt753x_trap_frames(priv); + + /* Enable and reset MIB counters */ +@@ -2688,9 +2691,6 @@ mt7531_setup(struct dsa_switch *ds) + if (ret) + return ret; + +- ds->assisted_learning_on_cpu_port = true; +- ds->mtu_enforcement_ingress = true; +- + return 0; + } + +-- +2.39.5 + diff --git a/queue-6.14/net-enetc-fix-frame-corruption-on-bpf_xdp_adjust_hea.patch b/queue-6.14/net-enetc-fix-frame-corruption-on-bpf_xdp_adjust_hea.patch new file mode 100644 index 0000000000..83034e6e4d --- /dev/null +++ b/queue-6.14/net-enetc-fix-frame-corruption-on-bpf_xdp_adjust_hea.patch @@ -0,0 +1,152 @@ +From d852e27e977f491ed5d44ac00b2b012822732602 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Apr 2025 15:00:05 +0300 +Subject: net: enetc: fix frame corruption on bpf_xdp_adjust_head/tail() and + XDP_PASS + +From: Vladimir Oltean + +[ Upstream commit 020f0c8b3d396ec8190948f86063e1c45133f839 ] + +Vlatko Markovikj reported that XDP programs attached to ENETC do not +work well if they use bpf_xdp_adjust_head() or bpf_xdp_adjust_tail(), +combined with the XDP_PASS verdict. 
A typical use case is to add or +remove a VLAN tag. + +The resulting sk_buff passed to the stack is corrupted, because the +algorithm used by the driver for XDP_PASS is to unwind the current +buffer pointer in the RX ring and to re-process the current frame with +enetc_build_skb() as if XDP hadn't run. That is incorrect because XDP +may have modified the geometry of the buffer, which we then are +completely unaware of. We are looking at a modified buffer with the +original geometry. + +The initial reaction, both from me and from Vlatko, was to shop around +the kernel for code to steal that would calculate a delta between the +old and the new XDP buffer geometry, and apply that to the sk_buff too. +We noticed that veth and generic xdp have such code. + +The headroom adjustment is pretty uncontroversial, but what turned out +severely problematic is the tailroom. + +veth has this snippet: + + __skb_put(skb, off); /* positive on grow, negative on shrink */ + +which on first sight looks decent enough, except __skb_put() takes an +"unsigned int" for the second argument, and the arithmetic seems to only +work correctly by coincidence. Second issue, __skb_put() contains a +SKB_LINEAR_ASSERT(). It's not a great pattern to make more widespread. +The skb may still be nonlinear at that point - it only becomes linear +later when resetting skb->data_len to zero. + +To avoid the above, bpf_prog_run_generic_xdp() does this instead: + + skb_set_tail_pointer(skb, xdp->data_end - xdp->data); + skb->len += off; /* positive on grow, negative on shrink */ + +which is more open-coded, uses lower-level functions and is in general a +bit too much to spread around in driver code. + +Then there is the snippet: + + if (xdp_buff_has_frags(xdp)) + skb->data_len = skb_shinfo(skb)->xdp_frags_size; + else + skb->data_len = 0; + +One would have expected __pskb_trim() to be the function of choice for +this task. 
But it's not used in veth/xdpgeneric because the extraneous +fragments were _already_ freed by bpf_xdp_adjust_tail() -> +bpf_xdp_frags_shrink_tail() -> ... -> __xdp_return() - the backing +memory for the skb frags and the xdp frags is the same, but they don't +keep individual references. + +In fact, that is the biggest reason why this snippet cannot be reused +as-is, because ENETC temporarily constructs an skb with the original len +and the original number of frags. Because the extraneous frags are +already freed by bpf_xdp_adjust_tail() and returned to the page +allocator, it means the entire approach of using enetc_build_skb() is +questionable for XDP_PASS. To avoid that, one would need to elevate the +page refcount of all frags before calling bpf_prog_run_xdp() and drop it +after XDP_PASS. + +There are other things that are missing in ENETC's handling of XDP_PASS, +like for example updating skb_shinfo(skb)->meta_len. + +These are all handled correctly and cleanly in commit 539c1fba1ac7 +("xdp: add generic xdp_build_skb_from_buff()"), added to net-next in +Dec 2024, and in addition might even be quicker that way. I have a very +strong preference towards backporting that commit for "stable", and that +is what is used to fix the handling bugs. It is way too messy to go +this deep into the guts of an sk_buff from the code of a device driver. 
+ +Fixes: d1b15102dd16 ("net: enetc: add support for XDP_DROP and XDP_PASS") +Reported-by: Vlatko Markovikj +Signed-off-by: Vladimir Oltean +Reviewed-by: Wei Fang +Link: https://patch.msgid.link/20250417120005.3288549-4-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/enetc/enetc.c | 26 +++++++++++--------- + 1 file changed, 15 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c +index 74721995cb1f9..3ee52f4b11660 100644 +--- a/drivers/net/ethernet/freescale/enetc/enetc.c ++++ b/drivers/net/ethernet/freescale/enetc/enetc.c +@@ -1878,11 +1878,10 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, + + while (likely(rx_frm_cnt < work_limit)) { + union enetc_rx_bd *rxbd, *orig_rxbd; +- int orig_i, orig_cleaned_cnt; + struct xdp_buff xdp_buff; + struct sk_buff *skb; ++ int orig_i, err; + u32 bd_status; +- int err; + + rxbd = enetc_rxbd(rx_ring, i); + bd_status = le32_to_cpu(rxbd->r.lstatus); +@@ -1897,7 +1896,6 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, + break; + + orig_rxbd = rxbd; +- orig_cleaned_cnt = cleaned_cnt; + orig_i = i; + + enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i, +@@ -1925,15 +1923,21 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, + rx_ring->stats.xdp_drops++; + break; + case XDP_PASS: +- rxbd = orig_rxbd; +- cleaned_cnt = orig_cleaned_cnt; +- i = orig_i; +- +- skb = enetc_build_skb(rx_ring, bd_status, &rxbd, +- &i, &cleaned_cnt, +- ENETC_RXB_DMA_SIZE_XDP); +- if (unlikely(!skb)) ++ skb = xdp_build_skb_from_buff(&xdp_buff); ++ /* Probably under memory pressure, stop NAPI */ ++ if (unlikely(!skb)) { ++ enetc_xdp_drop(rx_ring, orig_i, i); ++ rx_ring->stats.xdp_drops++; + goto out; ++ } ++ ++ enetc_get_offloads(rx_ring, orig_rxbd, skb); ++ ++ /* These buffers are about to be owned by the stack. 
++ * Update our buffer cache (the rx_swbd array elements) ++ * with their other page halves. ++ */ ++ enetc_bulk_flip_buff(rx_ring, orig_i, i); + + napi_gro_receive(napi, skb); + break; +-- +2.39.5 + diff --git a/queue-6.14/net-enetc-refactor-bulk-flipping-of-rx-buffers-to-se.patch b/queue-6.14/net-enetc-refactor-bulk-flipping-of-rx-buffers-to-se.patch new file mode 100644 index 0000000000..59247ea991 --- /dev/null +++ b/queue-6.14/net-enetc-refactor-bulk-flipping-of-rx-buffers-to-se.patch @@ -0,0 +1,67 @@ +From 4cb401e4bea56e9115b729dcb195ba6117deb103 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Apr 2025 15:00:04 +0300 +Subject: net: enetc: refactor bulk flipping of RX buffers to separate function + +From: Vladimir Oltean + +[ Upstream commit 1d587faa5be7e9785b682cc5f58ba8f4100c13ea ] + +This small snippet of code ensures that we do something with the array +of RX software buffer descriptor elements after passing the skb to the +stack. In this case, we see if the other half of the page is reusable, +and if so, we "turn around" the buffers, making them directly usable by +enetc_refill_rx_ring() without going to enetc_new_page(). + +We will need to perform this kind of buffer flipping from a new code +path, i.e. from XDP_PASS. Currently, enetc_build_skb() does it there +buffer by buffer, but in a subsequent change we will stop using +enetc_build_skb() for XDP_PASS. 
+ +Signed-off-by: Vladimir Oltean +Reviewed-by: Wei Fang +Link: https://patch.msgid.link/20250417120005.3288549-3-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 020f0c8b3d39 ("net: enetc: fix frame corruption on bpf_xdp_adjust_head/tail() and XDP_PASS") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/enetc/enetc.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c +index 9b333254c73ec..74721995cb1f9 100644 +--- a/drivers/net/ethernet/freescale/enetc/enetc.c ++++ b/drivers/net/ethernet/freescale/enetc/enetc.c +@@ -1850,6 +1850,16 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first, + } + } + ++static void enetc_bulk_flip_buff(struct enetc_bdr *rx_ring, int rx_ring_first, ++ int rx_ring_last) ++{ ++ while (rx_ring_first != rx_ring_last) { ++ enetc_flip_rx_buff(rx_ring, ++ &rx_ring->rx_swbd[rx_ring_first]); ++ enetc_bdr_idx_inc(rx_ring, &rx_ring_first); ++ } ++} ++ + static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, + struct napi_struct *napi, int work_limit, + struct bpf_prog *prog) +@@ -1965,11 +1975,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, + enetc_xdp_drop(rx_ring, orig_i, i); + rx_ring->stats.xdp_redirect_failures++; + } else { +- while (orig_i != i) { +- enetc_flip_rx_buff(rx_ring, +- &rx_ring->rx_swbd[orig_i]); +- enetc_bdr_idx_inc(rx_ring, &orig_i); +- } ++ enetc_bulk_flip_buff(rx_ring, orig_i, i); + xdp_redirect_frm_cnt++; + rx_ring->stats.xdp_redirect++; + } +-- +2.39.5 + diff --git a/queue-6.14/net-enetc-register-xdp-rx-queues-with-frag_size.patch b/queue-6.14/net-enetc-register-xdp-rx-queues-with-frag_size.patch new file mode 100644 index 0000000000..de13f4af83 --- /dev/null +++ b/queue-6.14/net-enetc-register-xdp-rx-queues-with-frag_size.patch @@ -0,0 +1,46 @@ +From cd67d49f4c04421eebcf156855b6fa70006e0b62 Mon Sep 17 00:00:00 
2001 +From: Sasha Levin +Date: Thu, 17 Apr 2025 15:00:03 +0300 +Subject: net: enetc: register XDP RX queues with frag_size + +From: Vladimir Oltean + +[ Upstream commit 2768b2e2f7d25ae8984ebdcde8ec1014b6fdcd89 ] + +At the time when bpf_xdp_adjust_tail() gained support for non-linear +buffers, ENETC was already generating this kind of geometry on RX, due +to its use of 2K half page buffers. Frames larger than 1472 bytes +(without FCS) are stored as multi-buffer, presenting a need for multi +buffer support to work properly even in standard MTU circumstances. + +Allow bpf_xdp_frags_increase_tail() to know the allocation size of paged +data, so it can safely permit growing the tailroom of the buffer from +XDP programs. + +Fixes: bf25146a5595 ("bpf: add frags support to the bpf_xdp_adjust_tail() API") +Signed-off-by: Vladimir Oltean +Reviewed-by: Wei Fang +Link: https://patch.msgid.link/20250417120005.3288549-2-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/enetc/enetc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c +index 2106861463e40..9b333254c73ec 100644 +--- a/drivers/net/ethernet/freescale/enetc/enetc.c ++++ b/drivers/net/ethernet/freescale/enetc/enetc.c +@@ -3362,7 +3362,8 @@ static int enetc_int_vector_init(struct enetc_ndev_priv *priv, int i, + bdr->buffer_offset = ENETC_RXB_PAD; + priv->rx_ring[i] = bdr; + +- err = xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0); ++ err = __xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0, ++ ENETC_RXB_DMA_SIZE_XDP); + if (err) + goto free_vector; + +-- +2.39.5 + diff --git a/queue-6.14/net-ethernet-mtk_eth_soc-net-revise-netsysv3-hardwar.patch b/queue-6.14/net-ethernet-mtk_eth_soc-net-revise-netsysv3-hardwar.patch new file mode 100644 index 0000000000..edd200e01f --- /dev/null +++ 
b/queue-6.14/net-ethernet-mtk_eth_soc-net-revise-netsysv3-hardwar.patch @@ -0,0 +1,97 @@ +From 9d1b1f49920b24beca21927e8e14b8a1ad97d77b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Apr 2025 17:41:07 +0100 +Subject: net: ethernet: mtk_eth_soc: net: revise NETSYSv3 hardware + configuration + +From: Bo-Cun Chen + +[ Upstream commit 491ef1117c56476f199b481f8c68820fe4c3a7c2 ] + +Change hardware configuration for the NETSYSv3. + - Enable PSE dummy page mechanism for the GDM1/2/3 + - Enable PSE drop mechanism when the WDMA Rx ring full + - Enable PSE no-drop mechanism for packets from the WDMA Tx + - Correct PSE free drop threshold + - Correct PSE CDMA high threshold + +Fixes: 1953f134a1a8b ("net: ethernet: mtk_eth_soc: add NETSYS_V3 version support") +Signed-off-by: Bo-Cun Chen +Signed-off-by: Daniel Golle +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/b71f8fd9d4bb69c646c4d558f9331dd965068606.1744907886.git.daniel@makrotopia.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mediatek/mtk_eth_soc.c | 24 +++++++++++++++++---- + drivers/net/ethernet/mediatek/mtk_eth_soc.h | 10 ++++++++- + 2 files changed, 29 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +index 0cd1ecacfd29f..477b8732b8609 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -3997,11 +3997,27 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) + mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP); + + if (mtk_is_netsys_v3_or_greater(eth)) { +- /* PSE should not drop port1, port8 and port9 packets */ +- mtk_w32(eth, 0x00000302, PSE_DROP_CFG); ++ /* PSE dummy page mechanism */ ++ mtk_w32(eth, PSE_DUMMY_WORK_GDM(1) | PSE_DUMMY_WORK_GDM(2) | ++ PSE_DUMMY_WORK_GDM(3) | DUMMY_PAGE_THR, PSE_DUMY_REQ); ++ ++ /* PSE free buffer drop threshold */ ++ mtk_w32(eth, 0x00600009, PSE_IQ_REV(8)); ++ ++ /* PSE should 
not drop port8, port9 and port13 packets from ++ * WDMA Tx ++ */ ++ mtk_w32(eth, 0x00002300, PSE_DROP_CFG); ++ ++ /* PSE should drop packets to port8, port9 and port13 on WDMA Rx ++ * ring full ++ */ ++ mtk_w32(eth, 0x00002300, PSE_PPE_DROP(0)); ++ mtk_w32(eth, 0x00002300, PSE_PPE_DROP(1)); ++ mtk_w32(eth, 0x00002300, PSE_PPE_DROP(2)); + + /* GDM and CDM Threshold */ +- mtk_w32(eth, 0x00000707, MTK_CDMW0_THRES); ++ mtk_w32(eth, 0x08000707, MTK_CDMW0_THRES); + mtk_w32(eth, 0x00000077, MTK_CDMW1_THRES); + + /* Disable GDM1 RX CRC stripping */ +@@ -4018,7 +4034,7 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) + mtk_w32(eth, 0x00000300, PSE_DROP_CFG); + + /* PSE should drop packets to port 8/9 on WDMA Rx ring full */ +- mtk_w32(eth, 0x00000300, PSE_PPE0_DROP); ++ mtk_w32(eth, 0x00000300, PSE_PPE_DROP(0)); + + /* PSE Free Queue Flow Control */ + mtk_w32(eth, 0x01fa01f4, PSE_FQFC_CFG2); +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +index 8d7b6818d8601..0570623e569d5 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +@@ -151,7 +151,15 @@ + #define PSE_FQFC_CFG1 0x100 + #define PSE_FQFC_CFG2 0x104 + #define PSE_DROP_CFG 0x108 +-#define PSE_PPE0_DROP 0x110 ++#define PSE_PPE_DROP(x) (0x110 + ((x) * 0x4)) ++ ++/* PSE Last FreeQ Page Request Control */ ++#define PSE_DUMY_REQ 0x10C ++/* PSE_DUMY_REQ is not a typo but actually called like that also in ++ * MediaTek's datasheet ++ */ ++#define PSE_DUMMY_WORK_GDM(x) BIT(16 + (x)) ++#define DUMMY_PAGE_THR 0x1 + + /* PSE Input Queue Reservation Register*/ + #define PSE_IQ_REV(x) (0x140 + (((x) - 1) << 2)) +-- +2.39.5 + diff --git a/queue-6.14/net-lwtunnel-disable-bhs-when-required.patch b/queue-6.14/net-lwtunnel-disable-bhs-when-required.patch new file mode 100644 index 0000000000..981706d0c7 --- /dev/null +++ b/queue-6.14/net-lwtunnel-disable-bhs-when-required.patch @@ -0,0 +1,120 @@ +From 
253b7459844f66fe63791b44af7764e1c9d06f6e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Apr 2025 18:07:16 +0200 +Subject: net: lwtunnel: disable BHs when required + +From: Justin Iurman + +[ Upstream commit c03a49f3093a4903c8a93c8b5c9a297b5343b169 ] + +In lwtunnel_{output|xmit}(), dev_xmit_recursion() may be called in +preemptible scope for PREEMPT kernels. This patch disables BHs before +calling dev_xmit_recursion(). BHs are re-enabled only at the end, since +we must ensure the same CPU is used for both dev_xmit_recursion_inc() +and dev_xmit_recursion_dec() (and any other recursion levels in some +cases) in order to maintain valid per-cpu counters. + +Reported-by: Alexei Starovoitov +Closes: https://lore.kernel.org/netdev/CAADnVQJFWn3dBFJtY+ci6oN1pDFL=TzCmNbRgey7MdYxt_AP2g@mail.gmail.com/ +Reported-by: Eduard Zingerman +Closes: https://lore.kernel.org/netdev/m2h62qwf34.fsf@gmail.com/ +Fixes: 986ffb3a57c5 ("net: lwtunnel: fix recursion loops") +Signed-off-by: Justin Iurman +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250416160716.8823-1-justin.iurman@uliege.be +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/core/lwtunnel.c | 26 ++++++++++++++++++++------ + 1 file changed, 20 insertions(+), 6 deletions(-) + +diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c +index 4417a18b3e951..f63586c9ce021 100644 +--- a/net/core/lwtunnel.c ++++ b/net/core/lwtunnel.c +@@ -332,6 +332,8 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) + struct dst_entry *dst; + int ret; + ++ local_bh_disable(); ++ + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); +@@ -347,8 +349,10 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) + lwtstate = dst->lwtstate; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || +- lwtstate->type > LWTUNNEL_ENCAP_MAX) +- return 0; ++ lwtstate->type > LWTUNNEL_ENCAP_MAX) { ++ ret = 0; ++ goto out; ++ } + 
+ ret = -EOPNOTSUPP; + rcu_read_lock(); +@@ -363,11 +367,13 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) + if (ret == -EOPNOTSUPP) + goto drop; + +- return ret; ++ goto out; + + drop: + kfree_skb(skb); + ++out: ++ local_bh_enable(); + return ret; + } + EXPORT_SYMBOL_GPL(lwtunnel_output); +@@ -379,6 +385,8 @@ int lwtunnel_xmit(struct sk_buff *skb) + struct dst_entry *dst; + int ret; + ++ local_bh_disable(); ++ + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); +@@ -395,8 +403,10 @@ int lwtunnel_xmit(struct sk_buff *skb) + lwtstate = dst->lwtstate; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || +- lwtstate->type > LWTUNNEL_ENCAP_MAX) +- return 0; ++ lwtstate->type > LWTUNNEL_ENCAP_MAX) { ++ ret = 0; ++ goto out; ++ } + + ret = -EOPNOTSUPP; + rcu_read_lock(); +@@ -411,11 +421,13 @@ int lwtunnel_xmit(struct sk_buff *skb) + if (ret == -EOPNOTSUPP) + goto drop; + +- return ret; ++ goto out; + + drop: + kfree_skb(skb); + ++out: ++ local_bh_enable(); + return ret; + } + EXPORT_SYMBOL_GPL(lwtunnel_xmit); +@@ -427,6 +439,8 @@ int lwtunnel_input(struct sk_buff *skb) + struct dst_entry *dst; + int ret; + ++ DEBUG_NET_WARN_ON_ONCE(!in_softirq()); ++ + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); +-- +2.39.5 + diff --git a/queue-6.14/net-mlx5-fix-null-ptr-deref-in-mlx5_create_-inner_-t.patch b/queue-6.14/net-mlx5-fix-null-ptr-deref-in-mlx5_create_-inner_-t.patch new file mode 100644 index 0000000000..f177ef172f --- /dev/null +++ b/queue-6.14/net-mlx5-fix-null-ptr-deref-in-mlx5_create_-inner_-t.patch @@ -0,0 +1,55 @@ +From a6d591ccb91e2f8826e30037710ae902df1ba116 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Apr 2025 10:38:13 +0800 +Subject: net/mlx5: Fix null-ptr-deref in mlx5_create_{inner_,}ttc_table() + +From: Henry Martin + +[ Upstream commit 91037037ee3d611ce17f39d75f79c7de394b122a ] + +Add NULL check 
for mlx5_get_flow_namespace() returns in +mlx5_create_inner_ttc_table() and mlx5_create_ttc_table() to prevent +NULL pointer dereference. + +Fixes: 137f3d50ad2a ("net/mlx5: Support matching on l4_type for ttc_table") +Signed-off-by: Henry Martin +Reviewed-by: Mark Bloch +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/20250418023814.71789-2-bsdhenrymartin@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +index 9f13cea164465..510879e1ba30e 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +@@ -636,6 +636,11 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + } + + ns = mlx5_get_flow_namespace(dev, params->ns_type); ++ if (!ns) { ++ kvfree(ttc); ++ return ERR_PTR(-EOPNOTSUPP); ++ } ++ + groups = use_l4_type ? &inner_ttc_groups[TTC_GROUPS_USE_L4_TYPE] : + &inner_ttc_groups[TTC_GROUPS_DEFAULT]; + +@@ -709,6 +714,11 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + } + + ns = mlx5_get_flow_namespace(dev, params->ns_type); ++ if (!ns) { ++ kvfree(ttc); ++ return ERR_PTR(-EOPNOTSUPP); ++ } ++ + groups = use_l4_type ? 
&ttc_groups[TTC_GROUPS_USE_L4_TYPE] : + &ttc_groups[TTC_GROUPS_DEFAULT]; + +-- +2.39.5 + diff --git a/queue-6.14/net-mlx5-move-ttc-allocation-after-switch-case-to-pr.patch b/queue-6.14/net-mlx5-move-ttc-allocation-after-switch-case-to-pr.patch new file mode 100644 index 0000000000..29c2470204 --- /dev/null +++ b/queue-6.14/net-mlx5-move-ttc-allocation-after-switch-case-to-pr.patch @@ -0,0 +1,77 @@ +From 5ab456f8dc68de9dfebb90b6fd338f174547ce5d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Apr 2025 10:38:14 +0800 +Subject: net/mlx5: Move ttc allocation after switch case to prevent leaks + +From: Henry Martin + +[ Upstream commit fa8fd315127ca48c65e7e6692a84ffcf3d07168e ] + +Relocate the memory allocation for ttc table after the switch statement +that validates params->ns_type in both mlx5_create_inner_ttc_table() and +mlx5_create_ttc_table(). This ensures memory is only allocated after +confirming valid input, eliminating potential memory leaks when invalid +ns_type cases occur. 
+ +Fixes: 137f3d50ad2a ("net/mlx5: Support matching on l4_type for ttc_table") +Signed-off-by: Henry Martin +Reviewed-by: Michal Swiatkowski +Reviewed-by: Mark Bloch +Link: https://patch.msgid.link/20250418023814.71789-3-bsdhenrymartin@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +index 510879e1ba30e..43b2216bc0a22 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +@@ -618,10 +618,6 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + bool use_l4_type; + int err; + +- ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); +- if (!ttc) +- return ERR_PTR(-ENOMEM); +- + switch (params->ns_type) { + case MLX5_FLOW_NAMESPACE_PORT_SEL: + use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && +@@ -635,6 +631,10 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + return ERR_PTR(-EINVAL); + } + ++ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); ++ if (!ttc) ++ return ERR_PTR(-ENOMEM); ++ + ns = mlx5_get_flow_namespace(dev, params->ns_type); + if (!ns) { + kvfree(ttc); +@@ -696,10 +696,6 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + bool use_l4_type; + int err; + +- ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); +- if (!ttc) +- return ERR_PTR(-ENOMEM); +- + switch (params->ns_type) { + case MLX5_FLOW_NAMESPACE_PORT_SEL: + use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && +@@ -713,6 +709,10 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + return ERR_PTR(-EINVAL); + } + ++ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); ++ if (!ttc) ++ return ERR_PTR(-ENOMEM); ++ + ns = mlx5_get_flow_namespace(dev, params->ns_type); + if (!ns) { + kvfree(ttc); +-- 
+2.39.5 + diff --git a/queue-6.14/net-phy-add-helper-for-getting-tx-amplitude-gain.patch b/queue-6.14/net-phy-add-helper-for-getting-tx-amplitude-gain.patch new file mode 100644 index 0000000000..c93d8e8c5e --- /dev/null +++ b/queue-6.14/net-phy-add-helper-for-getting-tx-amplitude-gain.patch @@ -0,0 +1,139 @@ +From 6b4d070dfde490b314ddc432e693be55c749d9ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Feb 2025 15:14:10 +0100 +Subject: net: phy: Add helper for getting tx amplitude gain + +From: Dimitri Fedrau + +[ Upstream commit 961ee5aeea048aa292f28d61f3a96a48554e91af ] + +Add helper which returns the tx amplitude gain defined in device tree. +Modifying it can be necessary to compensate losses on the PCB and +connector, so the voltages measured on the RJ45 pins are conforming. + +Signed-off-by: Dimitri Fedrau +Reviewed-by: Andrew Lunn +Link: https://patch.msgid.link/20250214-dp83822-tx-swing-v5-2-02ca72620599@liebherr.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 607b310ada5e ("net: dp83822: Fix OF_MDIO config check") +Signed-off-by: Sasha Levin +--- + drivers/net/phy/phy_device.c | 53 ++++++++++++++++++++++++------------ + include/linux/phy.h | 4 +++ + 2 files changed, 39 insertions(+), 18 deletions(-) + +diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c +index 92161af788afd..2a01887c5617e 100644 +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -3123,19 +3123,12 @@ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause) + EXPORT_SYMBOL(phy_get_pause); + + #if IS_ENABLED(CONFIG_OF_MDIO) +-static int phy_get_int_delay_property(struct device *dev, const char *name) ++static int phy_get_u32_property(struct device *dev, const char *name, u32 *val) + { +- s32 int_delay; +- int ret; +- +- ret = device_property_read_u32(dev, name, &int_delay); +- if (ret) +- return ret; +- +- return int_delay; ++ return device_property_read_u32(dev, name, val); + } + #else +-static int 
phy_get_int_delay_property(struct device *dev, const char *name) ++static int phy_get_u32_property(struct device *dev, const char *name, u32 *val) + { + return -EINVAL; + } +@@ -3160,12 +3153,12 @@ static int phy_get_int_delay_property(struct device *dev, const char *name) + s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, + const int *delay_values, int size, bool is_rx) + { +- s32 delay; +- int i; ++ int i, ret; ++ u32 delay; + + if (is_rx) { +- delay = phy_get_int_delay_property(dev, "rx-internal-delay-ps"); +- if (delay < 0 && size == 0) { ++ ret = phy_get_u32_property(dev, "rx-internal-delay-ps", &delay); ++ if (ret < 0 && size == 0) { + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) + return 1; +@@ -3174,8 +3167,8 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, + } + + } else { +- delay = phy_get_int_delay_property(dev, "tx-internal-delay-ps"); +- if (delay < 0 && size == 0) { ++ ret = phy_get_u32_property(dev, "tx-internal-delay-ps", &delay); ++ if (ret < 0 && size == 0) { + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) + return 1; +@@ -3184,8 +3177,8 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, + } + } + +- if (delay < 0) +- return delay; ++ if (ret < 0) ++ return ret; + + if (size == 0) + return delay; +@@ -3220,6 +3213,30 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, + } + EXPORT_SYMBOL(phy_get_internal_delay); + ++/** ++ * phy_get_tx_amplitude_gain - stores tx amplitude gain in @val ++ * @phydev: phy_device struct ++ * @dev: pointer to the devices device struct ++ * @linkmode: linkmode for which the tx amplitude gain should be retrieved ++ * @val: tx amplitude gain ++ * ++ * Returns: 0 on success, < 0 on failure ++ */ ++int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, ++ enum 
ethtool_link_mode_bit_indices linkmode, ++ u32 *val) ++{ ++ switch (linkmode) { ++ case ETHTOOL_LINK_MODE_100baseT_Full_BIT: ++ return phy_get_u32_property(dev, ++ "tx-amplitude-100base-tx-percent", ++ val); ++ default: ++ return -EINVAL; ++ } ++} ++EXPORT_SYMBOL_GPL(phy_get_tx_amplitude_gain); ++ + static int phy_led_set_brightness(struct led_classdev *led_cdev, + enum led_brightness value) + { +diff --git a/include/linux/phy.h b/include/linux/phy.h +index 19f076a71f946..7c9da26145d30 100644 +--- a/include/linux/phy.h ++++ b/include/linux/phy.h +@@ -2114,6 +2114,10 @@ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause); + s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, + const int *delay_values, int size, bool is_rx); + ++int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, ++ enum ethtool_link_mode_bit_indices linkmode, ++ u32 *val); ++ + void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv, + bool *tx_pause, bool *rx_pause); + +-- +2.39.5 + diff --git a/queue-6.14/net-phy-dp83822-add-support-for-changing-the-transmi.patch b/queue-6.14/net-phy-dp83822-add-support-for-changing-the-transmi.patch new file mode 100644 index 0000000000..1235bab4f3 --- /dev/null +++ b/queue-6.14/net-phy-dp83822-add-support-for-changing-the-transmi.patch @@ -0,0 +1,118 @@ +From b3295298245f8c2cae8ae72dd445d21eeb63fdca Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Feb 2025 15:14:11 +0100 +Subject: net: phy: dp83822: Add support for changing the transmit amplitude + voltage + +From: Dimitri Fedrau + +[ Upstream commit 4f3735e82d8a2e80ee39731832536b1e34697c71 ] + +Add support for changing the transmit amplitude voltage in 100BASE-TX mode. +Modifying it can be necessary to compensate losses on the PCB and +connector, so the voltages measured on the RJ45 pins are conforming. 
+ +Signed-off-by: Dimitri Fedrau +Reviewed-by: Andrew Lunn +Link: https://patch.msgid.link/20250214-dp83822-tx-swing-v5-3-02ca72620599@liebherr.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 607b310ada5e ("net: dp83822: Fix OF_MDIO config check") +Signed-off-by: Sasha Levin +--- + drivers/net/phy/dp83822.c | 38 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c +index 6599feca1967d..3662f3905d5ad 100644 +--- a/drivers/net/phy/dp83822.c ++++ b/drivers/net/phy/dp83822.c +@@ -31,6 +31,7 @@ + #define MII_DP83822_RCSR 0x17 + #define MII_DP83822_RESET_CTRL 0x1f + #define MII_DP83822_MLEDCR 0x25 ++#define MII_DP83822_LDCTRL 0x403 + #define MII_DP83822_LEDCFG1 0x460 + #define MII_DP83822_IOCTRL1 0x462 + #define MII_DP83822_IOCTRL2 0x463 +@@ -123,6 +124,9 @@ + #define DP83822_IOCTRL1_GPIO1_CTRL GENMASK(2, 0) + #define DP83822_IOCTRL1_GPIO1_CTRL_LED_1 BIT(0) + ++/* LDCTRL bits */ ++#define DP83822_100BASE_TX_LINE_DRIVER_SWING GENMASK(7, 4) ++ + /* IOCTRL2 bits */ + #define DP83822_IOCTRL2_GPIO2_CLK_SRC GENMASK(6, 4) + #define DP83822_IOCTRL2_GPIO2_CTRL GENMASK(2, 0) +@@ -197,6 +201,7 @@ struct dp83822_private { + bool set_gpio2_clk_out; + u32 gpio2_clk_out; + bool led_pin_enable[DP83822_MAX_LED_PINS]; ++ int tx_amplitude_100base_tx_index; + }; + + static int dp83822_config_wol(struct phy_device *phydev, +@@ -522,6 +527,12 @@ static int dp83822_config_init(struct phy_device *phydev) + FIELD_PREP(DP83822_IOCTRL2_GPIO2_CLK_SRC, + dp83822->gpio2_clk_out)); + ++ if (dp83822->tx_amplitude_100base_tx_index >= 0) ++ phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_LDCTRL, ++ DP83822_100BASE_TX_LINE_DRIVER_SWING, ++ FIELD_PREP(DP83822_100BASE_TX_LINE_DRIVER_SWING, ++ dp83822->tx_amplitude_100base_tx_index)); ++ + err = dp83822_config_init_leds(phydev); + if (err) + return err; +@@ -720,6 +731,11 @@ static int dp83822_phy_reset(struct phy_device *phydev) + } + + #ifdef CONFIG_OF_MDIO 
++static const u32 tx_amplitude_100base_tx_gain[] = { ++ 80, 82, 83, 85, 87, 88, 90, 92, ++ 93, 95, 97, 98, 100, 102, 103, 105, ++}; ++ + static int dp83822_of_init_leds(struct phy_device *phydev) + { + struct device_node *node = phydev->mdio.dev.of_node; +@@ -780,6 +796,8 @@ static int dp83822_of_init(struct phy_device *phydev) + struct dp83822_private *dp83822 = phydev->priv; + struct device *dev = &phydev->mdio.dev; + const char *of_val; ++ int i, ret; ++ u32 val; + + /* Signal detection for the PHY is only enabled if the FX_EN and the + * SD_EN pins are strapped. Signal detection can only enabled if FX_EN +@@ -815,6 +833,26 @@ static int dp83822_of_init(struct phy_device *phydev) + dp83822->set_gpio2_clk_out = true; + } + ++ dp83822->tx_amplitude_100base_tx_index = -1; ++ ret = phy_get_tx_amplitude_gain(phydev, dev, ++ ETHTOOL_LINK_MODE_100baseT_Full_BIT, ++ &val); ++ if (!ret) { ++ for (i = 0; i < ARRAY_SIZE(tx_amplitude_100base_tx_gain); i++) { ++ if (tx_amplitude_100base_tx_gain[i] == val) { ++ dp83822->tx_amplitude_100base_tx_index = i; ++ break; ++ } ++ } ++ ++ if (dp83822->tx_amplitude_100base_tx_index < 0) { ++ phydev_err(phydev, ++ "Invalid value for tx-amplitude-100base-tx-percent property (%u)\n", ++ val); ++ return -EINVAL; ++ } ++ } ++ + return dp83822_of_init_leds(phydev); + } + +-- +2.39.5 + diff --git a/queue-6.14/net-phy-leds-fix-memory-leak.patch b/queue-6.14/net-phy-leds-fix-memory-leak.patch new file mode 100644 index 0000000000..3e4b2f63fa --- /dev/null +++ b/queue-6.14/net-phy-leds-fix-memory-leak.patch @@ -0,0 +1,101 @@ +From 94665d32a7069e1111905b58dd2ab0a671a5d54b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Apr 2025 11:25:56 +0800 +Subject: net: phy: leds: fix memory leak + +From: Qingfang Deng + +[ Upstream commit b7f0ee992adf601aa00c252418266177eb7ac2bc ] + +A network restart test on a router led to an out-of-memory condition, +which was traced to a memory leak in the PHY LED trigger code. 
+ +The root cause is misuse of the devm API. The registration function +(phy_led_triggers_register) is called from phy_attach_direct, not +phy_probe, and the unregister function (phy_led_triggers_unregister) +is called from phy_detach, not phy_remove. This means the register and +unregister functions can be called multiple times for the same PHY +device, but devm-allocated memory is not freed until the driver is +unbound. + +This also prevents kmemleak from detecting the leak, as the devm API +internally stores the allocated pointer. + +Fix this by replacing devm_kzalloc/devm_kcalloc with standard +kzalloc/kcalloc, and add the corresponding kfree calls in the unregister +path. + +Fixes: 3928ee6485a3 ("net: phy: leds: Add support for "link" trigger") +Fixes: 2e0bc452f472 ("net: phy: leds: add support for led triggers on phy link state change") +Signed-off-by: Hao Guan +Signed-off-by: Qingfang Deng +Reviewed-by: Andrew Lunn +Link: https://patch.msgid.link/20250417032557.2929427-1-dqfext@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/phy_led_triggers.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c +index f550576eb9dae..6f9d8da76c4df 100644 +--- a/drivers/net/phy/phy_led_triggers.c ++++ b/drivers/net/phy/phy_led_triggers.c +@@ -91,9 +91,8 @@ int phy_led_triggers_register(struct phy_device *phy) + if (!phy->phy_num_led_triggers) + return 0; + +- phy->led_link_trigger = devm_kzalloc(&phy->mdio.dev, +- sizeof(*phy->led_link_trigger), +- GFP_KERNEL); ++ phy->led_link_trigger = kzalloc(sizeof(*phy->led_link_trigger), ++ GFP_KERNEL); + if (!phy->led_link_trigger) { + err = -ENOMEM; + goto out_clear; +@@ -103,10 +102,9 @@ int phy_led_triggers_register(struct phy_device *phy) + if (err) + goto out_free_link; + +- phy->phy_led_triggers = devm_kcalloc(&phy->mdio.dev, +- phy->phy_num_led_triggers, +- sizeof(struct 
phy_led_trigger), +- GFP_KERNEL); ++ phy->phy_led_triggers = kcalloc(phy->phy_num_led_triggers, ++ sizeof(struct phy_led_trigger), ++ GFP_KERNEL); + if (!phy->phy_led_triggers) { + err = -ENOMEM; + goto out_unreg_link; +@@ -127,11 +125,11 @@ int phy_led_triggers_register(struct phy_device *phy) + out_unreg: + while (i--) + phy_led_trigger_unregister(&phy->phy_led_triggers[i]); +- devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); ++ kfree(phy->phy_led_triggers); + out_unreg_link: + phy_led_trigger_unregister(phy->led_link_trigger); + out_free_link: +- devm_kfree(&phy->mdio.dev, phy->led_link_trigger); ++ kfree(phy->led_link_trigger); + phy->led_link_trigger = NULL; + out_clear: + phy->phy_num_led_triggers = 0; +@@ -145,8 +143,13 @@ void phy_led_triggers_unregister(struct phy_device *phy) + + for (i = 0; i < phy->phy_num_led_triggers; i++) + phy_led_trigger_unregister(&phy->phy_led_triggers[i]); ++ kfree(phy->phy_led_triggers); ++ phy->phy_led_triggers = NULL; + +- if (phy->led_link_trigger) ++ if (phy->led_link_trigger) { + phy_led_trigger_unregister(phy->led_link_trigger); ++ kfree(phy->led_link_trigger); ++ phy->led_link_trigger = NULL; ++ } + } + EXPORT_SYMBOL_GPL(phy_led_triggers_unregister); +-- +2.39.5 + diff --git a/queue-6.14/net-phylink-fix-suspend-resume-with-wol-enabled-and-.patch b/queue-6.14/net-phylink-fix-suspend-resume-with-wol-enabled-and-.patch new file mode 100644 index 0000000000..60e2b9e047 --- /dev/null +++ b/queue-6.14/net-phylink-fix-suspend-resume-with-wol-enabled-and-.patch @@ -0,0 +1,95 @@ +From 3537813107fb2288ffff0c3d424080425a5066f9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Apr 2025 17:16:01 +0100 +Subject: net: phylink: fix suspend/resume with WoL enabled and link down + +From: Russell King (Oracle) + +[ Upstream commit 4c8925cb9db158c812e1e11f3e74b945df7c9801 ] + +When WoL is enabled, we update the software state in phylink to +indicate that the link is down, and disable the resolver from +bringing the link back up. 
+ +On resume, we attempt to bring the overall state into consistency +by calling the .mac_link_down() method, but this is wrong if the +link was already down, as phylink strictly orders the .mac_link_up() +and .mac_link_down() methods - and this would break that ordering. + +Fixes: f97493657c63 ("net: phylink: add suspend/resume support") +Signed-off-by: Russell King (Oracle) +Tested-by: Russell King (Oracle) +Link: https://patch.msgid.link/E1u55Qf-0016RN-PA@rmk-PC.armlinux.org.uk +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/phylink.c | 38 ++++++++++++++++++++++---------------- + 1 file changed, 22 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c +index b74b1c3365000..306275fbe4c98 100644 +--- a/drivers/net/phy/phylink.c ++++ b/drivers/net/phy/phylink.c +@@ -82,6 +82,7 @@ struct phylink { + unsigned int pcs_state; + + bool link_failed; ++ bool suspend_link_up; + bool major_config_failed; + bool mac_supports_eee_ops; + bool mac_supports_eee; +@@ -2645,14 +2646,16 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) + /* Stop the resolver bringing the link up */ + __set_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state); + +- /* Disable the carrier, to prevent transmit timeouts, +- * but one would hope all packets have been sent. This +- * also means phylink_resolve() will do nothing. +- */ +- if (pl->netdev) +- netif_carrier_off(pl->netdev); +- else ++ pl->suspend_link_up = phylink_link_is_up(pl); ++ if (pl->suspend_link_up) { ++ /* Disable the carrier, to prevent transmit timeouts, ++ * but one would hope all packets have been sent. This ++ * also means phylink_resolve() will do nothing. ++ */ ++ if (pl->netdev) ++ netif_carrier_off(pl->netdev); + pl->old_link_state = false; ++ } + + /* We do not call mac_link_down() here as we want the + * link to remain up to receive the WoL packets. 
+@@ -2678,15 +2681,18 @@ void phylink_resume(struct phylink *pl) + if (test_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state)) { + /* Wake-on-Lan enabled, MAC handling */ + +- /* Call mac_link_down() so we keep the overall state balanced. +- * Do this under the state_mutex lock for consistency. This +- * will cause a "Link Down" message to be printed during +- * resume, which is harmless - the true link state will be +- * printed when we run a resolve. +- */ +- mutex_lock(&pl->state_mutex); +- phylink_link_down(pl); +- mutex_unlock(&pl->state_mutex); ++ if (pl->suspend_link_up) { ++ /* Call mac_link_down() so we keep the overall state ++ * balanced. Do this under the state_mutex lock for ++ * consistency. This will cause a "Link Down" message ++ * to be printed during resume, which is harmless - ++ * the true link state will be printed when we run a ++ * resolve. ++ */ ++ mutex_lock(&pl->state_mutex); ++ phylink_link_down(pl); ++ mutex_unlock(&pl->state_mutex); ++ } + + /* Re-apply the link parameters so that all the settings get + * restored to the MAC. +-- +2.39.5 + diff --git a/queue-6.14/net-phylink-force-link-down-on-major_config-failure.patch b/queue-6.14/net-phylink-force-link-down-on-major_config-failure.patch new file mode 100644 index 0000000000..8c156a9f43 --- /dev/null +++ b/queue-6.14/net-phylink-force-link-down-on-major_config-failure.patch @@ -0,0 +1,136 @@ +From 14dc0f586efca1f4a718bac4f126d308c4ccc971 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Mar 2025 16:40:08 +0000 +Subject: net: phylink: force link down on major_config failure + +From: Russell King (Oracle) + +[ Upstream commit f1ae32a709e0b525d7963207eb3a4747626f4818 ] + +If we fail to configure the MAC or PCS according to the desired mode, +do not allow the network link to come up until we have successfully +configured the MAC and PCS. This improves phylink's behaviour when an +error occurs. 
+ +Signed-off-by: Russell King (Oracle) +Link: https://patch.msgid.link/E1twkqO-0006FI-Gm@rmk-PC.armlinux.org.uk +Signed-off-by: Jakub Kicinski +Stable-dep-of: 4c8925cb9db1 ("net: phylink: fix suspend/resume with WoL enabled and link down") +Signed-off-by: Sasha Levin +--- + drivers/net/phy/phylink.c | 42 +++++++++++++++++++++++++++++++-------- + 1 file changed, 34 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c +index b00a315de0601..b74b1c3365000 100644 +--- a/drivers/net/phy/phylink.c ++++ b/drivers/net/phy/phylink.c +@@ -82,6 +82,7 @@ struct phylink { + unsigned int pcs_state; + + bool link_failed; ++ bool major_config_failed; + bool mac_supports_eee_ops; + bool mac_supports_eee; + bool phy_enable_tx_lpi; +@@ -1360,12 +1361,16 @@ static void phylink_major_config(struct phylink *pl, bool restart, + phylink_an_mode_str(pl->req_link_an_mode), + phy_modes(state->interface)); + ++ pl->major_config_failed = false; ++ + if (pl->mac_ops->mac_select_pcs) { + pcs = pl->mac_ops->mac_select_pcs(pl->config, state->interface); + if (IS_ERR(pcs)) { + phylink_err(pl, + "mac_select_pcs unexpectedly failed: %pe\n", + pcs); ++ ++ pl->major_config_failed = true; + return; + } + +@@ -1387,6 +1392,7 @@ static void phylink_major_config(struct phylink *pl, bool restart, + if (err < 0) { + phylink_err(pl, "mac_prepare failed: %pe\n", + ERR_PTR(err)); ++ pl->major_config_failed = true; + return; + } + } +@@ -1410,8 +1416,15 @@ static void phylink_major_config(struct phylink *pl, bool restart, + + phylink_mac_config(pl, state); + +- if (pl->pcs) +- phylink_pcs_post_config(pl->pcs, state->interface); ++ if (pl->pcs) { ++ err = phylink_pcs_post_config(pl->pcs, state->interface); ++ if (err < 0) { ++ phylink_err(pl, "pcs_post_config failed: %pe\n", ++ ERR_PTR(err)); ++ ++ pl->major_config_failed = true; ++ } ++ } + + if (pl->pcs_state == PCS_STATE_STARTING || pcs_changed) + phylink_pcs_enable(pl->pcs); +@@ -1422,11 +1435,12 @@ static void 
phylink_major_config(struct phylink *pl, bool restart, + + err = phylink_pcs_config(pl->pcs, neg_mode, state, + !!(pl->link_config.pause & MLO_PAUSE_AN)); +- if (err < 0) +- phylink_err(pl, "pcs_config failed: %pe\n", +- ERR_PTR(err)); +- else if (err > 0) ++ if (err < 0) { ++ phylink_err(pl, "pcs_config failed: %pe\n", ERR_PTR(err)); ++ pl->major_config_failed = true; ++ } else if (err > 0) { + restart = true; ++ } + + if (restart) + phylink_pcs_an_restart(pl); +@@ -1434,16 +1448,22 @@ static void phylink_major_config(struct phylink *pl, bool restart, + if (pl->mac_ops->mac_finish) { + err = pl->mac_ops->mac_finish(pl->config, pl->act_link_an_mode, + state->interface); +- if (err < 0) ++ if (err < 0) { + phylink_err(pl, "mac_finish failed: %pe\n", + ERR_PTR(err)); ++ ++ pl->major_config_failed = true; ++ } + } + + if (pl->phydev && pl->phy_ib_mode) { + err = phy_config_inband(pl->phydev, pl->phy_ib_mode); +- if (err < 0) ++ if (err < 0) { + phylink_err(pl, "phy_config_inband: %pe\n", + ERR_PTR(err)); ++ ++ pl->major_config_failed = true; ++ } + } + + if (pl->sfp_bus) { +@@ -1795,6 +1815,12 @@ static void phylink_resolve(struct work_struct *w) + } + } + ++ /* If configuration of the interface failed, force the link down ++ * until we get a successful configuration. 
++ */ ++ if (pl->major_config_failed) ++ link_state.link = false; ++ + if (link_state.link != cur_link_state) { + pl->old_link_state = link_state.link; + if (!link_state.link) +-- +2.39.5 + diff --git a/queue-6.14/net-stmmac-fix-dwmac1000-ptp-timestamp-status-offset.patch b/queue-6.14/net-stmmac-fix-dwmac1000-ptp-timestamp-status-offset.patch new file mode 100644 index 0000000000..3ce00cc870 --- /dev/null +++ b/queue-6.14/net-stmmac-fix-dwmac1000-ptp-timestamp-status-offset.patch @@ -0,0 +1,51 @@ +From 5329aaa3189956b271142095681cd8661cb03de8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Apr 2025 09:12:09 +0200 +Subject: net: stmmac: fix dwmac1000 ptp timestamp status offset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alexis Lothoré + +[ Upstream commit 73fa4597bdc035437fbcd84d6be32bd39f1f2149 ] + +When a PTP interrupt occurs, the driver accesses the wrong offset to +learn about the number of available snapshots in the FIFO for dwmac1000: +it should be accessing bits 29..25, while it is currently reading bits +19..16 (those are bits about the auxiliary triggers which have generated +the timestamps). As a consequence, it does not compute correctly the +number of available snapshots, and so possibly does not generate the +corresponding clock events if the bogus value ends up being 0. + +Fix clock events generation by reading the correct bits in the timestamp +register for dwmac1000.
+ +Fixes: 477c3e1f6363 ("net: stmmac: Introduce dwmac1000 timestamping operations") +Signed-off-by: Alexis Lothoré +Reviewed-by: Maxime Chevallier +Link: https://patch.msgid.link/20250423-stmmac_ts-v2-1-e2cf2bbd61b1@bootlin.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +index 600fea8f712fd..2d5bf1de5d2e4 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +@@ -331,8 +331,8 @@ enum rtc_control { + + /* PTP and timestamping registers */ + +-#define GMAC3_X_ATSNS GENMASK(19, 16) +-#define GMAC3_X_ATSNS_SHIFT 16 ++#define GMAC3_X_ATSNS GENMASK(29, 25) ++#define GMAC3_X_ATSNS_SHIFT 25 + + #define GMAC_PTP_TCR_ATSFC BIT(24) + #define GMAC_PTP_TCR_ATSEN0 BIT(25) +-- +2.39.5 + diff --git a/queue-6.14/net-stmmac-fix-multiplication-overflow-when-reading-.patch b/queue-6.14/net-stmmac-fix-multiplication-overflow-when-reading-.patch new file mode 100644 index 0000000000..577e2c0d70 --- /dev/null +++ b/queue-6.14/net-stmmac-fix-multiplication-overflow-when-reading-.patch @@ -0,0 +1,65 @@ +From 9af54610d5957845e8e5fbf542a7ce000899f7bd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Apr 2025 09:12:10 +0200 +Subject: net: stmmac: fix multiplication overflow when reading timestamp +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alexis Lothoré + +[ Upstream commit 7b7491372f8ec2d8c08da18e5d629e55f41dda89 ] + +The current way of reading a timestamp snapshot in stmmac can lead to +integer overflow, as the computation is done on 32 bits. The issue has +been observed on a dwmac-socfpga platform returning chaotic timestamp +values due to this overflow. 
The corresponding multiplication is done +with a MUL instruction, which returns 32 bit values. Explicitly casting +the value to 64 bits replaced the MUL with a UMLAL, which computes and +returns the result on 64 bits, and so returns correctly the timestamps. + +Prevent this overflow by explicitly casting the intermediate value to +u64 to make sure that the whole computation is made on u64. While at it, +apply the same cast on the other dwmac variant (GMAC4) method for +snapshot retrieval. + +Fixes: 477c3e1f6363 ("net: stmmac: Introduce dwmac1000 timestamping operations") +Signed-off-by: Alexis Lothoré +Reviewed-by: Maxime Chevallier +Link: https://patch.msgid.link/20250423-stmmac_ts-v2-2-e2cf2bbd61b1@bootlin.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- + drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +index 96bcda0856ec6..11c525b8d2698 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +@@ -560,7 +560,7 @@ void dwmac1000_get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) + u64 ns; + + ns = readl(ptpaddr + GMAC_PTP_ATNR); +- ns += readl(ptpaddr + GMAC_PTP_ATSR) * NSEC_PER_SEC; ++ ns += (u64)readl(ptpaddr + GMAC_PTP_ATSR) * NSEC_PER_SEC; + + *ptp_time = ns; + } +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +index 0f59aa9826040..e2840fa241f29 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +@@ -222,7 +222,7 @@ static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) + u64 ns; + + ns = readl(ptpaddr + PTP_ATNR); +- ns += readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC; ++ ns 
+= (u64)readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC; + + *ptp_time = ns; + } +-- +2.39.5 + diff --git a/queue-6.14/net_sched-hfsc-fix-a-potential-uaf-in-hfsc_dequeue-t.patch b/queue-6.14/net_sched-hfsc-fix-a-potential-uaf-in-hfsc_dequeue-t.patch new file mode 100644 index 0000000000..6aaef96cf2 --- /dev/null +++ b/queue-6.14/net_sched-hfsc-fix-a-potential-uaf-in-hfsc_dequeue-t.patch @@ -0,0 +1,51 @@ +From 58b154f582934840f1dda2382efbf973d72b60c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Apr 2025 11:47:31 -0700 +Subject: net_sched: hfsc: Fix a potential UAF in hfsc_dequeue() too + +From: Cong Wang + +[ Upstream commit 6ccbda44e2cc3d26fd22af54c650d6d5d801addf ] + +Similarly to the previous patch, we need to safeguard hfsc_dequeue() +too. But for this one, we don't have a reliable reproducer. + +Fixes: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 ("Linux-2.6.12-rc2") +Reported-by: Gerrard Tai +Signed-off-by: Cong Wang +Reviewed-by: Jamal Hadi Salim +Link: https://patch.msgid.link/20250417184732.943057-3-xiyou.wangcong@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_hfsc.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c +index e730d3f791c24..5bb4ab9941d6e 100644 +--- a/net/sched/sch_hfsc.c ++++ b/net/sched/sch_hfsc.c +@@ -1637,10 +1637,16 @@ hfsc_dequeue(struct Qdisc *sch) + if (cl->qdisc->q.qlen != 0) { + /* update ed */ + next_len = qdisc_peek_len(cl->qdisc); +- if (realtime) +- update_ed(cl, next_len); +- else +- update_d(cl, next_len); ++ /* Check queue length again since some qdisc implementations ++ * (e.g., netem/codel) might empty the queue during the peek ++ * operation.
++ */ ++ if (cl->qdisc->q.qlen != 0) { ++ if (realtime) ++ update_ed(cl, next_len); ++ else ++ update_d(cl, next_len); ++ } + } else { + /* the class becomes passive */ + eltree_remove(cl); +-- +2.39.5 + diff --git a/queue-6.14/net_sched-hfsc-fix-a-uaf-vulnerability-in-class-hand.patch b/queue-6.14/net_sched-hfsc-fix-a-uaf-vulnerability-in-class-hand.patch new file mode 100644 index 0000000000..474a1b3be7 --- /dev/null +++ b/queue-6.14/net_sched-hfsc-fix-a-uaf-vulnerability-in-class-hand.patch @@ -0,0 +1,70 @@ +From ce557b8f5e5093d44724fb759ad47420f9b7cf26 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Apr 2025 11:47:30 -0700 +Subject: net_sched: hfsc: Fix a UAF vulnerability in class handling + +From: Cong Wang + +[ Upstream commit 3df275ef0a6ae181e8428a6589ef5d5231e58b5c ] + +This patch fixes a Use-After-Free vulnerability in the HFSC qdisc class +handling. The issue occurs due to a time-of-check/time-of-use condition +in hfsc_change_class() when working with certain child qdiscs like netem +or codel. + +The vulnerability works as follows: +1. hfsc_change_class() checks if a class has packets (q.qlen != 0) +2. It then calls qdisc_peek_len(), which for certain qdiscs (e.g., + codel, netem) might drop packets and empty the queue +3. The code continues assuming the queue is still non-empty, adding + the class to vttree +4. This breaks HFSC scheduler assumptions that only non-empty classes + are in vttree +5. Later, when the class is destroyed, this can lead to a Use-After-Free + +The fix adds a second queue length check after qdisc_peek_len() to verify +the queue wasn't emptied. 
+ +Fixes: 21f4d5cc25ec ("net_sched/hfsc: fix curve activation in hfsc_change_class()") +Reported-by: Gerrard Tai +Reviewed-by: Konstantin Khlebnikov +Signed-off-by: Cong Wang +Reviewed-by: Jamal Hadi Salim +Link: https://patch.msgid.link/20250417184732.943057-2-xiyou.wangcong@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_hfsc.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c +index c287bf8423b47..e730d3f791c24 100644 +--- a/net/sched/sch_hfsc.c ++++ b/net/sched/sch_hfsc.c +@@ -958,6 +958,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + + if (cl != NULL) { + int old_flags; ++ int len = 0; + + if (parentid) { + if (cl->cl_parent && +@@ -988,9 +989,13 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + if (usc != NULL) + hfsc_change_usc(cl, usc, cur_time); + ++ if (cl->qdisc->q.qlen != 0) ++ len = qdisc_peek_len(cl->qdisc); ++ /* Check queue length again since some qdisc implementations ++ * (e.g., netem/codel) might empty the queue during the peek ++ * operation. 
++ */ + if (cl->qdisc->q.qlen != 0) { +- int len = qdisc_peek_len(cl->qdisc); +- + if (cl->cl_flags & HFSC_RSC) { + if (old_flags & HFSC_RSC) + update_ed(cl, len); +-- +2.39.5 + diff --git a/queue-6.14/nvmet-fix-out-of-bounds-access-in-nvmet_enable_port.patch b/queue-6.14/nvmet-fix-out-of-bounds-access-in-nvmet_enable_port.patch new file mode 100644 index 0000000000..12076805de --- /dev/null +++ b/queue-6.14/nvmet-fix-out-of-bounds-access-in-nvmet_enable_port.patch @@ -0,0 +1,49 @@ +From da5a95bea16f87f0a54fc972718e28afea8bc520 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Apr 2025 10:02:50 +0200 +Subject: nvmet: fix out-of-bounds access in nvmet_enable_port + +From: Richard Weinberger + +[ Upstream commit 3d7aa0c7b4e96cd460826d932e44710cdeb3378b ] + +When trying to enable a port that has no transport configured yet, +nvmet_enable_port() uses NVMF_TRTYPE_MAX (255) to query the transports +array, causing an out-of-bounds access: + +[ 106.058694] BUG: KASAN: global-out-of-bounds in nvmet_enable_port+0x42/0x1da +[ 106.058719] Read of size 8 at addr ffffffff89dafa58 by task ln/632 +[...] +[ 106.076026] nvmet: transport type 255 not supported + +Since commit 200adac75888, NVMF_TRTYPE_MAX is the default state as configured by +nvmet_ports_make(). +Avoid this by checking for NVMF_TRTYPE_MAX before proceeding. 
+ +Fixes: 200adac75888 ("nvme: Add PCI transport type") +Signed-off-by: Richard Weinberger +Reviewed-by: Sagi Grimberg +Reviewed-by: Chaitanya Kulkarni +Reviewed-by: Damien Le Moal +Signed-off-by: Sasha Levin +--- + drivers/nvme/target/core.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c +index 2e741696f3712..6ccce0ee51573 100644 +--- a/drivers/nvme/target/core.c ++++ b/drivers/nvme/target/core.c +@@ -324,6 +324,9 @@ int nvmet_enable_port(struct nvmet_port *port) + + lockdep_assert_held(&nvmet_config_sem); + ++ if (port->disc_addr.trtype == NVMF_TRTYPE_MAX) ++ return -EINVAL; ++ + ops = nvmet_transports[port->disc_addr.trtype]; + if (!ops) { + up_write(&nvmet_config_sem); +-- +2.39.5 + diff --git a/queue-6.14/pds_core-handle-unsupported-pds_core_cmd_fw_control-.patch b/queue-6.14/pds_core-handle-unsupported-pds_core_cmd_fw_control-.patch new file mode 100644 index 0000000000..74de4b0667 --- /dev/null +++ b/queue-6.14/pds_core-handle-unsupported-pds_core_cmd_fw_control-.patch @@ -0,0 +1,60 @@ +From c1981b6ef3fce7c8e85a3bb38f2c49d3307c92c4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Apr 2025 10:46:04 -0700 +Subject: pds_core: handle unsupported PDS_CORE_CMD_FW_CONTROL result + +From: Brett Creeley + +[ Upstream commit 2567daad69cd1107fc0ec29b1615f110d7cf7385 ] + +If the FW doesn't support the PDS_CORE_CMD_FW_CONTROL command +the driver might at the least print garbage and at the worst +crash when the user runs the "devlink dev info" devlink command. + +This happens because the stack variable fw_list is not 0 +initialized which results in fw_list.num_fw_slots being a +garbage value from the stack. Then the driver tries to access +fw_list.fw_names[i] with i >= ARRAY_SIZE and runs off the end +of the array. + +Fix this by initializing the fw_list and by not failing +completely if the devcmd fails because other useful information +is printed via devlink dev info even if the devcmd fails. 
+ +Fixes: 45d76f492938 ("pds_core: set up device and adminq") +Signed-off-by: Brett Creeley +Reviewed-by: Simon Horman +Signed-off-by: Shannon Nelson +Reviewed-by: Jacob Keller +Link: https://patch.msgid.link/20250421174606.3892-3-shannon.nelson@amd.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amd/pds_core/devlink.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c +index 44971e71991ff..ca23cde385e67 100644 +--- a/drivers/net/ethernet/amd/pds_core/devlink.c ++++ b/drivers/net/ethernet/amd/pds_core/devlink.c +@@ -102,7 +102,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, + .fw_control.opcode = PDS_CORE_CMD_FW_CONTROL, + .fw_control.oper = PDS_CORE_FW_GET_LIST, + }; +- struct pds_core_fw_list_info fw_list; ++ struct pds_core_fw_list_info fw_list = {}; + struct pdsc *pdsc = devlink_priv(dl); + union pds_core_dev_comp comp; + char buf[32]; +@@ -115,8 +115,6 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, + if (!err) + memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); + mutex_unlock(&pdsc->devcmd_lock); +- if (err && err != -EIO) +- return err; + + listlen = min(fw_list.num_fw_slots, ARRAY_SIZE(fw_list.fw_names)); + for (i = 0; i < listlen; i++) { +-- +2.39.5 + diff --git a/queue-6.14/pds_core-make-wait_context-part-of-q_info.patch b/queue-6.14/pds_core-make-wait_context-part-of-q_info.patch new file mode 100644 index 0000000000..7156c09365 --- /dev/null +++ b/queue-6.14/pds_core-make-wait_context-part-of-q_info.patch @@ -0,0 +1,175 @@ +From 141f3d400894f12a066e9fcebda18a45b71e6398 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Apr 2025 10:46:06 -0700 +Subject: pds_core: make wait_context part of q_info + +From: Shannon Nelson + +[ Upstream commit 3f77c3dfffc7063428b100c4945ca2a7a8680380 ] + +Make the wait_context a full part of the 
q_info struct rather +than a stack variable that goes away after pdsc_adminq_post() +is done so that the context is still available after the wait +loop has given up. + +There was a case where a slow development firmware caused +the adminq request to time out, but then later the FW finally +finished the request and sent the interrupt. The handler tried +to complete_all() the completion context that had been created +on the stack in pdsc_adminq_post() but no longer existed. +This caused bad pointer usage, kernel crashes, and much wailing +and gnashing of teeth. + +Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") +Reviewed-by: Simon Horman +Signed-off-by: Shannon Nelson +Reviewed-by: Jacob Keller +Link: https://patch.msgid.link/20250421174606.3892-5-shannon.nelson@amd.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amd/pds_core/adminq.c | 36 +++++++++------------- + drivers/net/ethernet/amd/pds_core/core.c | 4 ++- + drivers/net/ethernet/amd/pds_core/core.h | 2 +- + 3 files changed, 18 insertions(+), 24 deletions(-) + +diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c +index c83a0a80d5334..506f682d15c10 100644 +--- a/drivers/net/ethernet/amd/pds_core/adminq.c ++++ b/drivers/net/ethernet/amd/pds_core/adminq.c +@@ -5,11 +5,6 @@ + + #include "core.h" + +-struct pdsc_wait_context { +- struct pdsc_qcq *qcq; +- struct completion wait_completion; +-}; +- + static int pdsc_process_notifyq(struct pdsc_qcq *qcq) + { + union pds_core_notifyq_comp *comp; +@@ -109,10 +104,10 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq) + q_info = &q->info[q->tail_idx]; + q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); + +- /* Copy out the completion data */ +- memcpy(q_info->dest, comp, sizeof(*comp)); +- +- complete_all(&q_info->wc->wait_completion); ++ if (!completion_done(&q_info->completion)) { ++ memcpy(q_info->dest, comp, sizeof(*comp)); ++ 
complete(&q_info->completion); ++ } + + if (cq->tail_idx == cq->num_descs - 1) + cq->done_color = !cq->done_color; +@@ -162,8 +157,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) + static int __pdsc_adminq_post(struct pdsc *pdsc, + struct pdsc_qcq *qcq, + union pds_core_adminq_cmd *cmd, +- union pds_core_adminq_comp *comp, +- struct pdsc_wait_context *wc) ++ union pds_core_adminq_comp *comp) + { + struct pdsc_queue *q = &qcq->q; + struct pdsc_q_info *q_info; +@@ -205,9 +199,9 @@ static int __pdsc_adminq_post(struct pdsc *pdsc, + /* Post the request */ + index = q->head_idx; + q_info = &q->info[index]; +- q_info->wc = wc; + q_info->dest = comp; + memcpy(q_info->desc, cmd, sizeof(*cmd)); ++ reinit_completion(&q_info->completion); + + dev_dbg(pdsc->dev, "head_idx %d tail_idx %d\n", + q->head_idx, q->tail_idx); +@@ -231,16 +225,13 @@ int pdsc_adminq_post(struct pdsc *pdsc, + union pds_core_adminq_comp *comp, + bool fast_poll) + { +- struct pdsc_wait_context wc = { +- .wait_completion = +- COMPLETION_INITIALIZER_ONSTACK(wc.wait_completion), +- }; + unsigned long poll_interval = 1; + unsigned long poll_jiffies; + unsigned long time_limit; + unsigned long time_start; + unsigned long time_done; + unsigned long remaining; ++ struct completion *wc; + int err = 0; + int index; + +@@ -250,20 +241,19 @@ int pdsc_adminq_post(struct pdsc *pdsc, + return -ENXIO; + } + +- wc.qcq = &pdsc->adminqcq; +- index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc); ++ index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp); + if (index < 0) { + err = index; + goto err_out; + } + ++ wc = &pdsc->adminqcq.q.info[index].completion; + time_start = jiffies; + time_limit = time_start + HZ * pdsc->devcmd_timeout; + do { + /* Timeslice the actual wait to catch IO errors etc early */ + poll_jiffies = msecs_to_jiffies(poll_interval); +- remaining = wait_for_completion_timeout(&wc.wait_completion, +- poll_jiffies); ++ remaining = wait_for_completion_timeout(wc, poll_jiffies); + if 
(remaining) + break; + +@@ -292,9 +282,11 @@ int pdsc_adminq_post(struct pdsc *pdsc, + dev_dbg(pdsc->dev, "%s: elapsed %d msecs\n", + __func__, jiffies_to_msecs(time_done - time_start)); + +- /* Check the results */ +- if (time_after_eq(time_done, time_limit)) ++ /* Check the results and clear an un-completed timeout */ ++ if (time_after_eq(time_done, time_limit) && !completion_done(wc)) { + err = -ETIMEDOUT; ++ complete(wc); ++ } + + dev_dbg(pdsc->dev, "read admin queue completion idx %d:\n", index); + dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1, +diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c +index 4830292d5f879..3c60d4cf9d0e1 100644 +--- a/drivers/net/ethernet/amd/pds_core/core.c ++++ b/drivers/net/ethernet/amd/pds_core/core.c +@@ -167,8 +167,10 @@ static void pdsc_q_map(struct pdsc_queue *q, void *base, dma_addr_t base_pa) + q->base = base; + q->base_pa = base_pa; + +- for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) ++ for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) { + cur->desc = base + (i * q->desc_size); ++ init_completion(&cur->completion); ++ } + } + + static void pdsc_cq_map(struct pdsc_cq *cq, void *base, dma_addr_t base_pa) +diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h +index 543097983bf60..ec637dc4327a5 100644 +--- a/drivers/net/ethernet/amd/pds_core/core.h ++++ b/drivers/net/ethernet/amd/pds_core/core.h +@@ -96,7 +96,7 @@ struct pdsc_q_info { + unsigned int bytes; + unsigned int nbufs; + struct pdsc_buf_info bufs[PDS_CORE_MAX_FRAGS]; +- struct pdsc_wait_context *wc; ++ struct completion completion; + void *dest; + }; + +-- +2.39.5 + diff --git a/queue-6.14/pds_core-prevent-possible-adminq-overflow-stuck-cond.patch b/queue-6.14/pds_core-prevent-possible-adminq-overflow-stuck-cond.patch new file mode 100644 index 0000000000..de60a2f1e4 --- /dev/null +++ b/queue-6.14/pds_core-prevent-possible-adminq-overflow-stuck-cond.patch 
@@ -0,0 +1,73 @@ +From c9086c3ea3062c49e84a07bbc9888b3fd42ab6ce Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Apr 2025 10:46:03 -0700 +Subject: pds_core: Prevent possible adminq overflow/stuck condition + +From: Brett Creeley + +[ Upstream commit d9e2f070d8af60f2c8c02b2ddf0a9e90b4e9220c ] + +The pds_core's adminq is protected by the adminq_lock, which prevents +more than 1 command from being posted onto it at any one time. This makes it +so the client drivers cannot simultaneously post adminq commands. +However, the completions happen in a different context, which means +multiple adminq commands can be posted sequentially and all be waiting +on completion. + +On the FW side, the backing adminq request queue is only 16 entries +long and the retry mechanism and/or overflow/stuck prevention is +lacking. This can cause the adminq to get stuck, so commands are no +longer processed and completions are no longer sent by the FW. + +As an initial fix, prevent more than 16 outstanding adminq commands so +there's no way for the adminq to get stuck. This works +because the backing adminq request queue will never have more than 16 +pending adminq commands, so it will never overflow. This is done by +reducing the adminq depth to 16. 
+ +Fixes: 45d76f492938 ("pds_core: set up device and adminq") +Reviewed-by: Michal Swiatkowski +Reviewed-by: Simon Horman +Signed-off-by: Brett Creeley +Signed-off-by: Shannon Nelson +Reviewed-by: Jacob Keller +Link: https://patch.msgid.link/20250421174606.3892-2-shannon.nelson@amd.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amd/pds_core/core.c | 5 +---- + drivers/net/ethernet/amd/pds_core/core.h | 2 +- + 2 files changed, 2 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c +index 536635e577279..4830292d5f879 100644 +--- a/drivers/net/ethernet/amd/pds_core/core.c ++++ b/drivers/net/ethernet/amd/pds_core/core.c +@@ -325,10 +325,7 @@ static int pdsc_core_init(struct pdsc *pdsc) + size_t sz; + int err; + +- /* Scale the descriptor ring length based on number of CPUs and VFs */ +- numdescs = max_t(int, PDSC_ADMINQ_MIN_LENGTH, num_online_cpus()); +- numdescs += 2 * pci_sriov_get_totalvfs(pdsc->pdev); +- numdescs = roundup_pow_of_two(numdescs); ++ numdescs = PDSC_ADMINQ_MAX_LENGTH; + err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_ADMINQ, 0, "adminq", + PDS_CORE_QCQ_F_CORE | PDS_CORE_QCQ_F_INTR, + numdescs, +diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h +index 14522d6d5f86b..543097983bf60 100644 +--- a/drivers/net/ethernet/amd/pds_core/core.h ++++ b/drivers/net/ethernet/amd/pds_core/core.h +@@ -16,7 +16,7 @@ + + #define PDSC_WATCHDOG_SECS 5 + #define PDSC_QUEUE_NAME_MAX_SZ 16 +-#define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */ ++#define PDSC_ADMINQ_MAX_LENGTH 16 /* must be a power of two */ + #define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */ + #define PDSC_TEARDOWN_RECOVERY false + #define PDSC_TEARDOWN_REMOVING true +-- +2.39.5 + diff --git a/queue-6.14/pds_core-remove-unnecessary-check-in-pds_client_admi.patch 
b/queue-6.14/pds_core-remove-unnecessary-check-in-pds_client_admi.patch new file mode 100644 index 0000000000..a5d743e97f --- /dev/null +++ b/queue-6.14/pds_core-remove-unnecessary-check-in-pds_client_admi.patch @@ -0,0 +1,60 @@ +From e8eb4c41ff207df9ebb1eb0a0876784d4c79ae92 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Apr 2025 10:46:05 -0700 +Subject: pds_core: Remove unnecessary check in pds_client_adminq_cmd() + +From: Brett Creeley + +[ Upstream commit f9559d818205a4a0b9cd87181ef46e101ea11157 ] + +When the pds_core driver was first created there were some race +conditions around using the adminq, especially for client drivers. +To reduce the possibility of a race condition there's a check +against pf->state in pds_client_adminq_cmd(). This is problematic +for a couple of reasons: + +1. The PDSC_S_INITING_DRIVER bit is set during probe, but not + cleared until after everything in probe is complete, which + includes creating the auxiliary devices. For pds_fwctl this + means it can't make any adminq commands until after pds_core's + probe is complete even though the adminq is fully up by the + time pds_fwctl's auxiliary device is created. + +2. The race conditions around using the adminq have been fixed + and this path is already protected against client drivers + calling pds_client_adminq_cmd() if the adminq isn't ready, + i.e. see pdsc_adminq_post() -> pdsc_adminq_inc_if_up(). + +Fix this by removing the pf->state check in pds_client_adminq_cmd() +because invalid accesses to pds_core's adminq are already handled by +pdsc_adminq_post()->pdsc_adminq_inc_if_up(). 
+ +Fixes: 10659034c622 ("pds_core: add the aux client API") +Reviewed-by: Simon Horman +Signed-off-by: Brett Creeley +Signed-off-by: Shannon Nelson +Reviewed-by: Jacob Keller +Link: https://patch.msgid.link/20250421174606.3892-4-shannon.nelson@amd.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amd/pds_core/auxbus.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c +index 2babea1109917..b76a9b7e0aed6 100644 +--- a/drivers/net/ethernet/amd/pds_core/auxbus.c ++++ b/drivers/net/ethernet/amd/pds_core/auxbus.c +@@ -107,9 +107,6 @@ int pds_client_adminq_cmd(struct pds_auxiliary_dev *padev, + dev_dbg(pf->dev, "%s: %s opcode %d\n", + __func__, dev_name(&padev->aux_dev.dev), req->opcode); + +- if (pf->state) +- return -ENXIO; +- + /* Wrap the client's request */ + cmd.client_request.opcode = PDS_AQ_CMD_CLIENT_CMD; + cmd.client_request.client_id = cpu_to_le16(padev->client_id); +-- +2.39.5 + diff --git a/queue-6.14/perf-x86-fix-non-sampling-counting-events-on-certain.patch b/queue-6.14/perf-x86-fix-non-sampling-counting-events-on-certain.patch new file mode 100644 index 0000000000..b392c715e8 --- /dev/null +++ b/queue-6.14/perf-x86-fix-non-sampling-counting-events-on-certain.patch @@ -0,0 +1,62 @@ +From fce72c6f0eb5f1b28859c409e3e799907a48b82b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Apr 2025 06:47:24 +0000 +Subject: perf/x86: Fix non-sampling (counting) events on certain x86 platforms + +From: Luo Gengkun + +[ Upstream commit 1a97fea9db9e9b9c4839d4232dde9f505ff5b4cc ] + +Perf doesn't work at perf stat for hardware events on certain x86 platforms: + + $perf stat -- sleep 1 + Performance counter stats for 'sleep 1': + 16.44 msec task-clock # 0.016 CPUs utilized + 2 context-switches # 121.691 /sec + 0 cpu-migrations # 0.000 /sec + 54 page-faults # 3.286 K/sec + cycles + instructions + branches + branch-misses + +The reason 
is that the check in x86_pmu_hw_config() for sampling events is +unexpectedly applied to counting events as well. + +It should only impact x86 platforms with limit_period used for non-PEBS +events. For Intel platforms, it should only impact some older platforms, +e.g., HSW, BDW and NHM. + +Fixes: 88ec7eedbbd2 ("perf/x86: Fix low freqency setting issue") +Signed-off-by: Luo Gengkun +Signed-off-by: Ingo Molnar +Reviewed-by: Kan Liang +Cc: Alexander Shishkin +Cc: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Ravi Bangoria +Link: https://lore.kernel.org/r/20250423064724.3716211-1-luogengkun@huaweicloud.com +Signed-off-by: Sasha Levin +--- + arch/x86/events/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c +index 3a27c50080f4f..ce8d4fdf54fbb 100644 +--- a/arch/x86/events/core.c ++++ b/arch/x86/events/core.c +@@ -628,7 +628,7 @@ int x86_pmu_hw_config(struct perf_event *event) + if (event->attr.type == event->pmu->type) + event->hw.config |= x86_pmu_get_event_config(event); + +- if (!event->attr.freq && x86_pmu.limit_period) { ++ if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) { + s64 left = event->attr.sample_period; + x86_pmu.limit_period(event, &left); + if (left > event->attr.sample_period) +-- +2.39.5 + diff --git a/queue-6.14/revert-drm-meson-vclk-fix-calculation-of-59.94-fract.patch b/queue-6.14/revert-drm-meson-vclk-fix-calculation-of-59.94-fract.patch new file mode 100644 index 0000000000..1c8415d90e --- /dev/null +++ b/queue-6.14/revert-drm-meson-vclk-fix-calculation-of-59.94-fract.patch @@ -0,0 +1,62 @@ +From 88e1908e0a566325313af6eba4bbe9304c113bfa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Apr 2025 22:12:59 +0200 +Subject: Revert "drm/meson: vclk: fix calculation of 59.94 fractional rates" + +From: Christian Hewitt + +[ Upstream commit f37bb5486ea536c1d61df89feeaeff3f84f0b560 ] + +This 
reverts commit bfbc68e. + +The patch does permit the offending YUV420 @ 59.94 phy_freq and +vclk_freq mode to match in calculations. It also results in all +fractional rates being unavailable for use. This was unintended +and requires the patch to be reverted. + +Fixes: bfbc68e4d869 ("drm/meson: vclk: fix calculation of 59.94 fractional rates") +Cc: stable@vger.kernel.org +Signed-off-by: Christian Hewitt +Signed-off-by: Martin Blumenstingl +Reviewed-by: Neil Armstrong +Link: https://lore.kernel.org/r/20250421201300.778955-2-martin.blumenstingl@googlemail.com +Signed-off-by: Neil Armstrong +Link: https://lore.kernel.org/r/20250421201300.778955-2-martin.blumenstingl@googlemail.com +Stable-dep-of: 1017560164b6 ("drm/meson: use unsigned long long / Hz for frequency types") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/meson/meson_vclk.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c +index 2a942dc6a6dc2..2a82119eb58ed 100644 +--- a/drivers/gpu/drm/meson/meson_vclk.c ++++ b/drivers/gpu/drm/meson/meson_vclk.c +@@ -790,13 +790,13 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, + FREQ_1000_1001(params[i].pixel_freq)); + DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", + i, params[i].phy_freq, +- FREQ_1000_1001(params[i].phy_freq/1000)*1000); ++ FREQ_1000_1001(params[i].phy_freq/10)*10); + /* Match strict frequency */ + if (phy_freq == params[i].phy_freq && + vclk_freq == params[i].vclk_freq) + return MODE_OK; + /* Match 1000/1001 variant */ +- if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) && ++ if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/10)*10) && + vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) + return MODE_OK; + } +@@ -1070,7 +1070,7 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, + + for (freq = 0 ; params[freq].pixel_freq ; ++freq) { + if ((phy_freq == params[freq].phy_freq || +- 
phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) && ++ phy_freq == FREQ_1000_1001(params[freq].phy_freq/10)*10) && + (vclk_freq == params[freq].vclk_freq || + vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { + if (vclk_freq != params[freq].vclk_freq) +-- +2.39.5 + diff --git a/queue-6.14/riscv-replace-function-like-macro-by-static-inline-f.patch b/queue-6.14/riscv-replace-function-like-macro-by-static-inline-f.patch new file mode 100644 index 0000000000..57e9642ca6 --- /dev/null +++ b/queue-6.14/riscv-replace-function-like-macro-by-static-inline-f.patch @@ -0,0 +1,64 @@ +From aaace8c5615a952ce3dcaf0e635d1ff601bb0e9f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 19 Apr 2025 13:13:59 +0200 +Subject: riscv: Replace function-like macro by static inline function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Björn Töpel + +[ Upstream commit 121f34341d396b666d8a90b24768b40e08ca0d61 ] + +The flush_icache_range() function is implemented as a "function-like +macro with unused parameters", which can result in "unused variables" +warnings. + +Replace the macro with a static inline function, as advised by +Documentation/process/coding-style.rst. 
+ +Fixes: 08f051eda33b ("RISC-V: Flush I$ when making a dirty page executable") +Signed-off-by: Björn Töpel +Link: https://lore.kernel.org/r/20250419111402.1660267-1-bjorn@kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/cacheflush.h | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h +index 8de73f91bfa37..b59ffeb668d6a 100644 +--- a/arch/riscv/include/asm/cacheflush.h ++++ b/arch/riscv/include/asm/cacheflush.h +@@ -34,11 +34,6 @@ static inline void flush_dcache_page(struct page *page) + flush_dcache_folio(page_folio(page)); + } + +-/* +- * RISC-V doesn't have an instruction to flush parts of the instruction cache, +- * so instead we just flush the whole thing. +- */ +-#define flush_icache_range(start, end) flush_icache_all() + #define flush_icache_user_page(vma, pg, addr, len) \ + do { \ + if (vma->vm_flags & VM_EXEC) \ +@@ -78,6 +73,16 @@ void flush_icache_mm(struct mm_struct *mm, bool local); + + #endif /* CONFIG_SMP */ + ++/* ++ * RISC-V doesn't have an instruction to flush parts of the instruction cache, ++ * so instead we just flush the whole thing. 
++ */ ++#define flush_icache_range flush_icache_range ++static inline void flush_icache_range(unsigned long start, unsigned long end) ++{ ++ flush_icache_all(); ++} ++ + extern unsigned int riscv_cbom_block_size; + extern unsigned int riscv_cboz_block_size; + void riscv_init_cbo_blocksizes(void); +-- +2.39.5 + diff --git a/queue-6.14/riscv-uprobes-add-missing-fence.i-after-building-the.patch b/queue-6.14/riscv-uprobes-add-missing-fence.i-after-building-the.patch new file mode 100644 index 0000000000..cbfda03e03 --- /dev/null +++ b/queue-6.14/riscv-uprobes-add-missing-fence.i-after-building-the.patch @@ -0,0 +1,61 @@ +From c4eb924e1ac6faf0d9bd1d1776723813e9884113 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 19 Apr 2025 13:14:00 +0200 +Subject: riscv: uprobes: Add missing fence.i after building the XOL buffer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Björn Töpel + +[ Upstream commit 7d1d19a11cfbfd8bae1d89cc010b2cc397cd0c48 ] + +The XOL (execute out-of-line) buffer is used to single-step the +replaced instruction(s) for uprobes. The RISC-V port was missing a +proper fence.i (i$ flushing) after constructing the XOL buffer, which +can result in incorrect execution of stale/broken instructions. + +This was found running the BPF selftests "test_progs: +uprobe_autoattach, attach_probe" on the Spacemit K1/X60, where the +uprobes tests randomly blew up. 
+ +Reviewed-by: Guo Ren +Fixes: 74784081aac8 ("riscv: Add uprobes supported") +Signed-off-by: Björn Töpel +Link: https://lore.kernel.org/r/20250419111402.1660267-2-bjorn@kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/probes/uprobes.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c +index 4b3dc8beaf77d..cc15f7ca6cc17 100644 +--- a/arch/riscv/kernel/probes/uprobes.c ++++ b/arch/riscv/kernel/probes/uprobes.c +@@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, + /* Initialize the slot */ + void *kaddr = kmap_atomic(page); + void *dst = kaddr + (vaddr & ~PAGE_MASK); ++ unsigned long start = (unsigned long)dst; + + memcpy(dst, src, len); + +@@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, + *(uprobe_opcode_t *)dst = __BUG_INSN_32; + } + ++ flush_icache_range(start, start + len); + kunmap_atomic(kaddr); +- +- /* +- * We probably need flush_icache_user_page() but it needs vma. +- * This should work on most of architectures by default. If +- * architecture needs to do something different it can define +- * its own version of the function. 
+- */ +- flush_dcache_page(page); + } +-- +2.39.5 + diff --git a/queue-6.14/sched-eevdf-fix-se-slice-being-set-to-u64_max-and-re.patch b/queue-6.14/sched-eevdf-fix-se-slice-being-set-to-u64_max-and-re.patch new file mode 100644 index 0000000000..ab16e68483 --- /dev/null +++ b/queue-6.14/sched-eevdf-fix-se-slice-being-set-to-u64_max-and-re.patch @@ -0,0 +1,91 @@ +From dcc369ccce0c356812eff5ac82f4a777f1167cfd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Apr 2025 01:51:24 -0700 +Subject: sched/eevdf: Fix se->slice being set to U64_MAX and resulting crash + +From: Omar Sandoval + +[ Upstream commit bbce3de72be56e4b5f68924b7da9630cc89aa1a8 ] + +There is a code path in dequeue_entities() that can set the slice of a +sched_entity to U64_MAX, which sometimes results in a crash. + +The offending case is when dequeue_entities() is called to dequeue a +delayed group entity, and then the entity's parent's dequeue is delayed. +In that case: + +1. In the if (entity_is_task(se)) else block at the beginning of + dequeue_entities(), slice is set to + cfs_rq_min_slice(group_cfs_rq(se)). If the entity was delayed, then + it has no queued tasks, so cfs_rq_min_slice() returns U64_MAX. +2. The first for_each_sched_entity() loop dequeues the entity. +3. If the entity was its parent's only child, then the next iteration + tries to dequeue the parent. +4. If the parent's dequeue needs to be delayed, then it breaks from the + first for_each_sched_entity() loop _without updating slice_. +5. The second for_each_sched_entity() loop sets the parent's ->slice to + the saved slice, which is still U64_MAX. + +This throws off subsequent calculations with potentially catastrophic +results. A manifestation we saw in production was: + +6. In update_entity_lag(), se->slice is used to calculate limit, which + ends up as a huge negative number. +7. limit is used in se->vlag = clamp(vlag, -limit, limit). 
Because limit + is negative, vlag > limit, so se->vlag is set to the same huge + negative number. +8. In place_entity(), se->vlag is scaled, which overflows and results in + another huge (positive or negative) number. +9. The adjusted lag is subtracted from se->vruntime, which increases or + decreases se->vruntime by a huge number. +10. pick_eevdf() calls entity_eligible()/vruntime_eligible(), which + incorrectly returns false because the vruntime is so far from the + other vruntimes on the queue, causing the + (vruntime - cfs_rq->min_vruntime) * load calculation to overflow. +11. Nothing appears to be eligible, so pick_eevdf() returns NULL. +12. pick_next_entity() tries to dereference the return value of + pick_eevdf() and crashes. + +Dumping the cfs_rq states from the core dumps with drgn showed tell-tale +huge vruntime ranges and bogus vlag values, and I also traced se->slice +being set to U64_MAX on live systems (which was usually "benign" since +the rest of the runqueue needed to be in a particular state to crash). + +Fix it in dequeue_entities() by always setting slice from the first +non-empty cfs_rq. 
+ +Fixes: aef6987d8954 ("sched/eevdf: Propagate min_slice up the cgroup hierarchy") +Signed-off-by: Omar Sandoval +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Link: https://lkml.kernel.org/r/f0c2d1072be229e1bdddc73c0703919a8b00c652.1745570998.git.osandov@fb.com +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 89c7260103e18..3d9b68a347b76 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -7083,9 +7083,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) + h_nr_idle = task_has_idle_policy(p); + if (task_sleep || task_delayed || !se->sched_delayed) + h_nr_runnable = 1; +- } else { +- cfs_rq = group_cfs_rq(se); +- slice = cfs_rq_min_slice(cfs_rq); + } + + for_each_sched_entity(se) { +@@ -7095,6 +7092,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) + if (p && &p->se == se) + return -1; + ++ slice = cfs_rq_min_slice(cfs_rq); + break; + } + +-- +2.39.5 + diff --git a/queue-6.14/scsi-core-clear-flags-for-scsi_cmnd-that-did-not-com.patch b/queue-6.14/scsi-core-clear-flags-for-scsi_cmnd-that-did-not-com.patch new file mode 100644 index 0000000000..016b0fcf97 --- /dev/null +++ b/queue-6.14/scsi-core-clear-flags-for-scsi_cmnd-that-did-not-com.patch @@ -0,0 +1,50 @@ +From 8dec9328aa70ff0a30a15998d666b1075188bace Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Mar 2025 11:49:33 +0300 +Subject: scsi: core: Clear flags for scsi_cmnd that did not complete + +From: Anastasia Kovaleva + +[ Upstream commit 54bebe46871d4e56e05fcf55c1a37e7efa24e0a8 ] + +Commands that have not been completed with scsi_done() do not clear the +SCMD_INITIALIZED flag and therefore will not be properly reinitialized. 
+Thus, the next time the scsi_cmnd structure is used, the command may +fail in scsi_cmd_runtime_exceeded() due to the old jiffies_at_alloc +value: + + kernel: sd 16:0:1:84: [sdts] tag#405 timing out command, waited 720s + kernel: sd 16:0:1:84: [sdts] tag#405 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=66636s + +Clear flags for commands that have not been completed by SCSI. + +Fixes: 4abafdc4360d ("block: remove the initialize_rq_fn blk_mq_ops method") +Signed-off-by: Anastasia Kovaleva +Link: https://lore.kernel.org/r/20250324084933.15932-2-a.kovaleva@yadro.com +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/scsi_lib.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index f1cfe0bb89b20..7a31dae9aa82d 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -1253,8 +1253,12 @@ EXPORT_SYMBOL_GPL(scsi_alloc_request); + */ + static void scsi_cleanup_rq(struct request *rq) + { ++ struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); ++ ++ cmd->flags = 0; ++ + if (rq->rq_flags & RQF_DONTPREP) { +- scsi_mq_uninit_cmd(blk_mq_rq_to_pdu(rq)); ++ scsi_mq_uninit_cmd(cmd); + rq->rq_flags &= ~RQF_DONTPREP; + } + } +-- +2.39.5 + diff --git a/queue-6.14/scsi-ufs-core-add-null-check-in-ufshcd_mcq_compl_pen.patch b/queue-6.14/scsi-ufs-core-add-null-check-in-ufshcd_mcq_compl_pen.patch new file mode 100644 index 0000000000..8c64811788 --- /dev/null +++ b/queue-6.14/scsi-ufs-core-add-null-check-in-ufshcd_mcq_compl_pen.patch @@ -0,0 +1,42 @@ +From 7b11b761461730701e180af729bf71047c1c5afd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 12 Apr 2025 14:59:09 -0500 +Subject: scsi: ufs: core: Add NULL check in + ufshcd_mcq_compl_pending_transfer() + +From: Chenyuan Yang + +[ Upstream commit 08a966a917fe3d92150fa3cc15793ad5e57051eb ] + +Add a NULL check for the returned hwq pointer by ufshcd_mcq_req_to_hwq(). 
+ +This is similar to the fix in commit 74736103fb41 ("scsi: ufs: core: Fix +ufshcd_abort_one racing issue"). + +Signed-off-by: Chenyuan Yang +Link: https://lore.kernel.org/r/20250412195909.315418-1-chenyuan0y@gmail.com +Fixes: ab248643d3d6 ("scsi: ufs: core: Add error handling for MCQ mode") +Reviewed-by: Peter Wang +Reviewed-by: Bart Van Assche +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/ufs/core/ufshcd.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c +index 464f13da259aa..128e35a848b7b 100644 +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -5658,6 +5658,8 @@ static void ufshcd_mcq_compl_pending_transfer(struct ufs_hba *hba, + continue; + + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); ++ if (!hwq) ++ continue; + + if (force_compl) { + ufshcd_mcq_compl_all_cqes_lock(hba, hwq); +-- +2.39.5 + diff --git a/queue-6.14/scsi-ufs-mcq-add-null-check-in-ufshcd_mcq_abort.patch b/queue-6.14/scsi-ufs-mcq-add-null-check-in-ufshcd_mcq_abort.patch new file mode 100644 index 0000000000..cda630329c --- /dev/null +++ b/queue-6.14/scsi-ufs-mcq-add-null-check-in-ufshcd_mcq_abort.patch @@ -0,0 +1,69 @@ +From b610b5487b811b20179fcd2adbf75f2c3e29fbe3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Apr 2025 19:13:20 -0500 +Subject: scsi: ufs: mcq: Add NULL check in ufshcd_mcq_abort() + +From: Chenyuan Yang + +[ Upstream commit 4c324085062919d4e21c69e5e78456dcec0052fe ] + +A race can occur between the MCQ completion path and the abort handler: +once a request completes, __blk_mq_free_request() sets rq->mq_hctx to +NULL, meaning the subsequent ufshcd_mcq_req_to_hwq() call in +ufshcd_mcq_abort() can return a NULL pointer. If this NULL pointer is +dereferenced, the kernel will crash. + +Add a NULL check for the returned hwq pointer. If hwq is NULL, log an +error and return FAILED, preventing a potential NULL-pointer +dereference. 
As suggested by Bart, the ufshcd_cmd_inflight() check is +removed. + +This is similar to the fix in commit 74736103fb41 ("scsi: ufs: core: Fix +ufshcd_abort_one racing issue"). + +This is found by our static analysis tool KNighter. + +Signed-off-by: Chenyuan Yang +Link: https://lore.kernel.org/r/20250410001320.2219341-1-chenyuan0y@gmail.com +Fixes: f1304d442077 ("scsi: ufs: mcq: Added ufshcd_mcq_abort()") +Reviewed-by: Bart Van Assche +Reviewed-by: Peter Wang +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/ufs/core/ufs-mcq.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c +index 240ce135bbfbc..f1294c29f4849 100644 +--- a/drivers/ufs/core/ufs-mcq.c ++++ b/drivers/ufs/core/ufs-mcq.c +@@ -677,13 +677,6 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) + unsigned long flags; + int err; + +- if (!ufshcd_cmd_inflight(lrbp->cmd)) { +- dev_err(hba->dev, +- "%s: skip abort. cmd at tag %d already completed.\n", +- __func__, tag); +- return FAILED; +- } +- + /* Skip task abort in case previous aborts failed and report failure */ + if (lrbp->req_abort_skip) { + dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n", +@@ -692,6 +685,11 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) + } + + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); ++ if (!hwq) { ++ dev_err(hba->dev, "%s: skip abort. 
cmd at tag %d already completed.\n", ++ __func__, tag); ++ return FAILED; ++ } + + if (ufshcd_mcq_sqe_search(hba, hwq, tag)) { + /* +-- +2.39.5 + diff --git a/queue-6.14/series b/queue-6.14/series index 41f7915b0c..5d64711a6b 100644 --- a/queue-6.14/series +++ b/queue-6.14/series @@ -30,3 +30,73 @@ drm-xe-add-performance-tunings-to-debugfs.patch drm-xe-rtp-drop-sentinels-from-arg-to-xe_rtp_process.patch drm-xe-ensure-fixed_slice_mode-gets-set-after-ccs_mo.patch lib-kconfig.ubsan-remove-default-ubsan-from-ubsan_in.patch +ceph-fix-incorrect-flush-end-position-calculation.patch +cpufreq-sun50i-prevent-out-of-bounds-access.patch +dma-contiguous-avoid-warning-about-unused-size_bytes.patch +cpufreq-apple-soc-fix-null-ptr-deref-in-apple_soc_cp.patch +cpufreq-scmi-fix-null-ptr-deref-in-scmi_cpufreq_get_.patch +cpufreq-scpi-fix-null-ptr-deref-in-scpi_cpufreq_get_.patch +scsi-ufs-mcq-add-null-check-in-ufshcd_mcq_abort.patch +virtio_pci-use-self-group-type-for-cap-commands.patch +cpufreq-cppc-fix-invalid-return-value-in-.get-callba.patch +cpufreq-do-not-enable-by-default-during-compile-test.patch +cpufreq-fix-compile-test-defaults.patch +btrfs-avoid-page_lockend-underflow-in-btrfs_punch_ho.patch +btrfs-zoned-return-eio-on-raid1-block-group-write-po.patch +cgroup-cpuset-v1-add-missing-support-for-cpuset_v2_m.patch +vhost-scsi-add-better-resource-allocation-failure-ha.patch +vhost-scsi-fix-vhost_scsi_send_bad_target.patch +vhost-scsi-fix-vhost_scsi_send_status.patch +net-mlx5-fix-null-ptr-deref-in-mlx5_create_-inner_-t.patch +net-mlx5-move-ttc-allocation-after-switch-case-to-pr.patch +scsi-core-clear-flags-for-scsi_cmnd-that-did-not-com.patch +scsi-ufs-core-add-null-check-in-ufshcd_mcq_compl_pen.patch +net-enetc-register-xdp-rx-queues-with-frag_size.patch +net-enetc-refactor-bulk-flipping-of-rx-buffers-to-se.patch +net-enetc-fix-frame-corruption-on-bpf_xdp_adjust_hea.patch +nvmet-fix-out-of-bounds-access-in-nvmet_enable_port.patch +net-lwtunnel-disable-bhs-when-required.patch 
+net-phylink-force-link-down-on-major_config-failure.patch +net-phylink-fix-suspend-resume-with-wol-enabled-and-.patch +net-phy-leds-fix-memory-leak.patch +virtio-net-refactor-napi_enable-paths.patch +virtio-net-refactor-napi_disable-paths.patch +virtio-net-disable-delayed-refill-when-pausing-rx.patch +tipc-fix-null-pointer-dereference-in-tipc_mon_reinit.patch +net-ethernet-mtk_eth_soc-net-revise-netsysv3-hardwar.patch +fix-a-couple-of-races-in-mnt_tree_beneath-handling-b.patch +net_sched-hfsc-fix-a-uaf-vulnerability-in-class-hand.patch +net_sched-hfsc-fix-a-potential-uaf-in-hfsc_dequeue-t.patch +net-dsa-mt7530-sync-driver-specific-behavior-of-mt75.patch +pds_core-prevent-possible-adminq-overflow-stuck-cond.patch +pds_core-handle-unsupported-pds_core_cmd_fw_control-.patch +pds_core-remove-unnecessary-check-in-pds_client_admi.patch +pds_core-make-wait_context-part-of-q_info.patch +net-phy-add-helper-for-getting-tx-amplitude-gain.patch +net-phy-dp83822-add-support-for-changing-the-transmi.patch +net-dp83822-fix-of_mdio-config-check.patch +net-stmmac-fix-dwmac1000-ptp-timestamp-status-offset.patch +net-stmmac-fix-multiplication-overflow-when-reading-.patch +block-never-reduce-ra_pages-in-blk_apply_bdi_limits.patch +bdev-use-bdev_io_min-for-statx-block-size.patch +block-move-blkdev_-get-put-_no_open-prototypes-out-o.patch +block-remove-the-backing_inode-variable-in-bdev_stat.patch +block-don-t-autoload-drivers-on-stat.patch +iommu-amd-return-an-error-if-vcpu-affinity-is-set-fo.patch +riscv-replace-function-like-macro-by-static-inline-f.patch +riscv-uprobes-add-missing-fence.i-after-building-the.patch +ublk-remove-io_cmds-list-in-ublk_queue.patch +ublk-comment-on-ubq-canceling-handling-in-ublk_queue.patch +ublk-implement-queue_rqs.patch +ublk-remove-unused-cmd-argument-to-ublk_dispatch_req.patch +ublk-call-ublk_dispatch_req-for-handling-ublk_u_io_n.patch +splice-remove-duplicate-noinline-from-pipe_clear_now.patch 
+fs-xattr-fix-handling-of-at_fdcwd-in-setxattrat-2-an.patch +bpf-add-namespace-to-bpf-internal-symbols.patch +revert-drm-meson-vclk-fix-calculation-of-59.94-fract.patch +drm-meson-use-unsigned-long-long-hz-for-frequency-ty.patch +perf-x86-fix-non-sampling-counting-events-on-certain.patch +loongarch-select-arch_use_memtest.patch +loongarch-make-regs_irqs_disabled-more-clear.patch +loongarch-make-do_xyz-exception-handlers-more-robust.patch +sched-eevdf-fix-se-slice-being-set-to-u64_max-and-re.patch diff --git a/queue-6.14/splice-remove-duplicate-noinline-from-pipe_clear_now.patch b/queue-6.14/splice-remove-duplicate-noinline-from-pipe_clear_now.patch new file mode 100644 index 0000000000..0c6648ff9b --- /dev/null +++ b/queue-6.14/splice-remove-duplicate-noinline-from-pipe_clear_now.patch @@ -0,0 +1,43 @@ +From f4cb876304059194a706b7501c4556237d757ffe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Apr 2025 18:00:23 +0000 +Subject: splice: remove duplicate noinline from pipe_clear_nowait + +From: T.J. Mercier + +[ Upstream commit e6f141b332ddd9007756751b6afd24f799488fd8 ] + +pipe_clear_nowait has two noinline macros, but we only need one. + +I checked the whole tree, and this is the only occurrence: + +$ grep -r "noinline .* noinline" +fs/splice.c:static noinline void noinline pipe_clear_nowait(struct file *file) +$ + +Fixes: 0f99fc513ddd ("splice: clear FMODE_NOWAIT on file if splice/vmsplice is used") +Signed-off-by: "T.J. Mercier" +Link: https://lore.kernel.org/20250423180025.2627670-1-tjmercier@google.com +Reviewed-by: Jens Axboe +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/splice.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/splice.c b/fs/splice.c +index 23fa5561b9441..bd6e889133f5c 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -45,7 +45,7 @@ + * here if set to avoid blocking other users of this pipe if splice is + * being done on it. 
+ */ +-static noinline void noinline pipe_clear_nowait(struct file *file) ++static noinline void pipe_clear_nowait(struct file *file) + { + fmode_t fmode = READ_ONCE(file->f_mode); + +-- +2.39.5 + diff --git a/queue-6.14/tipc-fix-null-pointer-dereference-in-tipc_mon_reinit.patch b/queue-6.14/tipc-fix-null-pointer-dereference-in-tipc_mon_reinit.patch new file mode 100644 index 0000000000..d60940402a --- /dev/null +++ b/queue-6.14/tipc-fix-null-pointer-dereference-in-tipc_mon_reinit.patch @@ -0,0 +1,125 @@ +From dac4243061f6bfe69e2324cf74bd5c2d53a5e5b5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Apr 2025 14:47:15 +0700 +Subject: tipc: fix NULL pointer dereference in tipc_mon_reinit_self() + +From: Tung Nguyen + +[ Upstream commit d63527e109e811ef11abb1c2985048fdb528b4cb ] + +syzbot reported: + +tipc: Node number set to 1055423674 +Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN NOPTI +KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] +CPU: 3 UID: 0 PID: 6017 Comm: kworker/3:5 Not tainted 6.15.0-rc1-syzkaller-00246-g900241a5cc15 #0 PREEMPT(full) +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 +Workqueue: events tipc_net_finalize_work +RIP: 0010:tipc_mon_reinit_self+0x11c/0x210 net/tipc/monitor.c:719 +... 
+RSP: 0018:ffffc9000356fb68 EFLAGS: 00010246 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003ee87cba +RDX: 0000000000000000 RSI: ffffffff8dbc56a7 RDI: ffff88804c2cc010 +RBP: dffffc0000000000 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000007 +R13: fffffbfff2111097 R14: ffff88804ead8000 R15: ffff88804ead9010 +FS: 0000000000000000(0000) GS:ffff888097ab9000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00000000f720eb00 CR3: 000000000e182000 CR4: 0000000000352ef0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + tipc_net_finalize+0x10b/0x180 net/tipc/net.c:140 + process_one_work+0x9cc/0x1b70 kernel/workqueue.c:3238 + process_scheduled_works kernel/workqueue.c:3319 [inline] + worker_thread+0x6c8/0xf10 kernel/workqueue.c:3400 + kthread+0x3c2/0x780 kernel/kthread.c:464 + ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:153 + ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 + +... +RIP: 0010:tipc_mon_reinit_self+0x11c/0x210 net/tipc/monitor.c:719 +... 
+RSP: 0018:ffffc9000356fb68 EFLAGS: 00010246 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003ee87cba +RDX: 0000000000000000 RSI: ffffffff8dbc56a7 RDI: ffff88804c2cc010 +RBP: dffffc0000000000 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000007 +R13: fffffbfff2111097 R14: ffff88804ead8000 R15: ffff88804ead9010 +FS: 0000000000000000(0000) GS:ffff888097ab9000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00000000f720eb00 CR3: 000000000e182000 CR4: 0000000000352ef0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + +There is a race condition between workqueue created when enabling +bearer and another thread created when disabling bearer right after +that as follows: + +enabling_bearer | disabling_bearer +--------------- | ---------------- +tipc_disc_timeout() | +{ | bearer_disable() + ... | { + schedule_work(&tn->work); | tipc_mon_delete() + ... | { +} | ... + | write_lock_bh(&mon->lock); + | mon->self = NULL; + | write_unlock_bh(&mon->lock); + | ... + | } +tipc_net_finalize_work() | } +{ | + ... | + tipc_net_finalize() | + { | + ... | + tipc_mon_reinit_self() | + { | + ... | + write_lock_bh(&mon->lock); | + mon->self->addr = tipc_own_addr(net); | + write_unlock_bh(&mon->lock); | + ... | + } | + ... | + } | + ... | +} | + +'mon->self' is set to NULL in disabling_bearer thread and dereferenced +later in enabling_bearer thread. + +This commit fixes this issue by validating 'mon->self' before assigning +node address to it. 
+ +Reported-by: syzbot+ed60da8d686dc709164c@syzkaller.appspotmail.com +Fixes: 46cb01eeeb86 ("tipc: update mon's self addr when node addr generated") +Signed-off-by: Tung Nguyen +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250417074826.578115-1-tung.quang.nguyen@est.tech +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/tipc/monitor.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c +index e2f19627e43d5..b45c5b91bc7af 100644 +--- a/net/tipc/monitor.c ++++ b/net/tipc/monitor.c +@@ -716,7 +716,8 @@ void tipc_mon_reinit_self(struct net *net) + if (!mon) + continue; + write_lock_bh(&mon->lock); +- mon->self->addr = tipc_own_addr(net); ++ if (mon->self) ++ mon->self->addr = tipc_own_addr(net); + write_unlock_bh(&mon->lock); + } + } +-- +2.39.5 + diff --git a/queue-6.14/ublk-call-ublk_dispatch_req-for-handling-ublk_u_io_n.patch b/queue-6.14/ublk-call-ublk_dispatch_req-for-handling-ublk_u_io_n.patch new file mode 100644 index 0000000000..367bdcc248 --- /dev/null +++ b/queue-6.14/ublk-call-ublk_dispatch_req-for-handling-ublk_u_io_n.patch @@ -0,0 +1,61 @@ +From a8096321d69219c81be7fbdbbec6ff992604e3c8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Apr 2025 09:37:39 +0800 +Subject: ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA + +From: Ming Lei + +[ Upstream commit d6aa0c178bf81f30ae4a780b2bca653daa2eb633 ] + +We call io_uring_cmd_complete_in_task() to schedule task_work for handling +UBLK_U_IO_NEED_GET_DATA. + +This way is really not necessary because the current context is exactly +the ublk queue context, so call ublk_dispatch_req() directly for handling +UBLK_U_IO_NEED_GET_DATA. 
+ +Fixes: 216c8f5ef0f2 ("ublk: replace monitor with cancelable uring_cmd") +Tested-by: Jared Holzman +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20250425013742.1079549-2-ming.lei@redhat.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + drivers/block/ublk_drv.c | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + +diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c +index 437297022dcfa..c7761a5cfeec0 100644 +--- a/drivers/block/ublk_drv.c ++++ b/drivers/block/ublk_drv.c +@@ -1812,15 +1812,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) + mutex_unlock(&ub->mutex); + } + +-static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, +- int tag) +-{ +- struct ublk_queue *ubq = ublk_get_queue(ub, q_id); +- struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); +- +- ublk_queue_cmd(ubq, req); +-} +- + static inline int ublk_check_cmd_op(u32 cmd_op) + { + u32 ioc_type = _IOC_TYPE(cmd_op); +@@ -1967,8 +1958,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + goto out; + ublk_fill_io_cmd(io, cmd, ub_cmd->addr); +- ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag); +- break; ++ req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); ++ ublk_dispatch_req(ubq, req, issue_flags); ++ return -EIOCBQUEUED; + default: + goto out; + } +-- +2.39.5 + diff --git a/queue-6.14/ublk-comment-on-ubq-canceling-handling-in-ublk_queue.patch b/queue-6.14/ublk-comment-on-ubq-canceling-handling-in-ublk_queue.patch new file mode 100644 index 0000000000..ca65484dde --- /dev/null +++ b/queue-6.14/ublk-comment-on-ubq-canceling-handling-in-ublk_queue.patch @@ -0,0 +1,43 @@ +From aace6ac562119968ef4e5e594d290167f605a638 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Mar 2025 17:51:11 +0800 +Subject: ublk: comment on ubq->canceling handling in ublk_queue_rq() + +From: Ming Lei + +[ Upstream commit 
7e2fe01a69f6be3e284b38cfd2e4e0598a3b0a8f ] + +In ublk_queue_rq(), ubq->canceling has to be handled after ->fail_io and +->force_abort are dealt with, otherwise the request may not be failed +when deleting disk. + +Add comment on this usage. + +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20250327095123.179113-3-ming.lei@redhat.com +Signed-off-by: Jens Axboe +Stable-dep-of: d6aa0c178bf8 ("ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA") +Signed-off-by: Sasha Levin +--- + drivers/block/ublk_drv.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c +index f615b9bd82f5f..fbc397efff175 100644 +--- a/drivers/block/ublk_drv.c ++++ b/drivers/block/ublk_drv.c +@@ -1314,6 +1314,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, + if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) + return BLK_STS_IOERR; + ++ /* ++ * ->canceling has to be handled after ->force_abort and ->fail_io ++ * is dealt with, otherwise this request may not be failed in case ++ * of recovery, and cause hang when deleting disk ++ */ + if (unlikely(ubq->canceling)) { + __ublk_abort_rq(ubq, rq); + return BLK_STS_OK; +-- +2.39.5 + diff --git a/queue-6.14/ublk-implement-queue_rqs.patch b/queue-6.14/ublk-implement-queue_rqs.patch new file mode 100644 index 0000000000..212cb99e0c --- /dev/null +++ b/queue-6.14/ublk-implement-queue_rqs.patch @@ -0,0 +1,242 @@ +From e7adb0fb5c38e61877803bbec11aa7527833e7c5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Mar 2025 17:51:17 +0800 +Subject: ublk: implement ->queue_rqs() + +From: Ming Lei + +[ Upstream commit d796cea7b9f33b6315362f504b15fcc26d678493 ] + +Implement ->queue_rqs() for improving perf in case of MQ. + +In this way, we just need to call io_uring_cmd_complete_in_task() once for +whole IO batch, then both io_uring and ublk server can get exact batch from +ublk frontend. 
+ +Follows IOPS improvement: + +- tests + + tools/testing/selftests/ublk/kublk add -t null -q 2 [-z] + + fio/t/io_uring -p0 /dev/ublkb0 + +- results: + + more than 10% IOPS boost observed + +Pass all ublk selftests, especially the io dispatch order test. + +Cc: Uday Shankar +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20250327095123.179113-9-ming.lei@redhat.com +Signed-off-by: Jens Axboe +Stable-dep-of: d6aa0c178bf8 ("ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA") +Signed-off-by: Sasha Levin +--- + drivers/block/ublk_drv.c | 131 +++++++++++++++++++++++++++++++++------ + 1 file changed, 111 insertions(+), 20 deletions(-) + +diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c +index fbc397efff175..e1388a9b1e2d1 100644 +--- a/drivers/block/ublk_drv.c ++++ b/drivers/block/ublk_drv.c +@@ -77,6 +77,20 @@ struct ublk_rq_data { + }; + + struct ublk_uring_cmd_pdu { ++ /* ++ * Store requests in same batch temporarily for queuing them to ++ * daemon context. 
++ * ++ * It should have been stored to request payload, but we do want ++ * to avoid extra pre-allocation, and uring_cmd payload is always ++ * free for us ++ */ ++ struct request *req_list; ++ ++ /* ++ * The following two are valid in this cmd whole lifetime, and ++ * setup in ublk uring_cmd handler ++ */ + struct ublk_queue *ubq; + u16 tag; + }; +@@ -1159,14 +1173,12 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, + blk_mq_end_request(rq, BLK_STS_IOERR); + } + +-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, +- unsigned int issue_flags) ++static void ublk_dispatch_req(struct ublk_queue *ubq, ++ struct io_uring_cmd *cmd, ++ struct request *req, ++ unsigned int issue_flags) + { +- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); +- struct ublk_queue *ubq = pdu->ubq; +- int tag = pdu->tag; +- struct request *req = blk_mq_tag_to_rq( +- ubq->dev->tag_set.tags[ubq->q_id], tag); ++ int tag = req->tag; + struct ublk_io *io = &ubq->ios[tag]; + unsigned int mapped_bytes; + +@@ -1241,6 +1253,18 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, + ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); + } + ++static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, ++ unsigned int issue_flags) ++{ ++ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); ++ struct ublk_queue *ubq = pdu->ubq; ++ int tag = pdu->tag; ++ struct request *req = blk_mq_tag_to_rq( ++ ubq->dev->tag_set.tags[ubq->q_id], tag); ++ ++ ublk_dispatch_req(ubq, cmd, req, issue_flags); ++} ++ + static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) + { + struct ublk_io *io = &ubq->ios[rq->tag]; +@@ -1248,6 +1272,35 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) + io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); + } + ++static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, ++ unsigned int issue_flags) ++{ ++ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); ++ struct request *rq = 
pdu->req_list; ++ struct ublk_queue *ubq = rq->mq_hctx->driver_data; ++ struct request *next; ++ ++ while (rq) { ++ struct ublk_io *io = &ubq->ios[rq->tag]; ++ ++ next = rq->rq_next; ++ rq->rq_next = NULL; ++ ublk_dispatch_req(ubq, io->cmd, rq, issue_flags); ++ rq = next; ++ } ++} ++ ++static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) ++{ ++ struct request *rq = rq_list_peek(l); ++ struct ublk_io *io = &ubq->ios[rq->tag]; ++ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd); ++ ++ pdu->req_list = rq; ++ rq_list_init(l); ++ io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_list_tw_cb); ++} ++ + static enum blk_eh_timer_return ublk_timeout(struct request *rq) + { + struct ublk_queue *ubq = rq->mq_hctx->driver_data; +@@ -1286,21 +1339,12 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq) + return BLK_EH_RESET_TIMER; + } + +-static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, +- const struct blk_mq_queue_data *bd) ++static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq) + { +- struct ublk_queue *ubq = hctx->driver_data; +- struct request *rq = bd->rq; + blk_status_t res; + +- if (unlikely(ubq->fail_io)) { ++ if (unlikely(ubq->fail_io)) + return BLK_STS_TARGET; +- } +- +- /* fill iod to slot in io cmd buffer */ +- res = ublk_setup_iod(ubq, rq); +- if (unlikely(res != BLK_STS_OK)) +- return BLK_STS_IOERR; + + /* With recovery feature enabled, force_abort is set in + * ublk_stop_dev() before calling del_gendisk(). 
We have to +@@ -1314,6 +1358,29 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, + if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) + return BLK_STS_IOERR; + ++ if (unlikely(ubq->canceling)) ++ return BLK_STS_IOERR; ++ ++ /* fill iod to slot in io cmd buffer */ ++ res = ublk_setup_iod(ubq, rq); ++ if (unlikely(res != BLK_STS_OK)) ++ return BLK_STS_IOERR; ++ ++ blk_mq_start_request(rq); ++ return BLK_STS_OK; ++} ++ ++static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, ++ const struct blk_mq_queue_data *bd) ++{ ++ struct ublk_queue *ubq = hctx->driver_data; ++ struct request *rq = bd->rq; ++ blk_status_t res; ++ ++ res = ublk_prep_req(ubq, rq); ++ if (res != BLK_STS_OK) ++ return res; ++ + /* + * ->canceling has to be handled after ->force_abort and ->fail_io + * is dealt with, otherwise this request may not be failed in case +@@ -1324,12 +1391,35 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, + return BLK_STS_OK; + } + +- blk_mq_start_request(bd->rq); + ublk_queue_cmd(ubq, rq); +- + return BLK_STS_OK; + } + ++static void ublk_queue_rqs(struct rq_list *rqlist) ++{ ++ struct rq_list requeue_list = { }; ++ struct rq_list submit_list = { }; ++ struct ublk_queue *ubq = NULL; ++ struct request *req; ++ ++ while ((req = rq_list_pop(rqlist))) { ++ struct ublk_queue *this_q = req->mq_hctx->driver_data; ++ ++ if (ubq && ubq != this_q && !rq_list_empty(&submit_list)) ++ ublk_queue_cmd_list(ubq, &submit_list); ++ ubq = this_q; ++ ++ if (ublk_prep_req(ubq, req) == BLK_STS_OK) ++ rq_list_add_tail(&submit_list, req); ++ else ++ rq_list_add_tail(&requeue_list, req); ++ } ++ ++ if (ubq && !rq_list_empty(&submit_list)) ++ ublk_queue_cmd_list(ubq, &submit_list); ++ *rqlist = requeue_list; ++} ++ + static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, + unsigned int hctx_idx) + { +@@ -1342,6 +1432,7 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, + + static const struct 
blk_mq_ops ublk_mq_ops = { + .queue_rq = ublk_queue_rq, ++ .queue_rqs = ublk_queue_rqs, + .init_hctx = ublk_init_hctx, + .timeout = ublk_timeout, + }; +-- +2.39.5 + diff --git a/queue-6.14/ublk-remove-io_cmds-list-in-ublk_queue.patch b/queue-6.14/ublk-remove-io_cmds-list-in-ublk_queue.patch new file mode 100644 index 0000000000..dbf9173f20 --- /dev/null +++ b/queue-6.14/ublk-remove-io_cmds-list-in-ublk_queue.patch @@ -0,0 +1,133 @@ +From 3592feec8c9516c2ba65dd0d06f7e6f246748fc8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Mar 2025 12:14:17 -0600 +Subject: ublk: remove io_cmds list in ublk_queue + +From: Uday Shankar + +[ Upstream commit 989bcd623a8b0c32b76d9258767d8b37e53419e6 ] + +The current I/O dispatch mechanism - queueing I/O by adding it to the +io_cmds list (and poking task_work as needed), then dispatching it in +ublk server task context by reversing io_cmds and completing the +io_uring command associated to each one - was introduced by commit +7d4a93176e014 ("ublk_drv: don't forward io commands in reserve order") +to ensure that the ublk server received I/O in the same order that the +block layer submitted it to ublk_drv. This mechanism was only needed for +the "raw" task_work submission mechanism, since the io_uring task work +wrapper maintains FIFO ordering (using quite a similar mechanism in +fact). The "raw" task_work submission mechanism is no longer supported +in ublk_drv as of commit 29dc5d06613f2 ("ublk: kill queuing request by +task_work_add"), so the explicit llist/reversal is no longer needed - it +just duplicates logic already present in the underlying io_uring APIs. +Remove it. 
+ +Signed-off-by: Uday Shankar +Reviewed-by: Ming Lei +Link: https://lore.kernel.org/r/20250318-ublk_io_cmds-v1-1-c1bb74798fef@purestorage.com +Signed-off-by: Jens Axboe +Stable-dep-of: d6aa0c178bf8 ("ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA") +Signed-off-by: Sasha Levin +--- + drivers/block/ublk_drv.c | 46 ++++++++++------------------------------ + 1 file changed, 11 insertions(+), 35 deletions(-) + +diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c +index 971b793dedd03..f615b9bd82f5f 100644 +--- a/drivers/block/ublk_drv.c ++++ b/drivers/block/ublk_drv.c +@@ -73,8 +73,6 @@ + UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED) + + struct ublk_rq_data { +- struct llist_node node; +- + struct kref ref; + }; + +@@ -141,8 +139,6 @@ struct ublk_queue { + struct task_struct *ubq_daemon; + char *io_cmd_buf; + +- struct llist_head io_cmds; +- + unsigned long io_addr; /* mapped vm address */ + unsigned int max_io_sz; + bool force_abort; +@@ -1114,7 +1110,7 @@ static void ublk_fail_rq_fn(struct kref *ref) + } + + /* +- * Since __ublk_rq_task_work always fails requests immediately during ++ * Since ublk_rq_task_work_cb always fails requests immediately during + * exiting, __ublk_fail_req() is only called from abort context during + * exiting. So lock is unnecessary. 
+ * +@@ -1163,11 +1159,14 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, + blk_mq_end_request(rq, BLK_STS_IOERR); + } + +-static inline void __ublk_rq_task_work(struct request *req, +- unsigned issue_flags) ++static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, ++ unsigned int issue_flags) + { +- struct ublk_queue *ubq = req->mq_hctx->driver_data; +- int tag = req->tag; ++ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); ++ struct ublk_queue *ubq = pdu->ubq; ++ int tag = pdu->tag; ++ struct request *req = blk_mq_tag_to_rq( ++ ubq->dev->tag_set.tags[ubq->q_id], tag); + struct ublk_io *io = &ubq->ios[tag]; + unsigned int mapped_bytes; + +@@ -1242,34 +1241,11 @@ static inline void __ublk_rq_task_work(struct request *req, + ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); + } + +-static inline void ublk_forward_io_cmds(struct ublk_queue *ubq, +- unsigned issue_flags) +-{ +- struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); +- struct ublk_rq_data *data, *tmp; +- +- io_cmds = llist_reverse_order(io_cmds); +- llist_for_each_entry_safe(data, tmp, io_cmds, node) +- __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags); +-} +- +-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags) +-{ +- struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); +- struct ublk_queue *ubq = pdu->ubq; +- +- ublk_forward_io_cmds(ubq, issue_flags); +-} +- + static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) + { +- struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); ++ struct ublk_io *io = &ubq->ios[rq->tag]; + +- if (llist_add(&data->node, &ubq->io_cmds)) { +- struct ublk_io *io = &ubq->ios[rq->tag]; +- +- io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); +- } ++ io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); + } + + static enum blk_eh_timer_return ublk_timeout(struct request *rq) +@@ -1462,7 +1438,7 @@ static void ublk_abort_queue(struct ublk_device *ub, struct 
ublk_queue *ubq) + struct request *rq; + + /* +- * Either we fail the request or ublk_rq_task_work_fn ++ * Either we fail the request or ublk_rq_task_work_cb + * will do it + */ + rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i); +-- +2.39.5 + diff --git a/queue-6.14/ublk-remove-unused-cmd-argument-to-ublk_dispatch_req.patch b/queue-6.14/ublk-remove-unused-cmd-argument-to-ublk_dispatch_req.patch new file mode 100644 index 0000000000..24a9dea90d --- /dev/null +++ b/queue-6.14/ublk-remove-unused-cmd-argument-to-ublk_dispatch_req.patch @@ -0,0 +1,58 @@ +From ef38abf68f31a1404b87783b98771524384aa22a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Mar 2025 12:04:07 -0600 +Subject: ublk: remove unused cmd argument to ublk_dispatch_req() + +From: Caleb Sander Mateos + +[ Upstream commit dfbce8b798fb848a42706e2e544b78b3db22aaae ] + +ublk_dispatch_req() never uses its struct io_uring_cmd *cmd argument. +Drop it so callers don't have to pass a value. + +Signed-off-by: Caleb Sander Mateos +Link: https://lore.kernel.org/r/20250328180411.2696494-2-csander@purestorage.com +Signed-off-by: Jens Axboe +Stable-dep-of: d6aa0c178bf8 ("ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA") +Signed-off-by: Sasha Levin +--- + drivers/block/ublk_drv.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c +index e1388a9b1e2d1..437297022dcfa 100644 +--- a/drivers/block/ublk_drv.c ++++ b/drivers/block/ublk_drv.c +@@ -1174,7 +1174,6 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, + } + + static void ublk_dispatch_req(struct ublk_queue *ubq, +- struct io_uring_cmd *cmd, + struct request *req, + unsigned int issue_flags) + { +@@ -1262,7 +1261,7 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, + struct request *req = blk_mq_tag_to_rq( + ubq->dev->tag_set.tags[ubq->q_id], tag); + +- ublk_dispatch_req(ubq, cmd, req, issue_flags); ++ ublk_dispatch_req(ubq, req, 
issue_flags); + } + + static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) +@@ -1281,11 +1280,9 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, + struct request *next; + + while (rq) { +- struct ublk_io *io = &ubq->ios[rq->tag]; +- + next = rq->rq_next; + rq->rq_next = NULL; +- ublk_dispatch_req(ubq, io->cmd, rq, issue_flags); ++ ublk_dispatch_req(ubq, rq, issue_flags); + rq = next; + } + } +-- +2.39.5 + diff --git a/queue-6.14/vhost-scsi-add-better-resource-allocation-failure-ha.patch b/queue-6.14/vhost-scsi-add-better-resource-allocation-failure-ha.patch new file mode 100644 index 0000000000..0e0843616e --- /dev/null +++ b/queue-6.14/vhost-scsi-add-better-resource-allocation-failure-ha.patch @@ -0,0 +1,94 @@ +From 9a8362f15dd715e5c29fcbfaf80861594d065289 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 13:15:10 -0600 +Subject: vhost-scsi: Add better resource allocation failure handling + +From: Mike Christie + +[ Upstream commit 3ca51662f8186b569b8fb282242c20ccbb3993c2 ] + +If we can't allocate mem to map in data for a request or can't find +a tag for a command, we currently drop the command. This leads to the +error handler running to clean it up. Instead of dropping the command +this has us return an error telling the initiator that it queued more +commands than we can handle. The initiator will then reduce how many +commands it will send us and retry later. + +Signed-off-by: Mike Christie +Message-Id: <20241203191705.19431-4-michael.christie@oracle.com> +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Stefan Hajnoczi +Stable-dep-of: b18268713547 ("vhost-scsi: Fix vhost_scsi_send_bad_target()") +Signed-off-by: Sasha Levin +--- + drivers/vhost/scsi.c | 28 +++++++++++++++++++++++++--- + 1 file changed, 25 insertions(+), 3 deletions(-) + +diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c +index 7aeff435c1d87..ad7fa5bc0f5fc 100644 +--- a/drivers/vhost/scsi.c ++++ b/drivers/vhost/scsi.c +@@ -630,7 +630,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, + + tag = sbitmap_get(&svq->scsi_tags); + if (tag < 0) { +- pr_err("Unable to obtain tag for vhost_scsi_cmd\n"); ++ pr_warn_once("Guest sent too many cmds. Returning TASK_SET_FULL.\n"); + return ERR_PTR(-ENOMEM); + } + +@@ -929,6 +929,24 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_cmd *cmd) + target_submit(se_cmd); + } + ++static void ++vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq, ++ int head, unsigned int out, u8 status) ++{ ++ struct virtio_scsi_cmd_resp __user *resp; ++ struct virtio_scsi_cmd_resp rsp; ++ int ret; ++ ++ memset(&rsp, 0, sizeof(rsp)); ++ rsp.status = status; ++ resp = vq->iov[out].iov_base; ++ ret = __copy_to_user(resp, &rsp, sizeof(rsp)); ++ if (!ret) ++ vhost_add_used_and_signal(&vs->dev, vq, head, 0); ++ else ++ pr_err("Faulted on virtio_scsi_cmd_resp\n"); ++} ++ + static void + vhost_scsi_send_bad_target(struct vhost_scsi *vs, + struct vhost_virtqueue *vq, +@@ -1216,8 +1234,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) + exp_data_len + prot_bytes, + data_direction); + if (IS_ERR(cmd)) { +- vq_err(vq, "vhost_scsi_get_cmd failed %ld\n", +- PTR_ERR(cmd)); ++ ret = PTR_ERR(cmd); ++ vq_err(vq, "vhost_scsi_get_tag failed %dd\n", ret); + goto err; + } + cmd->tvc_vhost = vs; +@@ -1254,11 +1272,15 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) + * EINVAL: Invalid response buffer, drop the request + * EIO: Respond with bad target + * EAGAIN: Pending 
request ++ * ENOMEM: Could not allocate resources for request + */ + if (ret == -ENXIO) + break; + else if (ret == -EIO) + vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); ++ else if (ret == -ENOMEM) ++ vhost_scsi_send_status(vs, vq, vc.head, vc.out, ++ SAM_STAT_TASK_SET_FULL); + } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); + out: + mutex_unlock(&vq->mutex); +-- +2.39.5 + diff --git a/queue-6.14/vhost-scsi-fix-vhost_scsi_send_bad_target.patch b/queue-6.14/vhost-scsi-fix-vhost_scsi_send_bad_target.patch new file mode 100644 index 0000000000..e422c19b64 --- /dev/null +++ b/queue-6.14/vhost-scsi-fix-vhost_scsi_send_bad_target.patch @@ -0,0 +1,115 @@ +From 417fcef3b0e556c385eac6accf84fe3ab2b13d43 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Apr 2025 23:29:47 -0700 +Subject: vhost-scsi: Fix vhost_scsi_send_bad_target() + +From: Dongli Zhang + +[ Upstream commit b182687135474d7ed905a07cc6cb2734b359e13e ] + +Although the support of VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 was +signaled by the commit 664ed90e621c ("vhost/scsi: Set +VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits"), +vhost_scsi_send_bad_target() still assumes the response in a single +descriptor. + +In addition, although vhost_scsi_send_bad_target() is used by both I/O +queue and control queue, the response header is always +virtio_scsi_cmd_resp. It is required to use virtio_scsi_ctrl_tmf_resp or +virtio_scsi_ctrl_an_resp for control queue. + +Fixes: 664ed90e621c ("vhost/scsi: Set VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits") +Signed-off-by: Dongli Zhang +Acked-by: Jason Wang +Reviewed-by: Mike Christie +Message-Id: <20250403063028.16045-3-dongli.zhang@oracle.com> +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vhost/scsi.c | 48 ++++++++++++++++++++++++++++++++++---------- + 1 file changed, 37 insertions(+), 11 deletions(-) + +diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c +index ad7fa5bc0f5fc..7bfe5e5865fe9 100644 +--- a/drivers/vhost/scsi.c ++++ b/drivers/vhost/scsi.c +@@ -947,23 +947,46 @@ vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq, + pr_err("Faulted on virtio_scsi_cmd_resp\n"); + } + ++#define TYPE_IO_CMD 0 ++#define TYPE_CTRL_TMF 1 ++#define TYPE_CTRL_AN 2 ++ + static void + vhost_scsi_send_bad_target(struct vhost_scsi *vs, + struct vhost_virtqueue *vq, +- int head, unsigned out) ++ struct vhost_scsi_ctx *vc, int type) + { +- struct virtio_scsi_cmd_resp __user *resp; +- struct virtio_scsi_cmd_resp rsp; ++ union { ++ struct virtio_scsi_cmd_resp cmd; ++ struct virtio_scsi_ctrl_tmf_resp tmf; ++ struct virtio_scsi_ctrl_an_resp an; ++ } rsp; ++ struct iov_iter iov_iter; ++ size_t rsp_size; + int ret; + + memset(&rsp, 0, sizeof(rsp)); +- rsp.response = VIRTIO_SCSI_S_BAD_TARGET; +- resp = vq->iov[out].iov_base; +- ret = __copy_to_user(resp, &rsp, sizeof(rsp)); +- if (!ret) +- vhost_add_used_and_signal(&vs->dev, vq, head, 0); ++ ++ if (type == TYPE_IO_CMD) { ++ rsp_size = sizeof(struct virtio_scsi_cmd_resp); ++ rsp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; ++ } else if (type == TYPE_CTRL_TMF) { ++ rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp); ++ rsp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; ++ } else { ++ rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp); ++ rsp.an.response = VIRTIO_SCSI_S_BAD_TARGET; ++ } ++ ++ iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in, ++ rsp_size); ++ ++ ret = copy_to_iter(&rsp, rsp_size, &iov_iter); ++ ++ if (likely(ret == rsp_size)) ++ vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); + else +- pr_err("Faulted on virtio_scsi_cmd_resp\n"); ++ pr_err("Faulted on virtio scsi type=%d\n", type); + } + + static int +@@ -1277,7 +1300,7 @@ 
vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) + if (ret == -ENXIO) + break; + else if (ret == -EIO) +- vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); ++ vhost_scsi_send_bad_target(vs, vq, &vc, TYPE_IO_CMD); + else if (ret == -ENOMEM) + vhost_scsi_send_status(vs, vq, vc.head, vc.out, + SAM_STAT_TASK_SET_FULL); +@@ -1510,7 +1533,10 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) + if (ret == -ENXIO) + break; + else if (ret == -EIO) +- vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); ++ vhost_scsi_send_bad_target(vs, vq, &vc, ++ v_req.type == VIRTIO_SCSI_T_TMF ? ++ TYPE_CTRL_TMF : ++ TYPE_CTRL_AN); + } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); + out: + mutex_unlock(&vq->mutex); +-- +2.39.5 + diff --git a/queue-6.14/vhost-scsi-fix-vhost_scsi_send_status.patch b/queue-6.14/vhost-scsi-fix-vhost_scsi_send_status.patch new file mode 100644 index 0000000000..10aa623eb3 --- /dev/null +++ b/queue-6.14/vhost-scsi-fix-vhost_scsi_send_status.patch @@ -0,0 +1,76 @@ +From e1346b9b2d7f0cab13c6851d22e7261b6bbd3adb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Apr 2025 23:29:48 -0700 +Subject: vhost-scsi: Fix vhost_scsi_send_status() + +From: Dongli Zhang + +[ Upstream commit 58465d86071b61415e25fb054201f61e83d21465 ] + +Although the support of VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 was +signaled by the commit 664ed90e621c ("vhost/scsi: Set +VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits"), +vhost_scsi_send_status() still assumes the response in a single +descriptor. + +Similar issue in vhost_scsi_send_bad_target() has been fixed in previous +commit. In addition, similar issue for vhost_scsi_complete_cmd_work() has +been fixed by the commit 6dd88fd59da8 ("vhost-scsi: unbreak any layout for +response"). 
+ +Fixes: 3ca51662f818 ("vhost-scsi: Add better resource allocation failure handling") +Signed-off-by: Dongli Zhang +Acked-by: Jason Wang +Reviewed-by: Mike Christie +Message-Id: <20250403063028.16045-4-dongli.zhang@oracle.com> +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vhost/scsi.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c +index 7bfe5e5865fe9..35a03306d1345 100644 +--- a/drivers/vhost/scsi.c ++++ b/drivers/vhost/scsi.c +@@ -931,18 +931,22 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_cmd *cmd) + + static void + vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq, +- int head, unsigned int out, u8 status) ++ struct vhost_scsi_ctx *vc, u8 status) + { +- struct virtio_scsi_cmd_resp __user *resp; + struct virtio_scsi_cmd_resp rsp; ++ struct iov_iter iov_iter; + int ret; + + memset(&rsp, 0, sizeof(rsp)); + rsp.status = status; +- resp = vq->iov[out].iov_base; +- ret = __copy_to_user(resp, &rsp, sizeof(rsp)); +- if (!ret) +- vhost_add_used_and_signal(&vs->dev, vq, head, 0); ++ ++ iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in, ++ sizeof(rsp)); ++ ++ ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter); ++ ++ if (likely(ret == sizeof(rsp))) ++ vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); + else + pr_err("Faulted on virtio_scsi_cmd_resp\n"); + } +@@ -1302,7 +1306,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) + else if (ret == -EIO) + vhost_scsi_send_bad_target(vs, vq, &vc, TYPE_IO_CMD); + else if (ret == -ENOMEM) +- vhost_scsi_send_status(vs, vq, vc.head, vc.out, ++ vhost_scsi_send_status(vs, vq, &vc, + SAM_STAT_TASK_SET_FULL); + } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); + out: +-- +2.39.5 + diff --git a/queue-6.14/virtio-net-disable-delayed-refill-when-pausing-rx.patch b/queue-6.14/virtio-net-disable-delayed-refill-when-pausing-rx.patch new file 
mode 100644 index 0000000000..4395e2f044 --- /dev/null +++ b/queue-6.14/virtio-net-disable-delayed-refill-when-pausing-rx.patch @@ -0,0 +1,166 @@ +From 2b13d4bec85fa1b93e6f926685af7414c1db394e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Apr 2025 14:28:03 +0700 +Subject: virtio-net: disable delayed refill when pausing rx + +From: Bui Quang Minh + +[ Upstream commit 4bc12818b363bd30f0f7348dd9ab077290a637ae ] + +When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call +napi_disable() on the receive queue's napi. In delayed refill_work, it +also calls napi_disable() on the receive queue's napi. When +napi_disable() is called on an already disabled napi, it will sleep in +napi_disable_locked while still holding the netdev_lock. As a result, +later napi_enable gets stuck too as it cannot acquire the netdev_lock. +This leads to refill_work and the pause-then-resume tx are stuck +altogether. + +This scenario can be reproducible by binding a XDP socket to virtio-net +interface without setting up the fill ring. As a result, try_fill_recv +will fail until the fill ring is set up and refill_work is scheduled. + +This commit adds virtnet_rx_(pause/resume)_all helpers and fixes up the +virtnet_rx_resume to disable future and cancel all inflights delayed +refill_work before calling napi_disable() to pause the rx. + +Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()") +Acked-by: Michael S. 
Tsirkin +Signed-off-by: Bui Quang Minh +Acked-by: Jason Wang +Link: https://patch.msgid.link/20250417072806.18660-2-minhquangbui99@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/virtio_net.c | 69 +++++++++++++++++++++++++++++++++------- + 1 file changed, 57 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index 44dbb991787ed..3e4896d9537ee 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -3318,7 +3318,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) + return NETDEV_TX_OK; + } + +-static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) ++static void __virtnet_rx_pause(struct virtnet_info *vi, ++ struct receive_queue *rq) + { + bool running = netif_running(vi->dev); + +@@ -3328,17 +3329,63 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) + } + } + +-static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) ++static void virtnet_rx_pause_all(struct virtnet_info *vi) ++{ ++ int i; ++ ++ /* ++ * Make sure refill_work does not run concurrently to ++ * avoid napi_disable race which leads to deadlock. ++ */ ++ disable_delayed_refill(vi); ++ cancel_delayed_work_sync(&vi->refill); ++ for (i = 0; i < vi->max_queue_pairs; i++) ++ __virtnet_rx_pause(vi, &vi->rq[i]); ++} ++ ++static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) ++{ ++ /* ++ * Make sure refill_work does not run concurrently to ++ * avoid napi_disable race which leads to deadlock. 
++ */ ++ disable_delayed_refill(vi); ++ cancel_delayed_work_sync(&vi->refill); ++ __virtnet_rx_pause(vi, rq); ++} ++ ++static void __virtnet_rx_resume(struct virtnet_info *vi, ++ struct receive_queue *rq, ++ bool refill) + { + bool running = netif_running(vi->dev); + +- if (!try_fill_recv(vi, rq, GFP_KERNEL)) ++ if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) + schedule_delayed_work(&vi->refill, 0); + + if (running) + virtnet_napi_enable(rq); + } + ++static void virtnet_rx_resume_all(struct virtnet_info *vi) ++{ ++ int i; ++ ++ enable_delayed_refill(vi); ++ for (i = 0; i < vi->max_queue_pairs; i++) { ++ if (i < vi->curr_queue_pairs) ++ __virtnet_rx_resume(vi, &vi->rq[i], true); ++ else ++ __virtnet_rx_resume(vi, &vi->rq[i], false); ++ } ++} ++ ++static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) ++{ ++ enable_delayed_refill(vi); ++ __virtnet_rx_resume(vi, rq, true); ++} ++ + static int virtnet_rx_resize(struct virtnet_info *vi, + struct receive_queue *rq, u32 ring_num) + { +@@ -5939,12 +5986,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, + if (prog) + bpf_prog_add(prog, vi->max_queue_pairs - 1); + ++ virtnet_rx_pause_all(vi); ++ + /* Make sure NAPI is not using any XDP TX queues for RX. 
*/ + if (netif_running(dev)) { +- for (i = 0; i < vi->max_queue_pairs; i++) { +- virtnet_napi_disable(&vi->rq[i]); ++ for (i = 0; i < vi->max_queue_pairs; i++) + virtnet_napi_tx_disable(&vi->sq[i]); +- } + } + + if (!prog) { +@@ -5976,13 +6023,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, + vi->xdp_enabled = false; + } + ++ virtnet_rx_resume_all(vi); + for (i = 0; i < vi->max_queue_pairs; i++) { + if (old_prog) + bpf_prog_put(old_prog); +- if (netif_running(dev)) { +- virtnet_napi_enable(&vi->rq[i]); ++ if (netif_running(dev)) + virtnet_napi_tx_enable(&vi->sq[i]); +- } + } + + return 0; +@@ -5994,11 +6040,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, + rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); + } + ++ virtnet_rx_resume_all(vi); + if (netif_running(dev)) { +- for (i = 0; i < vi->max_queue_pairs; i++) { +- virtnet_napi_enable(&vi->rq[i]); ++ for (i = 0; i < vi->max_queue_pairs; i++) + virtnet_napi_tx_enable(&vi->sq[i]); +- } + } + if (prog) + bpf_prog_sub(prog, vi->max_queue_pairs - 1); +-- +2.39.5 + diff --git a/queue-6.14/virtio-net-refactor-napi_disable-paths.patch b/queue-6.14/virtio-net-refactor-napi_disable-paths.patch new file mode 100644 index 0000000000..ca75187992 --- /dev/null +++ b/queue-6.14/virtio-net-refactor-napi_disable-paths.patch @@ -0,0 +1,104 @@ +From 9f8c122ada67ec953e09ee06e85247754aece694 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Mar 2025 01:12:10 +0000 +Subject: virtio-net: Refactor napi_disable paths + +From: Joe Damato + +[ Upstream commit 986a93045183ae2f13e6d99d990ae8be36f6d6b0 ] + +Create virtnet_napi_disable helper and refactor virtnet_napi_tx_disable +to take a struct send_queue. + +Signed-off-by: Joe Damato +Acked-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +Tested-by: Lei Yang +Reviewed-by: Xuan Zhuo +Link: https://patch.msgid.link/20250307011215.266806-3-jdamato@fastly.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") +Signed-off-by: Sasha Levin +--- + drivers/net/virtio_net.c | 25 +++++++++++++++++-------- + 1 file changed, 17 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index d25f68004f97e..44dbb991787ed 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -2827,12 +2827,21 @@ static void virtnet_napi_tx_enable(struct send_queue *sq) + virtnet_napi_do_enable(sq->vq, napi); + } + +-static void virtnet_napi_tx_disable(struct napi_struct *napi) ++static void virtnet_napi_tx_disable(struct send_queue *sq) + { ++ struct napi_struct *napi = &sq->napi; ++ + if (napi->weight) + napi_disable(napi); + } + ++static void virtnet_napi_disable(struct receive_queue *rq) ++{ ++ struct napi_struct *napi = &rq->napi; ++ ++ napi_disable(napi); ++} ++ + static void refill_work(struct work_struct *work) + { + struct virtnet_info *vi = +@@ -2843,7 +2852,7 @@ static void refill_work(struct work_struct *work) + for (i = 0; i < vi->curr_queue_pairs; i++) { + struct receive_queue *rq = &vi->rq[i]; + +- napi_disable(&rq->napi); ++ virtnet_napi_disable(rq); + still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); + virtnet_napi_enable(rq); + +@@ -3042,8 +3051,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget) + + static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index) + { +- virtnet_napi_tx_disable(&vi->sq[qp_index].napi); +- napi_disable(&vi->rq[qp_index].napi); ++ virtnet_napi_tx_disable(&vi->sq[qp_index]); ++ virtnet_napi_disable(&vi->rq[qp_index]); + xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq); + } + +@@ -3314,7 +3323,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) + bool running = 
netif_running(vi->dev); + + if (running) { +- napi_disable(&rq->napi); ++ virtnet_napi_disable(rq); + virtnet_cancel_dim(vi, &rq->dim); + } + } +@@ -3356,7 +3365,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq) + qindex = sq - vi->sq; + + if (running) +- virtnet_napi_tx_disable(&sq->napi); ++ virtnet_napi_tx_disable(sq); + + txq = netdev_get_tx_queue(vi->dev, qindex); + +@@ -5933,8 +5942,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, + /* Make sure NAPI is not using any XDP TX queues for RX. */ + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { +- napi_disable(&vi->rq[i].napi); +- virtnet_napi_tx_disable(&vi->sq[i].napi); ++ virtnet_napi_disable(&vi->rq[i]); ++ virtnet_napi_tx_disable(&vi->sq[i]); + } + } + +-- +2.39.5 + diff --git a/queue-6.14/virtio-net-refactor-napi_enable-paths.patch b/queue-6.14/virtio-net-refactor-napi_enable-paths.patch new file mode 100644 index 0000000000..e0082007e8 --- /dev/null +++ b/queue-6.14/virtio-net-refactor-napi_enable-paths.patch @@ -0,0 +1,134 @@ +From c7119abb9eb0ede2efe8a6bb34e0774b82e63bf6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Mar 2025 01:12:09 +0000 +Subject: virtio-net: Refactor napi_enable paths + +From: Joe Damato + +[ Upstream commit 2af5adf962d4611a576061501faa8fb39590407e ] + +Refactor virtnet_napi_enable and virtnet_napi_tx_enable to take a struct +receive_queue. Create a helper, virtnet_napi_do_enable, which contains +the logic to enable a NAPI. + +Signed-off-by: Joe Damato +Acked-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +Tested-by: Lei Yang +Reviewed-by: Xuan Zhuo +Link: https://patch.msgid.link/20250307011215.266806-2-jdamato@fastly.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") +Signed-off-by: Sasha Levin +--- + drivers/net/virtio_net.c | 37 +++++++++++++++++++++---------------- + 1 file changed, 21 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index d1ed544ba03ac..d25f68004f97e 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -2789,7 +2789,8 @@ static void skb_recv_done(struct virtqueue *rvq) + virtqueue_napi_schedule(&rq->napi, rvq); + } + +-static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) ++static void virtnet_napi_do_enable(struct virtqueue *vq, ++ struct napi_struct *napi) + { + napi_enable(napi); + +@@ -2802,10 +2803,16 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi) + local_bh_enable(); + } + +-static void virtnet_napi_tx_enable(struct virtnet_info *vi, +- struct virtqueue *vq, +- struct napi_struct *napi) ++static void virtnet_napi_enable(struct receive_queue *rq) + { ++ virtnet_napi_do_enable(rq->vq, &rq->napi); ++} ++ ++static void virtnet_napi_tx_enable(struct send_queue *sq) ++{ ++ struct virtnet_info *vi = sq->vq->vdev->priv; ++ struct napi_struct *napi = &sq->napi; ++ + if (!napi->weight) + return; + +@@ -2817,7 +2824,7 @@ static void virtnet_napi_tx_enable(struct virtnet_info *vi, + return; + } + +- return virtnet_napi_enable(vq, napi); ++ virtnet_napi_do_enable(sq->vq, napi); + } + + static void virtnet_napi_tx_disable(struct napi_struct *napi) +@@ -2838,7 +2845,7 @@ static void refill_work(struct work_struct *work) + + napi_disable(&rq->napi); + still_empty = !try_fill_recv(vi, rq, GFP_KERNEL); +- virtnet_napi_enable(rq->vq, &rq->napi); ++ virtnet_napi_enable(rq); + + /* In theory, this can happen: if we don't get any 
buffers in + * we will *never* try to fill again. +@@ -3055,8 +3062,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) + if (err < 0) + goto err_xdp_reg_mem_model; + +- virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); +- virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); ++ virtnet_napi_enable(&vi->rq[qp_index]); ++ virtnet_napi_tx_enable(&vi->sq[qp_index]); + + return 0; + +@@ -3320,7 +3327,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) + schedule_delayed_work(&vi->refill, 0); + + if (running) +- virtnet_napi_enable(rq->vq, &rq->napi); ++ virtnet_napi_enable(rq); + } + + static int virtnet_rx_resize(struct virtnet_info *vi, +@@ -3383,7 +3390,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq) + __netif_tx_unlock_bh(txq); + + if (running) +- virtnet_napi_tx_enable(vi, sq->vq, &sq->napi); ++ virtnet_napi_tx_enable(sq); + } + + static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, +@@ -5964,9 +5971,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, + if (old_prog) + bpf_prog_put(old_prog); + if (netif_running(dev)) { +- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); +- virtnet_napi_tx_enable(vi, vi->sq[i].vq, +- &vi->sq[i].napi); ++ virtnet_napi_enable(&vi->rq[i]); ++ virtnet_napi_tx_enable(&vi->sq[i]); + } + } + +@@ -5981,9 +5987,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, + + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { +- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); +- virtnet_napi_tx_enable(vi, vi->sq[i].vq, +- &vi->sq[i].napi); ++ virtnet_napi_enable(&vi->rq[i]); ++ virtnet_napi_tx_enable(&vi->sq[i]); + } + } + if (prog) +-- +2.39.5 + diff --git a/queue-6.14/virtio_pci-use-self-group-type-for-cap-commands.patch b/queue-6.14/virtio_pci-use-self-group-type-for-cap-commands.patch new file mode 100644 index 
0000000000..86053259a7 --- /dev/null +++ b/queue-6.14/virtio_pci-use-self-group-type-for-cap-commands.patch @@ -0,0 +1,65 @@ +From 405ef5e1d5701da64acfa6682a6d1a8ccb6dbbbf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Mar 2025 10:14:42 -0600 +Subject: virtio_pci: Use self group type for cap commands + +From: Daniel Jurgens + +[ Upstream commit 16c22c56d4282584742022a37d4f79a46ca6094a ] + +Section 2.12.1.2 of v1.4 of the VirtIO spec states: + +The device and driver capabilities commands are currently defined for +self group type. +1. VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY +2. VIRTIO_ADMIN_CMD_DEVICE_CAP_GET +3. VIRTIO_ADMIN_CMD_DRIVER_CAP_SET + +Fixes: bfcad518605d ("virtio: Manage device and driver capabilities via the admin commands") +Signed-off-by: Daniel Jurgens +Reviewed-by: Parav Pandit +Message-Id: <20250304161442.90700-1-danielj@nvidia.com> +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/virtio/virtio_pci_modern.c | 4 ++-- + include/uapi/linux/virtio_pci.h | 1 + + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c +index 5eaade7578606..d50fe030d8253 100644 +--- a/drivers/virtio/virtio_pci_modern.c ++++ b/drivers/virtio/virtio_pci_modern.c +@@ -247,7 +247,7 @@ virtio_pci_admin_cmd_dev_parts_objects_enable(struct virtio_device *virtio_dev) + sg_init_one(&data_sg, get_data, sizeof(*get_data)); + sg_init_one(&result_sg, result, sizeof(*result)); + cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DEVICE_CAP_GET); +- cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); ++ cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF); + cmd.data_sg = &data_sg; + cmd.result_sg = &result_sg; + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); +@@ -305,7 +305,7 @@ static void virtio_pci_admin_cmd_cap_init(struct virtio_device *virtio_dev) + + sg_init_one(&result_sg, data, sizeof(*data)); + cmd.opcode = 
cpu_to_le16(VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY); +- cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); ++ cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF); + cmd.result_sg = &result_sg; + + ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); +diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h +index 8549d45712571..c691ac210ce2e 100644 +--- a/include/uapi/linux/virtio_pci.h ++++ b/include/uapi/linux/virtio_pci.h +@@ -246,6 +246,7 @@ struct virtio_pci_cfg_cap { + #define VIRTIO_ADMIN_CMD_LIST_USE 0x1 + + /* Admin command group type. */ ++#define VIRTIO_ADMIN_GROUP_TYPE_SELF 0x0 + #define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 + + /* Transitional device admin command. */ +-- +2.39.5 +