From: Sasha Levin Date: Thu, 19 Dec 2024 19:23:46 +0000 (-0500) Subject: Fixes for 6.12 X-Git-Tag: v6.1.122~56 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=02d181a985a4d5dd041702bcc6f67c8ac7267309;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.12 Signed-off-by: Sasha Levin --- diff --git a/queue-6.12/erofs-add-erofs_sb_free-helper.patch b/queue-6.12/erofs-add-erofs_sb_free-helper.patch new file mode 100644 index 00000000000..3f718ce26ef --- /dev/null +++ b/queue-6.12/erofs-add-erofs_sb_free-helper.patch @@ -0,0 +1,102 @@ +From e22342b717900cb58e6c9134ee441ae5b5023f0b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Dec 2024 21:35:01 +0800 +Subject: erofs: add erofs_sb_free() helper + +From: Gao Xiang + +[ Upstream commit e2de3c1bf6a0c99b089bd706a62da8f988918858 ] + +Unify the common parts of erofs_fc_free() and erofs_kill_sb() as +erofs_sb_free(). + +Thus, fput() in erofs_fc_get_tree() is no longer needed, too. + +Reviewed-by: Chao Yu +Signed-off-by: Gao Xiang +Link: https://lore.kernel.org/r/20241212133504.2047178-1-hsiangkao@linux.alibaba.com +Stable-dep-of: 6422cde1b0d5 ("erofs: use buffered I/O for file-backed mounts by default") +Signed-off-by: Sasha Levin +--- + fs/erofs/super.c | 36 +++++++++++++++++++----------------- + 1 file changed, 19 insertions(+), 17 deletions(-) + +diff --git a/fs/erofs/super.c b/fs/erofs/super.c +index 2dd7d819572f..c40821346d50 100644 +--- a/fs/erofs/super.c ++++ b/fs/erofs/super.c +@@ -718,16 +718,19 @@ static int erofs_fc_get_tree(struct fs_context *fc) + GET_TREE_BDEV_QUIET_LOOKUP : 0); + #ifdef CONFIG_EROFS_FS_BACKED_BY_FILE + if (ret == -ENOTBLK) { ++ struct file *file; ++ + if (!fc->source) + return invalf(fc, "No source specified"); +- sbi->fdev = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0); +- if (IS_ERR(sbi->fdev)) +- return PTR_ERR(sbi->fdev); ++ ++ file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0); ++ if (IS_ERR(file)) ++ return PTR_ERR(file); ++ sbi->fdev = file; + + if 
(S_ISREG(file_inode(sbi->fdev)->i_mode) && + sbi->fdev->f_mapping->a_ops->read_folio) + return get_tree_nodev(fc, erofs_fc_fill_super); +- fput(sbi->fdev); + } + #endif + return ret; +@@ -778,19 +781,24 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs) + kfree(devs); + } + +-static void erofs_fc_free(struct fs_context *fc) ++static void erofs_sb_free(struct erofs_sb_info *sbi) + { +- struct erofs_sb_info *sbi = fc->s_fs_info; +- +- if (!sbi) +- return; +- + erofs_free_dev_context(sbi->devs); + kfree(sbi->fsid); + kfree(sbi->domain_id); ++ if (sbi->fdev) ++ fput(sbi->fdev); + kfree(sbi); + } + ++static void erofs_fc_free(struct fs_context *fc) ++{ ++ struct erofs_sb_info *sbi = fc->s_fs_info; ++ ++ if (sbi) /* free here if an error occurs before transferring to sb */ ++ erofs_sb_free(sbi); ++} ++ + static const struct fs_context_operations erofs_context_ops = { + .parse_param = erofs_fc_parse_param, + .get_tree = erofs_fc_get_tree, +@@ -828,15 +836,9 @@ static void erofs_kill_sb(struct super_block *sb) + kill_anon_super(sb); + else + kill_block_super(sb); +- +- erofs_free_dev_context(sbi->devs); + fs_put_dax(sbi->dax_dev, NULL); + erofs_fscache_unregister_fs(sb); +- kfree(sbi->fsid); +- kfree(sbi->domain_id); +- if (sbi->fdev) +- fput(sbi->fdev); +- kfree(sbi); ++ erofs_sb_free(sbi); + sb->s_fs_info = NULL; + } + +-- +2.39.5 + diff --git a/queue-6.12/erofs-fix-psi-memstall-accounting.patch b/queue-6.12/erofs-fix-psi-memstall-accounting.patch new file mode 100644 index 00000000000..ad8f84de101 --- /dev/null +++ b/queue-6.12/erofs-fix-psi-memstall-accounting.patch @@ -0,0 +1,46 @@ +From b2236194259e71097c538f6e23ac5dc9e90e1cb1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 27 Nov 2024 16:52:36 +0800 +Subject: erofs: fix PSI memstall accounting + +From: Gao Xiang + +[ Upstream commit 1a2180f6859c73c674809f9f82e36c94084682ba ] + +Max Kellermann recently reported psi_group_cpu.tasks[NR_MEMSTALL] is +incorrect in the 6.11.9 kernel. 
+ +The root cause appears to be that, since the problematic commit, bio +can be NULL, causing psi_memstall_leave() to be skipped in +z_erofs_submit_queue(). + +Reported-by: Max Kellermann +Closes: https://lore.kernel.org/r/CAKPOu+8tvSowiJADW2RuKyofL_CSkm_SuyZA7ME5vMLWmL6pqw@mail.gmail.com +Fixes: 9e2f9d34dd12 ("erofs: handle overlapped pclusters out of crafted images properly") +Reviewed-by: Chao Yu +Signed-off-by: Gao Xiang +Link: https://lore.kernel.org/r/20241127085236.3538334-1-hsiangkao@linux.alibaba.com +Signed-off-by: Sasha Levin +--- + fs/erofs/zdata.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c +index a569ff9dfd04..1a00f061798a 100644 +--- a/fs/erofs/zdata.c ++++ b/fs/erofs/zdata.c +@@ -1679,9 +1679,9 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, + erofs_fscache_submit_bio(bio); + else + submit_bio(bio); +- if (memstall) +- psi_memstall_leave(&pflags); + } ++ if (memstall) ++ psi_memstall_leave(&pflags); + + /* + * although background is preferred, no one is pending for submission. +-- +2.39.5 + diff --git a/queue-6.12/erofs-reference-struct-erofs_device_info-for-erofs_m.patch b/queue-6.12/erofs-reference-struct-erofs_device_info-for-erofs_m.patch new file mode 100644 index 00000000000..c9226189f6d --- /dev/null +++ b/queue-6.12/erofs-reference-struct-erofs_device_info-for-erofs_m.patch @@ -0,0 +1,157 @@ +From 350f3baf7618fcdaf53e7a7a4e6d9dfc7b35784d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Dec 2024 07:54:01 +0800 +Subject: erofs: reference `struct erofs_device_info` for erofs_map_dev + +From: Gao Xiang + +[ Upstream commit f8d920a402aec3482931cb5f1539ed438740fc49 ] + +Record `m_sb` and `m_dif` to replace `m_fscache`, `m_daxdev`, `m_fp` +and `m_dax_part_off` in order to simplify the codebase. + +Note that `m_bdev` is still left since it can be assigned from +`sb->s_bdev` directly. 
+ +Reviewed-by: Chao Yu +Signed-off-by: Gao Xiang +Link: https://lore.kernel.org/r/20241212235401.2857246-1-hsiangkao@linux.alibaba.com +Stable-dep-of: 6422cde1b0d5 ("erofs: use buffered I/O for file-backed mounts by default") +Signed-off-by: Sasha Levin +--- + fs/erofs/data.c | 26 ++++++++++---------------- + fs/erofs/fileio.c | 2 +- + fs/erofs/fscache.c | 4 ++-- + fs/erofs/internal.h | 6 ++---- + 4 files changed, 15 insertions(+), 23 deletions(-) + +diff --git a/fs/erofs/data.c b/fs/erofs/data.c +index 365c988262b1..722151d3fee8 100644 +--- a/fs/erofs/data.c ++++ b/fs/erofs/data.c +@@ -186,19 +186,13 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) + } + + static void erofs_fill_from_devinfo(struct erofs_map_dev *map, +- struct erofs_device_info *dif) ++ struct super_block *sb, struct erofs_device_info *dif) + { ++ map->m_sb = sb; ++ map->m_dif = dif; + map->m_bdev = NULL; +- map->m_fp = NULL; +- if (dif->file) { +- if (S_ISBLK(file_inode(dif->file)->i_mode)) +- map->m_bdev = file_bdev(dif->file); +- else +- map->m_fp = dif->file; +- } +- map->m_daxdev = dif->dax_dev; +- map->m_dax_part_off = dif->dax_part_off; +- map->m_fscache = dif->fscache; ++ if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode)) ++ map->m_bdev = file_bdev(dif->file); + } + + int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) +@@ -208,7 +202,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) + erofs_off_t startoff, length; + int id; + +- erofs_fill_from_devinfo(map, &EROFS_SB(sb)->dif0); ++ erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0); + map->m_bdev = sb->s_bdev; /* use s_bdev for the primary device */ + if (map->m_deviceid) { + down_read(&devs->rwsem); +@@ -222,7 +216,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) + up_read(&devs->rwsem); + return 0; + } +- erofs_fill_from_devinfo(map, dif); ++ erofs_fill_from_devinfo(map, sb, dif); + up_read(&devs->rwsem); + } else if 
(devs->extra_devices && !devs->flatdev) { + down_read(&devs->rwsem); +@@ -235,7 +229,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) + if (map->m_pa >= startoff && + map->m_pa < startoff + length) { + map->m_pa -= startoff; +- erofs_fill_from_devinfo(map, dif); ++ erofs_fill_from_devinfo(map, sb, dif); + break; + } + } +@@ -305,7 +299,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + + iomap->offset = map.m_la; + if (flags & IOMAP_DAX) +- iomap->dax_dev = mdev.m_daxdev; ++ iomap->dax_dev = mdev.m_dif->dax_dev; + else + iomap->bdev = mdev.m_bdev; + iomap->length = map.m_llen; +@@ -334,7 +328,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + iomap->type = IOMAP_MAPPED; + iomap->addr = mdev.m_pa; + if (flags & IOMAP_DAX) +- iomap->addr += mdev.m_dax_part_off; ++ iomap->addr += mdev.m_dif->dax_part_off; + } + return 0; + } +diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c +index 3af96b1e2c2a..a61b8faec651 100644 +--- a/fs/erofs/fileio.c ++++ b/fs/erofs/fileio.c +@@ -67,7 +67,7 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev) + GFP_KERNEL | __GFP_NOFAIL); + + bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ); +- rq->iocb.ki_filp = mdev->m_fp; ++ rq->iocb.ki_filp = mdev->m_dif->file; + return rq; + } + +diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c +index ce7e38c82719..ce3d8737df85 100644 +--- a/fs/erofs/fscache.c ++++ b/fs/erofs/fscache.c +@@ -198,7 +198,7 @@ struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) + + io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL); + bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ); +- io->io.private = mdev->m_fscache->cookie; ++ io->io.private = mdev->m_dif->fscache->cookie; + io->io.end_io = erofs_fscache_bio_endio; + refcount_set(&io->io.ref, 1); + return &io->bio; +@@ -316,7 +316,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq 
*req) + if (!io) + return -ENOMEM; + iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count); +- ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie, ++ ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie, + mdev.m_pa + (pos - map.m_la), io); + erofs_fscache_req_io_put(io); + +diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h +index d70aa2410472..3108ece1d709 100644 +--- a/fs/erofs/internal.h ++++ b/fs/erofs/internal.h +@@ -366,11 +366,9 @@ enum { + }; + + struct erofs_map_dev { +- struct erofs_fscache *m_fscache; ++ struct super_block *m_sb; ++ struct erofs_device_info *m_dif; + struct block_device *m_bdev; +- struct dax_device *m_daxdev; +- struct file *m_fp; +- u64 m_dax_part_off; + + erofs_off_t m_pa; + unsigned int m_deviceid; +-- +2.39.5 + diff --git a/queue-6.12/erofs-use-buffered-i-o-for-file-backed-mounts-by-def.patch b/queue-6.12/erofs-use-buffered-i-o-for-file-backed-mounts-by-def.patch new file mode 100644 index 00000000000..111c06a7a65 --- /dev/null +++ b/queue-6.12/erofs-use-buffered-i-o-for-file-backed-mounts-by-def.patch @@ -0,0 +1,139 @@ +From 11a8695f7aed6930bfc758b83fec24f1eb4ebcb8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Dec 2024 21:43:36 +0800 +Subject: erofs: use buffered I/O for file-backed mounts by default + +From: Gao Xiang + +[ Upstream commit 6422cde1b0d5a31b206b263417c1c2b3c80fe82c ] + +For many use cases (e.g. container images are just fetched from remote), +performance will be impacted if underlay page cache is up-to-date but +direct i/o flushes dirty pages first. + +Instead, let's use buffered I/O by default to keep in sync with loop +devices and add a (re)mount option to explicitly give a try to use +direct I/O if supported by the underlying files. 
+ +The container startup time is improved as below: +[workload] docker.io/library/workpress:latest + unpack 1st run non-1st runs +EROFS snapshotter buffered I/O file 4.586404265s 0.308s 0.198s +EROFS snapshotter direct I/O file 4.581742849s 2.238s 0.222s +EROFS snapshotter loop 4.596023152s 0.346s 0.201s +Overlayfs snapshotter 5.382851037s 0.206s 0.214s + +Fixes: fb176750266a ("erofs: add file-backed mount support") +Cc: Derek McGowan +Reviewed-by: Chao Yu +Signed-off-by: Gao Xiang +Link: https://lore.kernel.org/r/20241212134336.2059899-1-hsiangkao@linux.alibaba.com +Signed-off-by: Sasha Levin +--- + fs/erofs/fileio.c | 7 +++++-- + fs/erofs/internal.h | 1 + + fs/erofs/super.c | 23 +++++++++++++++-------- + 3 files changed, 21 insertions(+), 10 deletions(-) + +diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c +index a61b8faec651..33f8539dda4a 100644 +--- a/fs/erofs/fileio.c ++++ b/fs/erofs/fileio.c +@@ -9,6 +9,7 @@ struct erofs_fileio_rq { + struct bio_vec bvecs[BIO_MAX_VECS]; + struct bio bio; + struct kiocb iocb; ++ struct super_block *sb; + }; + + struct erofs_fileio { +@@ -52,8 +53,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) + rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << SECTOR_SHIFT; + rq->iocb.ki_ioprio = get_current_ioprio(); + rq->iocb.ki_complete = erofs_fileio_ki_complete; +- rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ? 
+- IOCB_DIRECT : 0; ++ if (test_opt(&EROFS_SB(rq->sb)->opt, DIRECT_IO) && ++ rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ++ rq->iocb.ki_flags = IOCB_DIRECT; + iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt, + rq->bio.bi_iter.bi_size); + ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter); +@@ -68,6 +70,7 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev) + + bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ); + rq->iocb.ki_filp = mdev->m_dif->file; ++ rq->sb = mdev->m_sb; + return rq; + } + +diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h +index 3108ece1d709..77e785a6dfa7 100644 +--- a/fs/erofs/internal.h ++++ b/fs/erofs/internal.h +@@ -182,6 +182,7 @@ struct erofs_sb_info { + #define EROFS_MOUNT_POSIX_ACL 0x00000020 + #define EROFS_MOUNT_DAX_ALWAYS 0x00000040 + #define EROFS_MOUNT_DAX_NEVER 0x00000080 ++#define EROFS_MOUNT_DIRECT_IO 0x00000100 + + #define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option) + #define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option) +diff --git a/fs/erofs/super.c b/fs/erofs/super.c +index 60f7bd43a5a4..5b279977c9d5 100644 +--- a/fs/erofs/super.c ++++ b/fs/erofs/super.c +@@ -379,14 +379,8 @@ static void erofs_default_options(struct erofs_sb_info *sbi) + } + + enum { +- Opt_user_xattr, +- Opt_acl, +- Opt_cache_strategy, +- Opt_dax, +- Opt_dax_enum, +- Opt_device, +- Opt_fsid, +- Opt_domain_id, ++ Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum, ++ Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, + Opt_err + }; + +@@ -413,6 +407,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = { + fsparam_string("device", Opt_device), + fsparam_string("fsid", Opt_fsid), + fsparam_string("domain_id", Opt_domain_id), ++ fsparam_flag_no("directio", Opt_directio), + {} + }; + +@@ -526,6 +521,16 @@ static int erofs_fc_parse_param(struct fs_context *fc, + errorfc(fc, "%s option not supported", 
erofs_fs_parameters[opt].name); + break; + #endif ++ case Opt_directio: ++#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE ++ if (result.boolean) ++ set_opt(&sbi->opt, DIRECT_IO); ++ else ++ clear_opt(&sbi->opt, DIRECT_IO); ++#else ++ errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); ++#endif ++ break; + default: + return -ENOPARAM; + } +@@ -963,6 +968,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) + seq_puts(seq, ",dax=always"); + if (test_opt(opt, DAX_NEVER)) + seq_puts(seq, ",dax=never"); ++ if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO)) ++ seq_puts(seq, ",directio"); + #ifdef CONFIG_EROFS_FS_ONDEMAND + if (sbi->fsid) + seq_printf(seq, ",fsid=%s", sbi->fsid); +-- +2.39.5 + diff --git a/queue-6.12/erofs-use-struct-erofs_device_info-for-the-primary-d.patch b/queue-6.12/erofs-use-struct-erofs_device_info-for-the-primary-d.patch new file mode 100644 index 00000000000..cc7b261b484 --- /dev/null +++ b/queue-6.12/erofs-use-struct-erofs_device_info-for-the-primary-d.patch @@ -0,0 +1,219 @@ +From a4931361873eb6e0e8ab75aa98a1d3c7d78e46de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Dec 2024 20:53:08 +0800 +Subject: erofs: use `struct erofs_device_info` for the primary device + +From: Gao Xiang + +[ Upstream commit 7b00af2c5414dc01e0718deef7ead81102867636 ] + +Instead of just listing each one directly in `struct erofs_sb_info` +except that we still use `sb->s_bdev` for the primary block device. 
+ +Reviewed-by: Chao Yu +Signed-off-by: Gao Xiang +Link: https://lore.kernel.org/r/20241216125310.930933-2-hsiangkao@linux.alibaba.com +Stable-dep-of: 6422cde1b0d5 ("erofs: use buffered I/O for file-backed mounts by default") +Signed-off-by: Sasha Levin +--- + fs/erofs/data.c | 12 ++++-------- + fs/erofs/fscache.c | 6 +++--- + fs/erofs/internal.h | 8 ++------ + fs/erofs/super.c | 27 +++++++++++++-------------- + 4 files changed, 22 insertions(+), 31 deletions(-) + +diff --git a/fs/erofs/data.c b/fs/erofs/data.c +index fa51437e1d99..365c988262b1 100644 +--- a/fs/erofs/data.c ++++ b/fs/erofs/data.c +@@ -63,10 +63,10 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb) + + buf->file = NULL; + if (erofs_is_fileio_mode(sbi)) { +- buf->file = sbi->fdev; /* some fs like FUSE needs it */ ++ buf->file = sbi->dif0.file; /* some fs like FUSE needs it */ + buf->mapping = buf->file->f_mapping; + } else if (erofs_is_fscache_mode(sb)) +- buf->mapping = sbi->s_fscache->inode->i_mapping; ++ buf->mapping = sbi->dif0.fscache->inode->i_mapping; + else + buf->mapping = sb->s_bdev->bd_mapping; + } +@@ -208,12 +208,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) + erofs_off_t startoff, length; + int id; + +- map->m_bdev = sb->s_bdev; +- map->m_daxdev = EROFS_SB(sb)->dax_dev; +- map->m_dax_part_off = EROFS_SB(sb)->dax_part_off; +- map->m_fscache = EROFS_SB(sb)->s_fscache; +- map->m_fp = EROFS_SB(sb)->fdev; +- ++ erofs_fill_from_devinfo(map, &EROFS_SB(sb)->dif0); ++ map->m_bdev = sb->s_bdev; /* use s_bdev for the primary device */ + if (map->m_deviceid) { + down_read(&devs->rwsem); + dif = idr_find(&devs->tree, map->m_deviceid - 1); +diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c +index fda16eedafb5..ce7e38c82719 100644 +--- a/fs/erofs/fscache.c ++++ b/fs/erofs/fscache.c +@@ -657,7 +657,7 @@ int erofs_fscache_register_fs(struct super_block *sb) + if (IS_ERR(fscache)) + return PTR_ERR(fscache); + +- sbi->s_fscache = fscache; ++ 
sbi->dif0.fscache = fscache; + return 0; + } + +@@ -665,14 +665,14 @@ void erofs_fscache_unregister_fs(struct super_block *sb) + { + struct erofs_sb_info *sbi = EROFS_SB(sb); + +- erofs_fscache_unregister_cookie(sbi->s_fscache); ++ erofs_fscache_unregister_cookie(sbi->dif0.fscache); + + if (sbi->domain) + erofs_fscache_domain_put(sbi->domain); + else + fscache_relinquish_volume(sbi->volume, NULL, false); + +- sbi->s_fscache = NULL; ++ sbi->dif0.fscache = NULL; + sbi->volume = NULL; + sbi->domain = NULL; + } +diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h +index 9b03c8f323a7..d70aa2410472 100644 +--- a/fs/erofs/internal.h ++++ b/fs/erofs/internal.h +@@ -113,6 +113,7 @@ struct erofs_xattr_prefix_item { + }; + + struct erofs_sb_info { ++ struct erofs_device_info dif0; + struct erofs_mount_opts opt; /* options */ + #ifdef CONFIG_EROFS_FS_ZIP + /* list for all registered superblocks, mainly for shrinker */ +@@ -130,13 +131,9 @@ struct erofs_sb_info { + + struct erofs_sb_lz4_info lz4; + #endif /* CONFIG_EROFS_FS_ZIP */ +- struct file *fdev; + struct inode *packed_inode; + struct erofs_dev_context *devs; +- struct dax_device *dax_dev; +- u64 dax_part_off; + u64 total_blocks; +- u32 primarydevice_blocks; + + u32 meta_blkaddr; + #ifdef CONFIG_EROFS_FS_XATTR +@@ -172,7 +169,6 @@ struct erofs_sb_info { + + /* fscache support */ + struct fscache_volume *volume; +- struct erofs_fscache *s_fscache; + struct erofs_domain *domain; + char *fsid; + char *domain_id; +@@ -193,7 +189,7 @@ struct erofs_sb_info { + + static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi) + { +- return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->fdev; ++ return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->dif0.file; + } + + static inline bool erofs_is_fscache_mode(struct super_block *sb) +diff --git a/fs/erofs/super.c b/fs/erofs/super.c +index c40821346d50..60f7bd43a5a4 100644 +--- a/fs/erofs/super.c ++++ b/fs/erofs/super.c +@@ -218,7 +218,7 @@ static int 
erofs_scan_devices(struct super_block *sb, + struct erofs_device_info *dif; + int id, err = 0; + +- sbi->total_blocks = sbi->primarydevice_blocks; ++ sbi->total_blocks = sbi->dif0.blocks; + if (!erofs_sb_has_device_table(sbi)) + ondisk_extradevs = 0; + else +@@ -322,7 +322,7 @@ static int erofs_read_superblock(struct super_block *sb) + sbi->sb_size); + goto out; + } +- sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks); ++ sbi->dif0.blocks = le32_to_cpu(dsb->blocks); + sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); + #ifdef CONFIG_EROFS_FS_XATTR + sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); +@@ -617,9 +617,8 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) + return -EINVAL; + } + +- sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, +- &sbi->dax_part_off, +- NULL, NULL); ++ sbi->dif0.dax_dev = fs_dax_get_by_bdev(sb->s_bdev, ++ &sbi->dif0.dax_part_off, NULL, NULL); + } + + err = erofs_read_superblock(sb); +@@ -642,7 +641,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) + } + + if (test_opt(&sbi->opt, DAX_ALWAYS)) { +- if (!sbi->dax_dev) { ++ if (!sbi->dif0.dax_dev) { + errorfc(fc, "DAX unsupported by block device. 
Turning off DAX."); + clear_opt(&sbi->opt, DAX_ALWAYS); + } else if (sbi->blkszbits != PAGE_SHIFT) { +@@ -722,14 +721,13 @@ static int erofs_fc_get_tree(struct fs_context *fc) + + if (!fc->source) + return invalf(fc, "No source specified"); +- + file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0); + if (IS_ERR(file)) + return PTR_ERR(file); +- sbi->fdev = file; ++ sbi->dif0.file = file; + +- if (S_ISREG(file_inode(sbi->fdev)->i_mode) && +- sbi->fdev->f_mapping->a_ops->read_folio) ++ if (S_ISREG(file_inode(sbi->dif0.file)->i_mode) && ++ sbi->dif0.file->f_mapping->a_ops->read_folio) + return get_tree_nodev(fc, erofs_fc_fill_super); + } + #endif +@@ -786,8 +784,8 @@ static void erofs_sb_free(struct erofs_sb_info *sbi) + erofs_free_dev_context(sbi->devs); + kfree(sbi->fsid); + kfree(sbi->domain_id); +- if (sbi->fdev) +- fput(sbi->fdev); ++ if (sbi->dif0.file) ++ fput(sbi->dif0.file); + kfree(sbi); + } + +@@ -832,11 +830,12 @@ static void erofs_kill_sb(struct super_block *sb) + { + struct erofs_sb_info *sbi = EROFS_SB(sb); + +- if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) || sbi->fdev) ++ if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) || ++ sbi->dif0.file) + kill_anon_super(sb); + else + kill_block_super(sb); +- fs_put_dax(sbi->dax_dev, NULL); ++ fs_put_dax(sbi->dif0.dax_dev, NULL); + erofs_fscache_unregister_fs(sb); + erofs_sb_free(sbi); + sb->s_fs_info = NULL; +-- +2.39.5 + diff --git a/queue-6.12/firmware-arm_ffa-fix-the-race-around-setting-ffa_dev.patch b/queue-6.12/firmware-arm_ffa-fix-the-race-around-setting-ffa_dev.patch new file mode 100644 index 00000000000..24a1432d447 --- /dev/null +++ b/queue-6.12/firmware-arm_ffa-fix-the-race-around-setting-ffa_dev.patch @@ -0,0 +1,141 @@ +From 0a8a7df983ed842d9bb150a8e59bc92a125ec624 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 14:31:08 +0000 +Subject: firmware: arm_ffa: Fix the race around setting ffa_dev->properties + +From: Levi Yun + +[ Upstream commit 
6fe437cfe2cdc797b03f63b338a13fac96ed6a08 ] + +Currently, ffa_dev->properties is set after the ffa_device_register() +call returns in ffa_setup_partitions(). This could potentially result in +a race where the partition's properties are accessed while probing +struct ffa_device before they are set. + +Update the ffa_device_register() to receive ffa_partition_info so all +the data from the partition information received from the firmware can +be updated into the struct ffa_device before calling device_register() +in ffa_device_register(). + +Fixes: e781858488b9 ("firmware: arm_ffa: Add initial FFA bus support for device enumeration") +Signed-off-by: Levi Yun +Message-Id: <20241203143109.1030514-2-yeoreum.yun@arm.com> +Signed-off-by: Sudeep Holla +Signed-off-by: Sasha Levin +--- + drivers/firmware/arm_ffa/bus.c | 15 +++++++++++---- + drivers/firmware/arm_ffa/driver.c | 7 +------ + include/linux/arm_ffa.h | 13 ++++++++----- + 3 files changed, 20 insertions(+), 15 deletions(-) + +diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c +index eb17d03b66fe..dfda5ffc14db 100644 +--- a/drivers/firmware/arm_ffa/bus.c ++++ b/drivers/firmware/arm_ffa/bus.c +@@ -187,13 +187,18 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) + return valid; + } + +-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, +- const struct ffa_ops *ops) ++struct ffa_device * ++ffa_device_register(const struct ffa_partition_info *part_info, ++ const struct ffa_ops *ops) + { + int id, ret; ++ uuid_t uuid; + struct device *dev; + struct ffa_device *ffa_dev; + ++ if (!part_info) ++ return NULL; ++ + id = ida_alloc_min(&ffa_bus_id, 1, GFP_KERNEL); + if (id < 0) + return NULL; +@@ -210,9 +215,11 @@ struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, + dev_set_name(&ffa_dev->dev, "arm-ffa-%d", id); + + ffa_dev->id = id; +- ffa_dev->vm_id = vm_id; ++ ffa_dev->vm_id = part_info->id; ++ ffa_dev->properties = part_info->properties; + ffa_dev->ops = 
ops; +- uuid_copy(&ffa_dev->uuid, uuid); ++ import_uuid(&uuid, (u8 *)part_info->uuid); ++ uuid_copy(&ffa_dev->uuid, &uuid); + + ret = device_register(&ffa_dev->dev); + if (ret) { +diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c +index b14cbdae94e8..2c2ec3c35f15 100644 +--- a/drivers/firmware/arm_ffa/driver.c ++++ b/drivers/firmware/arm_ffa/driver.c +@@ -1387,7 +1387,6 @@ static struct notifier_block ffa_bus_nb = { + static int ffa_setup_partitions(void) + { + int count, idx, ret; +- uuid_t uuid; + struct ffa_device *ffa_dev; + struct ffa_dev_part_info *info; + struct ffa_partition_info *pbuf, *tpbuf; +@@ -1406,23 +1405,19 @@ static int ffa_setup_partitions(void) + + xa_init(&drv_info->partition_info); + for (idx = 0, tpbuf = pbuf; idx < count; idx++, tpbuf++) { +- import_uuid(&uuid, (u8 *)tpbuf->uuid); +- + /* Note that if the UUID will be uuid_null, that will require + * ffa_bus_notifier() to find the UUID of this partition id + * with help of ffa_device_match_uuid(). FF-A v1.1 and above + * provides UUID here for each partition as part of the + * discovery API and the same is passed. 
+ */ +- ffa_dev = ffa_device_register(&uuid, tpbuf->id, &ffa_drv_ops); ++ ffa_dev = ffa_device_register(tpbuf, &ffa_drv_ops); + if (!ffa_dev) { + pr_err("%s: failed to register partition ID 0x%x\n", + __func__, tpbuf->id); + continue; + } + +- ffa_dev->properties = tpbuf->properties; +- + if (drv_info->version > FFA_VERSION_1_0 && + !(tpbuf->properties & FFA_PARTITION_AARCH64_EXEC)) + ffa_mode_32bit_set(ffa_dev); +diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h +index a28e2a6a13d0..74169dd0f659 100644 +--- a/include/linux/arm_ffa.h ++++ b/include/linux/arm_ffa.h +@@ -166,9 +166,12 @@ static inline void *ffa_dev_get_drvdata(struct ffa_device *fdev) + return dev_get_drvdata(&fdev->dev); + } + ++struct ffa_partition_info; ++ + #if IS_REACHABLE(CONFIG_ARM_FFA_TRANSPORT) +-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, +- const struct ffa_ops *ops); ++struct ffa_device * ++ffa_device_register(const struct ffa_partition_info *part_info, ++ const struct ffa_ops *ops); + void ffa_device_unregister(struct ffa_device *ffa_dev); + int ffa_driver_register(struct ffa_driver *driver, struct module *owner, + const char *mod_name); +@@ -176,9 +179,9 @@ void ffa_driver_unregister(struct ffa_driver *driver); + bool ffa_device_is_valid(struct ffa_device *ffa_dev); + + #else +-static inline +-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, +- const struct ffa_ops *ops) ++static inline struct ffa_device * ++ffa_device_register(const struct ffa_partition_info *part_info, ++ const struct ffa_ops *ops) + { + return NULL; + } +-- +2.39.5 + diff --git a/queue-6.12/firmware-arm_scmi-fix-i.mx-build-dependency.patch b/queue-6.12/firmware-arm_scmi-fix-i.mx-build-dependency.patch new file mode 100644 index 00000000000..bd6f8ce44fc --- /dev/null +++ b/queue-6.12/firmware-arm_scmi-fix-i.mx-build-dependency.patch @@ -0,0 +1,82 @@ +From 6430d3a2c3b25314895a943f3de02cb7ebe747b7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 Nov 
2024 00:05:18 +0100 +Subject: firmware: arm_scmi: Fix i.MX build dependency + +From: Arnd Bergmann + +[ Upstream commit 514b2262ade48a0503ac6aa03c3bfb8c5be69b21 ] + +The newly added SCMI vendor driver references functions in the +protocol driver but needs a Kconfig dependency to ensure it can link; +essentially the Kconfig dependency needs to be reversed to match the +link time dependency: + + | arm-linux-gnueabi-ld: sound/soc/fsl/fsl_mqs.o: in function `fsl_mqs_sm_write': + | fsl_mqs.c:(.text+0x1aa): undefined reference to `scmi_imx_misc_ctrl_set' + | arm-linux-gnueabi-ld: sound/soc/fsl/fsl_mqs.o: in function `fsl_mqs_sm_read': + | fsl_mqs.c:(.text+0x1ee): undefined reference to `scmi_imx_misc_ctrl_get' + +This however only works after changing the dependency in the SND_SOC_FSL_MQS +driver as well, which uses 'select IMX_SCMI_MISC_DRV' to turn on a +driver it depends on. This is generally a bad idea, so the best solution +is to change that into a dependency. + +To allow the ASoC driver to keep building with the SCMI support, this +needs to be an optional dependency that enforces the link-time +dependency if IMX_SCMI_MISC_DRV is a loadable module but does not +depend on it if that is disabled. 
+ +Fixes: 61c9f03e22fc ("firmware: arm_scmi: Add initial support for i.MX MISC protocol") +Fixes: 101c9023594a ("ASoC: fsl_mqs: Support accessing registers by scmi interface") +Signed-off-by: Arnd Bergmann +Acked-by: Mark Brown +Acked-by: Shengjiu Wang +Message-Id: <20241115230555.2435004-1-arnd@kernel.org> +Signed-off-by: Sudeep Holla +Signed-off-by: Sasha Levin +--- + drivers/firmware/arm_scmi/vendors/imx/Kconfig | 1 + + drivers/firmware/imx/Kconfig | 1 - + sound/soc/fsl/Kconfig | 1 + + 3 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/firmware/arm_scmi/vendors/imx/Kconfig b/drivers/firmware/arm_scmi/vendors/imx/Kconfig +index 2883ed24a84d..a01bf5e47301 100644 +--- a/drivers/firmware/arm_scmi/vendors/imx/Kconfig ++++ b/drivers/firmware/arm_scmi/vendors/imx/Kconfig +@@ -15,6 +15,7 @@ config IMX_SCMI_BBM_EXT + config IMX_SCMI_MISC_EXT + tristate "i.MX SCMI MISC EXTENSION" + depends on ARM_SCMI_PROTOCOL || (COMPILE_TEST && OF) ++ depends on IMX_SCMI_MISC_DRV + default y if ARCH_MXC + help + This enables i.MX System MISC control logic such as gpio expander +diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig +index 477d3f32d99a..907cd149c40a 100644 +--- a/drivers/firmware/imx/Kconfig ++++ b/drivers/firmware/imx/Kconfig +@@ -25,7 +25,6 @@ config IMX_SCU + + config IMX_SCMI_MISC_DRV + tristate "IMX SCMI MISC Protocol driver" +- depends on IMX_SCMI_MISC_EXT || COMPILE_TEST + default y if ARCH_MXC + help + The System Controller Management Interface firmware (SCMI FW) is +diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig +index e283751abfef..678540b78280 100644 +--- a/sound/soc/fsl/Kconfig ++++ b/sound/soc/fsl/Kconfig +@@ -29,6 +29,7 @@ config SND_SOC_FSL_SAI + config SND_SOC_FSL_MQS + tristate "Medium Quality Sound (MQS) module support" + depends on SND_SOC_FSL_SAI ++ depends on IMX_SCMI_MISC_DRV || !IMX_SCMI_MISC_DRV + select REGMAP_MMIO + help + Say Y if you want to add Medium Quality Sound (MQS) +-- +2.39.5 + diff 
--git a/queue-6.12/i2c-pnx-fix-timeout-in-wait-functions.patch b/queue-6.12/i2c-pnx-fix-timeout-in-wait-functions.patch new file mode 100644 index 00000000000..a4d6292d9d0 --- /dev/null +++ b/queue-6.12/i2c-pnx-fix-timeout-in-wait-functions.patch @@ -0,0 +1,48 @@ +From 0fdafff1cad66f14ff6ce0e4434ab26ae0d1e4e6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 7 Dec 2024 00:19:34 +0100 +Subject: i2c: pnx: Fix timeout in wait functions + +From: Vladimir Riabchun + +[ Upstream commit 7363f2d4c18557c99c536b70489187bb4e05c412 ] + +Since commit f63b94be6942 ("i2c: pnx: Fix potential deadlock warning +from del_timer_sync() call in isr") jiffies are stored in +i2c_pnx_algo_data.timeout, but wait_timeout and wait_reset are still +using it as milliseconds. Convert jiffies back to milliseconds to wait +for the expected amount of time. + +Fixes: f63b94be6942 ("i2c: pnx: Fix potential deadlock warning from del_timer_sync() call in isr") +Signed-off-by: Vladimir Riabchun +Signed-off-by: Andi Shyti +Signed-off-by: Sasha Levin +--- + drivers/i2c/busses/i2c-pnx.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c +index 1dafadda73af..135300f3b534 100644 +--- a/drivers/i2c/busses/i2c-pnx.c ++++ b/drivers/i2c/busses/i2c-pnx.c +@@ -95,7 +95,7 @@ enum { + + static inline int wait_timeout(struct i2c_pnx_algo_data *data) + { +- long timeout = data->timeout; ++ long timeout = jiffies_to_msecs(data->timeout); + while (timeout > 0 && + (ioread32(I2C_REG_STS(data)) & mstatus_active)) { + mdelay(1); +@@ -106,7 +106,7 @@ static inline int wait_timeout(struct i2c_pnx_algo_data *data) + + static inline int wait_reset(struct i2c_pnx_algo_data *data) + { +- long timeout = data->timeout; ++ long timeout = jiffies_to_msecs(data->timeout); + while (timeout > 0 && + (ioread32(I2C_REG_CTL(data)) & mcntrl_reset)) { + mdelay(1); +-- +2.39.5 + diff --git a/queue-6.12/net-stmmac-fix-tso-dma-api-usage-causing-oops.patch 
b/queue-6.12/net-stmmac-fix-tso-dma-api-usage-causing-oops.patch new file mode 100644 index 00000000000..4b8cac06297 --- /dev/null +++ b/queue-6.12/net-stmmac-fix-tso-dma-api-usage-causing-oops.patch @@ -0,0 +1,83 @@ +From 7db091e222bec2ba7fbad34cbcf8a5195bfdf7b5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Dec 2024 12:40:11 +0000 +Subject: net: stmmac: fix TSO DMA API usage causing oops + +From: Russell King (Oracle) + +[ Upstream commit 4c49f38e20a57f8abaebdf95b369295b153d1f8e ] + +Commit 66600fac7a98 ("net: stmmac: TSO: Fix unbalanced DMA map/unmap +for non-paged SKB data") moved the assignment of tx_skbuff_dma[]'s +members to be later in stmmac_tso_xmit(). + +The buf (dma cookie) and len stored in this structure are passed to +dma_unmap_single() by stmmac_tx_clean(). The DMA API requires that +the dma cookie passed to dma_unmap_single() is the same as the value +returned from dma_map_single(). However, by moving the assignment +later, this is not the case when priv->dma_cap.addr64 > 32 as "des" +is offset by proto_hdr_len. + +This causes problems such as: + + dwc-eth-dwmac 2490000.ethernet eth0: Tx DMA map failed + +and with DMA_API_DEBUG enabled: + + DMA-API: dwc-eth-dwmac 2490000.ethernet: device driver tries to +free DMA memory it has not allocated [device address=0x000000ffffcf65c0] [size=66 bytes] + +Fix this by maintaining "des" as the original DMA cookie, and use +tso_des to pass the offset DMA cookie to stmmac_tso_allocator(). 
+ +Full details of the crashes can be found at: +https://lore.kernel.org/all/d8112193-0386-4e14-b516-37c2d838171a@nvidia.com/ +https://lore.kernel.org/all/klkzp5yn5kq5efgtrow6wbvnc46bcqfxs65nz3qy77ujr5turc@bwwhelz2l4dw/ + +Reported-by: Jon Hunter +Reported-by: Thierry Reding +Fixes: 66600fac7a98 ("net: stmmac: TSO: Fix unbalanced DMA map/unmap for non-paged SKB data") +Tested-by: Jon Hunter +Signed-off-by: Russell King (Oracle) +Reviewed-by: Furong Xu <0x1207@gmail.com> +Link: https://patch.msgid.link/E1tJXcx-006N4Z-PC@rmk-PC.armlinux.org.uk +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index 766213ee82c1..cf7b59b8cc64 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -4220,8 +4220,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) + struct stmmac_txq_stats *txq_stats; + struct stmmac_tx_queue *tx_q; + u32 pay_len, mss, queue; ++ dma_addr_t tso_des, des; + u8 proto_hdr_len, hdr; +- dma_addr_t des; + bool set_ic; + int i; + +@@ -4317,14 +4317,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) + + /* If needed take extra descriptors to fill the remaining payload */ + tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE; ++ tso_des = des; + } else { + stmmac_set_desc_addr(priv, first, des); + tmp_pay_len = pay_len; +- des += proto_hdr_len; ++ tso_des = des + proto_hdr_len; + pay_len = 0; + } + +- stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue); ++ stmmac_tso_allocator(priv, tso_des, tmp_pay_len, (nfrags == 0), queue); + + /* In case two or more DMA transmit descriptors are allocated for this + * non-paged SKB data, the DMA buffer address should be saved to +-- +2.39.5 + 
diff --git a/queue-6.12/p2sb-do-not-scan-and-remove-the-p2sb-device-when-it-.patch b/queue-6.12/p2sb-do-not-scan-and-remove-the-p2sb-device-when-it-.patch new file mode 100644 index 00000000000..d11e947165c --- /dev/null +++ b/queue-6.12/p2sb-do-not-scan-and-remove-the-p2sb-device-when-it-.patch @@ -0,0 +1,136 @@ +From 95a23eb5715a3a7d024f9e4171b17925524c53ba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Nov 2024 09:28:36 +0900 +Subject: p2sb: Do not scan and remove the P2SB device when it is unhidden +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Shin'ichiro Kawasaki + +[ Upstream commit 360c400d0f568636c1b98d1d5f9f49aa3d420c70 ] + +When drivers access P2SB device resources, it calls p2sb_bar(). Before +the commit 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls +during PCI device probe"), p2sb_bar() obtained the resources and then +called pci_stop_and_remove_bus_device() for clean up. Then the P2SB +device disappeared. The commit 5913320eb0b3 introduced the P2SB device +resource cache feature in the boot process. During the resource cache, +pci_stop_and_remove_bus_device() is called for the P2SB device, then the +P2SB device disappears regardless of whether p2sb_bar() is called or +not. Such P2SB device disappearance caused a confusion [1]. To avoid the +confusion, avoid the pci_stop_and_remove_bus_device() call when the BIOS +does not hide the P2SB device. + +For that purpose, cache the P2SB device resources only if the BIOS hides +the P2SB device. Call p2sb_scan_and_cache() only if p2sb_hidden_by_bios +is true. This allows removing two branches from p2sb_scan_and_cache(). +When p2sb_bar() is called, get the resources from the cache if the P2SB +device is hidden. Otherwise, read the resources from the unhidden P2SB +device. 
+ +Reported-by: Daniel Walker (danielwa) +Closes: https://lore.kernel.org/lkml/ZzTI+biIUTvFT6NC@goliath/ [1] +Fixes: 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe") +Signed-off-by: Shin'ichiro Kawasaki +Reviewed-by: Hans de Goede +Link: https://lore.kernel.org/r/20241128002836.373745-5-shinichiro.kawasaki@wdc.com +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Signed-off-by: Sasha Levin +--- + drivers/platform/x86/p2sb.c | 42 +++++++++++++++++++++++++++++-------- + 1 file changed, 33 insertions(+), 9 deletions(-) + +diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c +index 0bc6b21c4c20..c56650b9ff96 100644 +--- a/drivers/platform/x86/p2sb.c ++++ b/drivers/platform/x86/p2sb.c +@@ -100,10 +100,8 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) + /* + * The BIOS prevents the P2SB device from being enumerated by the PCI + * subsystem, so we need to unhide and hide it back to lookup the BAR. +- * Unhide the P2SB device here, if needed. 
+ */ +- if (p2sb_hidden_by_bios) +- pci_bus_write_config_dword(bus, devfn, P2SBC, 0); ++ pci_bus_write_config_dword(bus, devfn, P2SBC, 0); + + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); +@@ -112,9 +110,7 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) + if (devfn == P2SB_DEVFN_GOLDMONT) + p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT); + +- /* Hide the P2SB device, if it was hidden */ +- if (p2sb_hidden_by_bios) +- pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE); ++ pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE); + + if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) + return -ENOENT; +@@ -141,7 +137,7 @@ static int p2sb_cache_resources(void) + u32 value = P2SBC_HIDE; + struct pci_bus *bus; + u16 class; +- int ret; ++ int ret = 0; + + /* Get devfn for P2SB device itself */ + p2sb_get_devfn(&devfn_p2sb); +@@ -167,7 +163,12 @@ static int p2sb_cache_resources(void) + pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); + p2sb_hidden_by_bios = value & P2SBC_HIDE; + +- ret = p2sb_scan_and_cache(bus, devfn_p2sb); ++ /* ++ * If the BIOS does not hide the P2SB device then its resources ++ * are accesilble. Cache them only if the P2SB device is hidden. 
++ */ ++ if (p2sb_hidden_by_bios) ++ ret = p2sb_scan_and_cache(bus, devfn_p2sb); + + pci_unlock_rescan_remove(); + +@@ -190,6 +191,26 @@ static int p2sb_read_from_cache(struct pci_bus *bus, unsigned int devfn, + return 0; + } + ++static int p2sb_read_from_dev(struct pci_bus *bus, unsigned int devfn, ++ struct resource *mem) ++{ ++ struct pci_dev *pdev; ++ int ret = 0; ++ ++ pdev = pci_get_slot(bus, devfn); ++ if (!pdev) ++ return -ENODEV; ++ ++ if (p2sb_valid_resource(pci_resource_n(pdev, 0))) ++ p2sb_read_bar0(pdev, mem); ++ else ++ ret = -ENOENT; ++ ++ pci_dev_put(pdev); ++ ++ return ret; ++} ++ + /** + * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR + * @bus: PCI bus to communicate with +@@ -213,7 +234,10 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) + if (!devfn) + p2sb_get_devfn(&devfn); + +- return p2sb_read_from_cache(bus, devfn, mem); ++ if (p2sb_hidden_by_bios) ++ return p2sb_read_from_cache(bus, devfn, mem); ++ ++ return p2sb_read_from_dev(bus, devfn, mem); + } + EXPORT_SYMBOL_GPL(p2sb_bar); + +-- +2.39.5 + diff --git a/queue-6.12/p2sb-factor-out-p2sb_read_from_cache.patch b/queue-6.12/p2sb-factor-out-p2sb_read_from_cache.patch new file mode 100644 index 00000000000..f41dff32294 --- /dev/null +++ b/queue-6.12/p2sb-factor-out-p2sb_read_from_cache.patch @@ -0,0 +1,82 @@ +From 27398c9595ae269e34ca424295794a655b0c9519 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Nov 2024 09:28:33 +0900 +Subject: p2sb: Factor out p2sb_read_from_cache() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Shin'ichiro Kawasaki + +[ Upstream commit 9244524d60ddea55f4df54c51200e8fef2032447 ] + +To prepare for the following fix, factor out the code to read the P2SB +resource from the cache to the new function p2sb_read_from_cache(). 
+ +Signed-off-by: Shin'ichiro Kawasaki +Reviewed-by: Hans de Goede +Link: https://lore.kernel.org/r/20241128002836.373745-2-shinichiro.kawasaki@wdc.com +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Stable-dep-of: 360c400d0f56 ("p2sb: Do not scan and remove the P2SB device when it is unhidden") +Signed-off-by: Sasha Levin +--- + drivers/platform/x86/p2sb.c | 28 +++++++++++++++++----------- + 1 file changed, 17 insertions(+), 11 deletions(-) + +diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c +index 31f38309b389..aa34b8a69bc1 100644 +--- a/drivers/platform/x86/p2sb.c ++++ b/drivers/platform/x86/p2sb.c +@@ -171,6 +171,22 @@ static int p2sb_cache_resources(void) + return ret; + } + ++static int p2sb_read_from_cache(struct pci_bus *bus, unsigned int devfn, ++ struct resource *mem) ++{ ++ struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; ++ ++ if (cache->bus_dev_id != bus->dev.id) ++ return -ENODEV; ++ ++ if (!p2sb_valid_resource(&cache->res)) ++ return -ENOENT; ++ ++ memcpy(mem, &cache->res, sizeof(*mem)); ++ ++ return 0; ++} ++ + /** + * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR + * @bus: PCI bus to communicate with +@@ -187,8 +203,6 @@ static int p2sb_cache_resources(void) + */ + int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) + { +- struct p2sb_res_cache *cache; +- + bus = p2sb_get_bus(bus); + if (!bus) + return -ENODEV; +@@ -196,15 +210,7 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) + if (!devfn) + p2sb_get_devfn(&devfn); + +- cache = &p2sb_resources[PCI_FUNC(devfn)]; +- if (cache->bus_dev_id != bus->dev.id) +- return -ENODEV; +- +- if (!p2sb_valid_resource(&cache->res)) +- return -ENOENT; +- +- memcpy(mem, &cache->res, sizeof(*mem)); +- return 0; ++ return p2sb_read_from_cache(bus, devfn, mem); + } + EXPORT_SYMBOL_GPL(p2sb_bar); + +-- +2.39.5 + diff --git a/queue-6.12/p2sb-introduce-the-global-flag-p2sb_hidden_by_bios.patch 
b/queue-6.12/p2sb-introduce-the-global-flag-p2sb_hidden_by_bios.patch new file mode 100644 index 00000000000..40b0a5bb675 --- /dev/null +++ b/queue-6.12/p2sb-introduce-the-global-flag-p2sb_hidden_by_bios.patch @@ -0,0 +1,60 @@ +From 1da75876350e6c54be662f7dbf00e4b2261190e8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Nov 2024 09:28:34 +0900 +Subject: p2sb: Introduce the global flag p2sb_hidden_by_bios +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Shin'ichiro Kawasaki + +[ Upstream commit ae3e6ebc5ab046d434c05c58a3e3f7e94441fec2 ] + +To prepare for the following fix, introduce the global flag +p2sb_hidden_by_bios. Check if the BIOS hides the P2SB device and store +the result in the flag. This allows to refer to the check result across +functions. + +Signed-off-by: Shin'ichiro Kawasaki +Reviewed-by: Hans de Goede +Link: https://lore.kernel.org/r/20241128002836.373745-3-shinichiro.kawasaki@wdc.com +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Stable-dep-of: 360c400d0f56 ("p2sb: Do not scan and remove the P2SB device when it is unhidden") +Signed-off-by: Sasha Levin +--- + drivers/platform/x86/p2sb.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c +index aa34b8a69bc1..273ac90c8fbd 100644 +--- a/drivers/platform/x86/p2sb.c ++++ b/drivers/platform/x86/p2sb.c +@@ -42,6 +42,7 @@ struct p2sb_res_cache { + }; + + static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; ++static bool p2sb_hidden_by_bios; + + static void p2sb_get_devfn(unsigned int *devfn) + { +@@ -157,13 +158,14 @@ static int p2sb_cache_resources(void) + * Unhide the P2SB device here, if needed. 
+ */ + pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); +- if (value & P2SBC_HIDE) ++ p2sb_hidden_by_bios = value & P2SBC_HIDE; ++ if (p2sb_hidden_by_bios) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); + + ret = p2sb_scan_and_cache(bus, devfn_p2sb); + + /* Hide the P2SB device, if it was hidden */ +- if (value & P2SBC_HIDE) ++ if (p2sb_hidden_by_bios) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); + + pci_unlock_rescan_remove(); +-- +2.39.5 + diff --git a/queue-6.12/p2sb-move-p2sb-hide-and-unhide-code-to-p2sb_scan_and.patch b/queue-6.12/p2sb-move-p2sb-hide-and-unhide-code-to-p2sb_scan_and.patch new file mode 100644 index 00000000000..30a5f893075 --- /dev/null +++ b/queue-6.12/p2sb-move-p2sb-hide-and-unhide-code-to-p2sb_scan_and.patch @@ -0,0 +1,82 @@ +From 2db2be13b4ed140c36ac1a0b515d000ee3274e6d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Nov 2024 09:28:35 +0900 +Subject: p2sb: Move P2SB hide and unhide code to p2sb_scan_and_cache() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Shin'ichiro Kawasaki + +[ Upstream commit 0286070c74ee48391fc07f7f617460479472d221 ] + +To prepare for the following fix, move the code to hide and unhide the +P2SB device from p2sb_cache_resources() to p2sb_scan_and_cache(). 
+ +Signed-off-by: Shin'ichiro Kawasaki +Reviewed-by: Hans de Goede +Link: https://lore.kernel.org/r/20241128002836.373745-4-shinichiro.kawasaki@wdc.com +Reviewed-by: Ilpo Järvinen +Signed-off-by: Ilpo Järvinen +Stable-dep-of: 360c400d0f56 ("p2sb: Do not scan and remove the P2SB device when it is unhidden") +Signed-off-by: Sasha Levin +--- + drivers/platform/x86/p2sb.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c +index 273ac90c8fbd..0bc6b21c4c20 100644 +--- a/drivers/platform/x86/p2sb.c ++++ b/drivers/platform/x86/p2sb.c +@@ -97,6 +97,14 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) + + static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) + { ++ /* ++ * The BIOS prevents the P2SB device from being enumerated by the PCI ++ * subsystem, so we need to unhide and hide it back to lookup the BAR. ++ * Unhide the P2SB device here, if needed. ++ */ ++ if (p2sb_hidden_by_bios) ++ pci_bus_write_config_dword(bus, devfn, P2SBC, 0); ++ + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + +@@ -104,6 +112,10 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) + if (devfn == P2SB_DEVFN_GOLDMONT) + p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT); + ++ /* Hide the P2SB device, if it was hidden */ ++ if (p2sb_hidden_by_bios) ++ pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE); ++ + if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) + return -ENOENT; + +@@ -152,22 +164,11 @@ static int p2sb_cache_resources(void) + */ + pci_lock_rescan_remove(); + +- /* +- * The BIOS prevents the P2SB device from being enumerated by the PCI +- * subsystem, so we need to unhide and hide it back to lookup the BAR. +- * Unhide the P2SB device here, if needed. 
+- */ + pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); + p2sb_hidden_by_bios = value & P2SBC_HIDE; +- if (p2sb_hidden_by_bios) +- pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); + + ret = p2sb_scan_and_cache(bus, devfn_p2sb); + +- /* Hide the P2SB device, if it was hidden */ +- if (p2sb_hidden_by_bios) +- pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); +- + pci_unlock_rescan_remove(); + + return ret; +-- +2.39.5 + diff --git a/queue-6.12/risc-v-kvm-fix-csr_write-csr_set-for-hvien-pmu-overf.patch b/queue-6.12/risc-v-kvm-fix-csr_write-csr_set-for-hvien-pmu-overf.patch new file mode 100644 index 00000000000..67c91be536f --- /dev/null +++ b/queue-6.12/risc-v-kvm-fix-csr_write-csr_set-for-hvien-pmu-overf.patch @@ -0,0 +1,39 @@ +From e357b868a26c8404b85881e96039e0d7dbf67171 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 27 Nov 2024 04:18:40 +0000 +Subject: RISC-V: KVM: Fix csr_write -> csr_set for HVIEN PMU overflow bit + +From: Michael Neuling + +[ Upstream commit ea6398a5af81e3e7fb3da5d261694d479a321fd9 ] + +This doesn't cause a problem currently as HVIEN isn't used elsewhere +yet. Found by inspection. 
+ +Signed-off-by: Michael Neuling +Fixes: 16b0bde9a37c ("RISC-V: KVM: Add perf sampling support for guests") +Reviewed-by: Atish Patra +Reviewed-by: Anup Patel +Link: https://lore.kernel.org/r/20241127041840.419940-1-michaelneuling@tenstorrent.com +Signed-off-by: Anup Patel +Signed-off-by: Sasha Levin +--- + arch/riscv/kvm/aia.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c +index 2967d305c442..9f3b527596de 100644 +--- a/arch/riscv/kvm/aia.c ++++ b/arch/riscv/kvm/aia.c +@@ -552,7 +552,7 @@ void kvm_riscv_aia_enable(void) + csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); + /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) +- csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF)); ++ csr_set(CSR_HVIEN, BIT(IRQ_PMU_OVF)); + } + + void kvm_riscv_aia_disable(void) +-- +2.39.5 + diff --git a/queue-6.12/s390-ipl-fix-never-less-than-zero-warning.patch b/queue-6.12/s390-ipl-fix-never-less-than-zero-warning.patch new file mode 100644 index 00000000000..2835ebbaee3 --- /dev/null +++ b/queue-6.12/s390-ipl-fix-never-less-than-zero-warning.patch @@ -0,0 +1,38 @@ +From 0d7624d8f519497f54bf832e96a0d8646cc8fe01 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Dec 2024 17:43:48 +0100 +Subject: s390/ipl: Fix never less than zero warning + +From: Alexander Gordeev + +[ Upstream commit 5fa49dd8e521a42379e5e41fcf2c92edaaec0a8b ] + +DEFINE_IPL_ATTR_STR_RW() macro produces "unsigned 'len' is never less +than zero." warning when sys_vmcmd_on_*_store() callbacks are defined. 
+ +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202412081614.5uel8F6W-lkp@intel.com/ +Fixes: 247576bf624a ("s390/ipl: Do not accept z/VM CP diag X'008' cmds longer than max length") +Reviewed-by: Heiko Carstens +Signed-off-by: Alexander Gordeev +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/ipl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c +index f17bb7bf9392..5fa203f4bc6b 100644 +--- a/arch/s390/kernel/ipl.c ++++ b/arch/s390/kernel/ipl.c +@@ -270,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + if (len >= sizeof(_value)) \ + return -E2BIG; \ + len = strscpy(_value, buf, sizeof(_value)); \ +- if (len < 0) \ ++ if ((ssize_t)len < 0) \ + return len; \ + strim(_value); \ + return len; \ +-- +2.39.5 + diff --git a/queue-6.12/s390-mm-consider-kmsan-modules-metadata-for-paging-l.patch b/queue-6.12/s390-mm-consider-kmsan-modules-metadata-for-paging-l.patch new file mode 100644 index 00000000000..361a0520777 --- /dev/null +++ b/queue-6.12/s390-mm-consider-kmsan-modules-metadata-for-paging-l.patch @@ -0,0 +1,41 @@ +From 025f5a1045222edc3b8a26a391636ca98d5dbcdf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 12:35:34 +0100 +Subject: s390/mm: Consider KMSAN modules metadata for paging levels + +From: Vasily Gorbik + +[ Upstream commit 282da38b465395c930687974627c24f47ddce5ff ] + +The calculation determining whether to use three- or four-level paging +didn't account for KMSAN modules metadata. Include this metadata in the +virtual memory size calculation to ensure correct paging mode selection +and avoiding potentially unnecessary physical memory size limitations. 
+ +Fixes: 65ca73f9fb36 ("s390/mm: define KMSAN metadata for vmalloc and modules") +Acked-by: Heiko Carstens +Reviewed-by: Alexander Gordeev +Reviewed-by: Ilya Leoshkevich +Signed-off-by: Vasily Gorbik +Signed-off-by: Alexander Gordeev +Signed-off-by: Sasha Levin +--- + arch/s390/boot/startup.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c +index c8f149ad77e5..c2ee0745f59e 100644 +--- a/arch/s390/boot/startup.c ++++ b/arch/s390/boot/startup.c +@@ -231,6 +231,8 @@ static unsigned long get_vmem_size(unsigned long identity_size, + vsize = round_up(SZ_2G + max_mappable, rte_size) + + round_up(vmemmap_size, rte_size) + + FIXMAP_SIZE + MODULES_LEN + KASLR_LEN; ++ if (IS_ENABLED(CONFIG_KMSAN)) ++ vsize += MODULES_LEN * 2; + return size_add(vsize, vmalloc_size); + } + +-- +2.39.5 + diff --git a/queue-6.12/sched-dlserver-fix-dlserver-double-enqueue.patch b/queue-6.12/sched-dlserver-fix-dlserver-double-enqueue.patch new file mode 100644 index 00000000000..c90f6342db6 --- /dev/null +++ b/queue-6.12/sched-dlserver-fix-dlserver-double-enqueue.patch @@ -0,0 +1,164 @@ +From 44c8bb66dcae7fbaaa7077cd0e045d83aac1b658 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Dec 2024 22:22:36 -0500 +Subject: sched/dlserver: Fix dlserver double enqueue + +From: Vineeth Pillai (Google) + +[ Upstream commit b53127db1dbf7f1047cf35c10922d801dcd40324 ] + +dlserver can get dequeued during a dlserver pick_task due to the delayed +dequeue feature and this can lead to issues with dlserver logic as it +still thinks that dlserver is on the runqueue. The dlserver throttling +and replenish logic gets confused and can lead to double enqueue of +dlserver. 
+ +Double enqueue of dlserver could happen due to a couple of reasons: + +Case 1 +------ + +Delayed dequeue feature[1] can cause dlserver being stopped during a +pick initiated by dlserver: + __pick_next_task + pick_task_dl -> server_pick_task + pick_task_fair + pick_next_entity (if (sched_delayed)) + dequeue_entities + dl_server_stop + +server_pick_task goes ahead with update_curr_dl_se without knowing that +dlserver is dequeued and this confuses the logic and may lead to +unintended enqueue while the server is stopped. + +Case 2 +------ +A race condition between a task dequeue on one cpu and same task's enqueue +on this cpu by a remote cpu while the lock is released causing dlserver +double enqueue. + +One cpu would be in the schedule() and releasing RQ-lock: + +current->state = TASK_INTERRUPTIBLE(); + schedule(); + deactivate_task() + dl_server_stop(); + pick_next_task() + pick_next_task_fair() + sched_balance_newidle() + rq_unlock(this_rq) + +at which point another CPU can take our RQ-lock and do: + + try_to_wake_up() + ttwu_queue() + rq_lock() + ... + activate_task() + dl_server_start() --> first enqueue + wakeup_preempt() := check_preempt_wakeup_fair() + update_curr() + update_curr_task() + if (current->dl_server) + dl_server_update() + enqueue_dl_entity() --> second enqueue + +This bug was not apparent as the enqueue in dl_server_start doesn't +usually happen because of the defer logic. But as a side effect of the +first case (dequeue during dlserver pick), dl_throttled and dl_yielded will +be set and this causes the time accounting of dlserver to mess up, +then leading to an enqueue in dl_server_start. + +Have an explicit flag representing the status of dlserver to avoid the +confusion. This is set in dl_server_start and reset in dl_server_stop. 
+ +Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers") +Suggested-by: Peter Zijlstra +Signed-off-by: "Vineeth Pillai (Google)" +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: Marcel Ziswiler # ROCK 5B +Link: https://lkml.kernel.org/r/20241213032244.877029-1-vineeth@bitbyteword.org +Signed-off-by: Sasha Levin +--- + include/linux/sched.h | 7 +++++++ + kernel/sched/deadline.c | 8 ++++++-- + kernel/sched/sched.h | 5 +++++ + 3 files changed, 18 insertions(+), 2 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index bb343136ddd0..c14446c6164d 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -656,6 +656,12 @@ struct sched_dl_entity { + * @dl_defer_armed tells if the deferrable server is waiting + * for the replenishment timer to activate it. + * ++ * @dl_server_active tells if the dlserver is active(started). ++ * dlserver is started on first cfs enqueue on an idle runqueue ++ * and is stopped when a dequeue results in 0 cfs tasks on the ++ * runqueue. In other words, dlserver is active only when cpu's ++ * runqueue has atleast one cfs task. ++ * + * @dl_defer_running tells if the deferrable server is actually + * running, skipping the defer phase. 
+ */ +@@ -664,6 +670,7 @@ struct sched_dl_entity { + unsigned int dl_non_contending : 1; + unsigned int dl_overrun : 1; + unsigned int dl_server : 1; ++ unsigned int dl_server_active : 1; + unsigned int dl_defer : 1; + unsigned int dl_defer_armed : 1; + unsigned int dl_defer_running : 1; +diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c +index fc6f41ac33eb..a17c23b53049 100644 +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -1647,6 +1647,7 @@ void dl_server_start(struct sched_dl_entity *dl_se) + if (!dl_se->dl_runtime) + return; + ++ dl_se->dl_server_active = 1; + enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP); + if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl)) + resched_curr(dl_se->rq); +@@ -1661,6 +1662,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se) + hrtimer_try_to_cancel(&dl_se->dl_timer); + dl_se->dl_defer_armed = 0; + dl_se->dl_throttled = 0; ++ dl_se->dl_server_active = 0; + } + + void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, +@@ -2420,8 +2422,10 @@ static struct task_struct *__pick_task_dl(struct rq *rq) + if (dl_server(dl_se)) { + p = dl_se->server_pick_task(dl_se); + if (!p) { +- dl_se->dl_yielded = 1; +- update_curr_dl_se(rq, dl_se, 0); ++ if (dl_server_active(dl_se)) { ++ dl_se->dl_yielded = 1; ++ update_curr_dl_se(rq, dl_se, 0); ++ } + goto again; + } + rq->dl_server = dl_se; +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index c53696275ca1..f2ef520513c4 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -398,6 +398,11 @@ extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq + extern int dl_server_apply_params(struct sched_dl_entity *dl_se, + u64 runtime, u64 period, bool init); + ++static inline bool dl_server_active(struct sched_dl_entity *dl_se) ++{ ++ return dl_se->dl_server_active; ++} ++ + #ifdef CONFIG_CGROUP_SCHED + + extern struct list_head task_groups; +-- +2.39.5 + diff --git 
a/queue-6.12/sched-dlserver-fix-dlserver-time-accounting.patch b/queue-6.12/sched-dlserver-fix-dlserver-time-accounting.patch new file mode 100644 index 00000000000..37db719d34d --- /dev/null +++ b/queue-6.12/sched-dlserver-fix-dlserver-time-accounting.patch @@ -0,0 +1,70 @@ +From c54ff10a7b8301b633e1aa2769b493f1ab170b56 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Dec 2024 22:22:37 -0500 +Subject: sched/dlserver: Fix dlserver time accounting + +From: Vineeth Pillai (Google) + +[ Upstream commit c7f7e9c73178e0e342486fd31e7f363ef60e3f83 ] + +dlserver time is accounted when: + - dlserver is active and the dlserver proxies the cfs task. + - dlserver is active but deferred and cfs task runs after being picked + through the normal fair class pick. + +dl_server_update is called in two places to make sure that both the +above times are accounted for. But it doesn't check if dlserver is +active or not. Now that we have this dl_server_active flag, we can +consolidate dl_server_update into one place and all we need to check is +whether dlserver is active or not. When dlserver is active there are only +two possible conditions: + - dlserver is deferred. + - cfs task is running on behalf of dlserver. 
+ +Fixes: a110a81c52a9 ("sched/deadline: Deferrable dl server") +Signed-off-by: "Vineeth Pillai (Google)" +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: Marcel Ziswiler # ROCK 5B +Link: https://lore.kernel.org/r/20241213032244.877029-2-vineeth@bitbyteword.org +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 93142f9077c7..1ca96c99872f 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -1159,8 +1159,6 @@ static inline void update_curr_task(struct task_struct *p, s64 delta_exec) + trace_sched_stat_runtime(p, delta_exec); + account_group_exec_runtime(p, delta_exec); + cgroup_account_cputime(p, delta_exec); +- if (p->dl_server) +- dl_server_update(p->dl_server, delta_exec); + } + + static inline bool did_preempt_short(struct cfs_rq *cfs_rq, struct sched_entity *curr) +@@ -1237,11 +1235,16 @@ static void update_curr(struct cfs_rq *cfs_rq) + update_curr_task(p, delta_exec); + + /* +- * Any fair task that runs outside of fair_server should +- * account against fair_server such that it can account for +- * this time and possibly avoid running this period. ++ * If the fair_server is active, we need to account for the ++ * fair_server time whether or not the task is running on ++ * behalf of fair_server or not: ++ * - If the task is running on behalf of fair_server, we need ++ * to limit its time based on the assigned runtime. ++ * - Fair task that runs outside of fair_server should account ++ * against fair_server such that it can account for this time ++ * and possibly avoid running this period. 
+ */ +- if (p->dl_server != &rq->fair_server) ++ if (dl_server_active(&rq->fair_server)) + dl_server_update(&rq->fair_server, delta_exec); + } + +-- +2.39.5 + diff --git a/queue-6.12/sched-eevdf-more-pelt-vs-delayed_dequeue.patch b/queue-6.12/sched-eevdf-more-pelt-vs-delayed_dequeue.patch new file mode 100644 index 00000000000..61068152fdb --- /dev/null +++ b/queue-6.12/sched-eevdf-more-pelt-vs-delayed_dequeue.patch @@ -0,0 +1,282 @@ +From 6d6343fb58e0c45c04860781661503dcde5fbd6c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Dec 2024 18:45:57 +0100 +Subject: sched/eevdf: More PELT vs DELAYED_DEQUEUE + +From: Peter Zijlstra + +[ Upstream commit 76f2f783294d7d55c2564e2dfb0a7279ba0bc264 ] + +Vincent and Dietmar noted that while +commit fc1892becd56 ("sched/eevdf: Fixup PELT vs DELAYED_DEQUEUE") fixes +the entity runnable stats, it does not adjust the cfs_rq runnable stats, +which are based off of h_nr_running. + +Track h_nr_delayed such that we can discount those and adjust the +signal. 
+ +Fixes: fc1892becd56 ("sched/eevdf: Fixup PELT vs DELAYED_DEQUEUE") +Closes: https://lore.kernel.org/lkml/a9a45193-d0c6-4ba2-a822-464ad30b550e@arm.com/ +Closes: https://lore.kernel.org/lkml/CAKfTPtCNUvWE_GX5LyvTF-WdxUT=ZgvZZv-4t=eWntg5uOFqiQ@mail.gmail.com/ +[ Fixes checkpatch warnings and rebased ] +Signed-off-by: Peter Zijlstra (Intel) +Reported-by: Dietmar Eggemann +Reported-by: Vincent Guittot +Signed-off-by: "Peter Zijlstra (Intel)" +Signed-off-by: Vincent Guittot +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dietmar Eggemann +Tested-by: K Prateek Nayak +Link: https://lore.kernel.org/r/20241202174606.4074512-3-vincent.guittot@linaro.org +Signed-off-by: Sasha Levin +--- + kernel/sched/debug.c | 1 + + kernel/sched/fair.c | 51 +++++++++++++++++++++++++++++++++++++++----- + kernel/sched/pelt.c | 2 +- + kernel/sched/sched.h | 8 +++++-- + 4 files changed, 54 insertions(+), 8 deletions(-) + +diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c +index f4035c7a0fa1..82b165bf48c4 100644 +--- a/kernel/sched/debug.c ++++ b/kernel/sched/debug.c +@@ -844,6 +844,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread)); + SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); + SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running); ++ SEQ_printf(m, " .%-30s: %d\n", "h_nr_delayed", cfs_rq->h_nr_delayed); + SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running", + cfs_rq->idle_nr_running); + SEQ_printf(m, " .%-30s: %d\n", "idle_h_nr_running", +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index c467e389cd6f..93142f9077c7 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5471,9 +5471,33 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) + + static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); + +-static inline void finish_delayed_dequeue_entity(struct sched_entity *se) ++static void 
set_delayed(struct sched_entity *se) ++{ ++ se->sched_delayed = 1; ++ for_each_sched_entity(se) { ++ struct cfs_rq *cfs_rq = cfs_rq_of(se); ++ ++ cfs_rq->h_nr_delayed++; ++ if (cfs_rq_throttled(cfs_rq)) ++ break; ++ } ++} ++ ++static void clear_delayed(struct sched_entity *se) + { + se->sched_delayed = 0; ++ for_each_sched_entity(se) { ++ struct cfs_rq *cfs_rq = cfs_rq_of(se); ++ ++ cfs_rq->h_nr_delayed--; ++ if (cfs_rq_throttled(cfs_rq)) ++ break; ++ } ++} ++ ++static inline void finish_delayed_dequeue_entity(struct sched_entity *se) ++{ ++ clear_delayed(se); + if (sched_feat(DELAY_ZERO) && se->vlag > 0) + se->vlag = 0; + } +@@ -5502,7 +5526,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + if (sched_feat(DELAY_DEQUEUE) && delay && + !entity_eligible(cfs_rq, se)) { + update_load_avg(cfs_rq, se, 0); +- se->sched_delayed = 1; ++ set_delayed(se); + return false; + } + } +@@ -5920,7 +5944,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) + struct rq *rq = rq_of(cfs_rq); + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + struct sched_entity *se; +- long task_delta, idle_task_delta, dequeue = 1; ++ long task_delta, idle_task_delta, delayed_delta, dequeue = 1; + long rq_h_nr_running = rq->cfs.h_nr_running; + + raw_spin_lock(&cfs_b->lock); +@@ -5953,6 +5977,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) + + task_delta = cfs_rq->h_nr_running; + idle_task_delta = cfs_rq->idle_h_nr_running; ++ delayed_delta = cfs_rq->h_nr_delayed; + for_each_sched_entity(se) { + struct cfs_rq *qcfs_rq = cfs_rq_of(se); + int flags; +@@ -5976,6 +6001,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) + + qcfs_rq->h_nr_running -= task_delta; + qcfs_rq->idle_h_nr_running -= idle_task_delta; ++ qcfs_rq->h_nr_delayed -= delayed_delta; + + if (qcfs_rq->load.weight) { + /* Avoid re-evaluating load for this entity: */ +@@ -5998,6 +6024,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) + + qcfs_rq->h_nr_running -= task_delta; + 
qcfs_rq->idle_h_nr_running -= idle_task_delta; ++ qcfs_rq->h_nr_delayed -= delayed_delta; + } + + /* At this point se is NULL and we are at root level*/ +@@ -6023,7 +6050,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) + struct rq *rq = rq_of(cfs_rq); + struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); + struct sched_entity *se; +- long task_delta, idle_task_delta; ++ long task_delta, idle_task_delta, delayed_delta; + long rq_h_nr_running = rq->cfs.h_nr_running; + + se = cfs_rq->tg->se[cpu_of(rq)]; +@@ -6059,6 +6086,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) + + task_delta = cfs_rq->h_nr_running; + idle_task_delta = cfs_rq->idle_h_nr_running; ++ delayed_delta = cfs_rq->h_nr_delayed; + for_each_sched_entity(se) { + struct cfs_rq *qcfs_rq = cfs_rq_of(se); + +@@ -6076,6 +6104,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) + + qcfs_rq->h_nr_running += task_delta; + qcfs_rq->idle_h_nr_running += idle_task_delta; ++ qcfs_rq->h_nr_delayed += delayed_delta; + + /* end evaluation on encountering a throttled cfs_rq */ + if (cfs_rq_throttled(qcfs_rq)) +@@ -6093,6 +6122,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) + + qcfs_rq->h_nr_running += task_delta; + qcfs_rq->idle_h_nr_running += idle_task_delta; ++ qcfs_rq->h_nr_delayed += delayed_delta; + + /* end evaluation on encountering a throttled cfs_rq */ + if (cfs_rq_throttled(qcfs_rq)) +@@ -6946,7 +6976,7 @@ requeue_delayed_entity(struct sched_entity *se) + } + + update_load_avg(cfs_rq, se, 0); +- se->sched_delayed = 0; ++ clear_delayed(se); + } + + /* +@@ -6960,6 +6990,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) + struct cfs_rq *cfs_rq; + struct sched_entity *se = &p->se; + int idle_h_nr_running = task_has_idle_policy(p); ++ int h_nr_delayed = 0; + int task_new = !(flags & ENQUEUE_WAKEUP); + int rq_h_nr_running = rq->cfs.h_nr_running; + u64 slice = 0; +@@ -6986,6 +7017,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) + if (p->in_iowait) + 
cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); + ++ if (task_new) ++ h_nr_delayed = !!se->sched_delayed; ++ + for_each_sched_entity(se) { + if (se->on_rq) { + if (se->sched_delayed) +@@ -7008,6 +7042,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) + + cfs_rq->h_nr_running++; + cfs_rq->idle_h_nr_running += idle_h_nr_running; ++ cfs_rq->h_nr_delayed += h_nr_delayed; + + if (cfs_rq_is_idle(cfs_rq)) + idle_h_nr_running = 1; +@@ -7031,6 +7066,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) + + cfs_rq->h_nr_running++; + cfs_rq->idle_h_nr_running += idle_h_nr_running; ++ cfs_rq->h_nr_delayed += h_nr_delayed; + + if (cfs_rq_is_idle(cfs_rq)) + idle_h_nr_running = 1; +@@ -7093,6 +7129,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) + struct task_struct *p = NULL; + int idle_h_nr_running = 0; + int h_nr_running = 0; ++ int h_nr_delayed = 0; + struct cfs_rq *cfs_rq; + u64 slice = 0; + +@@ -7100,6 +7137,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) + p = task_of(se); + h_nr_running = 1; + idle_h_nr_running = task_has_idle_policy(p); ++ if (!task_sleep && !task_delayed) ++ h_nr_delayed = !!se->sched_delayed; + } else { + cfs_rq = group_cfs_rq(se); + slice = cfs_rq_min_slice(cfs_rq); +@@ -7117,6 +7156,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) + + cfs_rq->h_nr_running -= h_nr_running; + cfs_rq->idle_h_nr_running -= idle_h_nr_running; ++ cfs_rq->h_nr_delayed -= h_nr_delayed; + + if (cfs_rq_is_idle(cfs_rq)) + idle_h_nr_running = h_nr_running; +@@ -7155,6 +7195,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) + + cfs_rq->h_nr_running -= h_nr_running; + cfs_rq->idle_h_nr_running -= idle_h_nr_running; ++ cfs_rq->h_nr_delayed -= h_nr_delayed; + + if (cfs_rq_is_idle(cfs_rq)) + idle_h_nr_running = h_nr_running; +diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c +index 
a9c65d97b3ca..171a802420a1 100644 +--- a/kernel/sched/pelt.c ++++ b/kernel/sched/pelt.c +@@ -321,7 +321,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq) + { + if (___update_load_sum(now, &cfs_rq->avg, + scale_load_down(cfs_rq->load.weight), +- cfs_rq->h_nr_running, ++ cfs_rq->h_nr_running - cfs_rq->h_nr_delayed, + cfs_rq->curr != NULL)) { + + ___update_load_avg(&cfs_rq->avg, 1); +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index c03b3d7b320e..c53696275ca1 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -649,6 +649,7 @@ struct cfs_rq { + unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */ + unsigned int idle_nr_running; /* SCHED_IDLE */ + unsigned int idle_h_nr_running; /* SCHED_IDLE */ ++ unsigned int h_nr_delayed; + + s64 avg_vruntime; + u64 avg_load; +@@ -898,8 +899,11 @@ struct dl_rq { + + static inline void se_update_runnable(struct sched_entity *se) + { +- if (!entity_is_task(se)) +- se->runnable_weight = se->my_q->h_nr_running; ++ if (!entity_is_task(se)) { ++ struct cfs_rq *cfs_rq = se->my_q; ++ ++ se->runnable_weight = cfs_rq->h_nr_running - cfs_rq->h_nr_delayed; ++ } + } + + static inline long se_runnable(struct sched_entity *se) +-- +2.39.5 + diff --git a/queue-6.12/sched-fair-fix-next_buddy.patch b/queue-6.12/sched-fair-fix-next_buddy.patch new file mode 100644 index 00000000000..8c35cd2008d --- /dev/null +++ b/queue-6.12/sched-fair-fix-next_buddy.patch @@ -0,0 +1,69 @@ +From 4ad329aa5f61d1ce6c81094d25ee4a35dc4e5ebb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Nov 2024 12:59:54 +0530 +Subject: sched/fair: Fix NEXT_BUDDY + +From: K Prateek Nayak + +[ Upstream commit 493afbd187c4c9cc1642792c0d9ba400c3d6d90d ] + +Adam reports that enabling NEXT_BUDDY insta triggers a WARN in +pick_next_entity(). + +Moving clear_buddies() up before the delayed dequeue bits ensures +no ->next buddy becomes delayed. Further ensure no new ->next buddy +ever starts as delayed. 
+ +Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue") +Reported-by: Adam Li +Signed-off-by: K Prateek Nayak +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: Adam Li +Link: https://lkml.kernel.org/r/670a0d54-e398-4b1f-8a6e-90784e2fdf89@amd.com +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 782ce70ebd1b..c467e389cd6f 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5484,6 +5484,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + bool sleep = flags & DEQUEUE_SLEEP; + + update_curr(cfs_rq); ++ clear_buddies(cfs_rq, se); + + if (flags & DEQUEUE_DELAYED) { + SCHED_WARN_ON(!se->sched_delayed); +@@ -5500,8 +5501,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + + if (sched_feat(DELAY_DEQUEUE) && delay && + !entity_eligible(cfs_rq, se)) { +- if (cfs_rq->next == se) +- cfs_rq->next = NULL; + update_load_avg(cfs_rq, se, 0); + se->sched_delayed = 1; + return false; +@@ -5526,8 +5525,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + + update_stats_dequeue_fair(cfs_rq, se, flags); + +- clear_buddies(cfs_rq, se); +- + update_entity_lag(cfs_rq, se); + if (sched_feat(PLACE_REL_DEADLINE) && !sleep) { + se->deadline -= se->vruntime; +@@ -8786,7 +8783,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int + if (unlikely(throttled_hierarchy(cfs_rq_of(pse)))) + return; + +- if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) { ++ if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) { + set_next_buddy(pse); + } + +-- +2.39.5 + diff --git a/queue-6.12/sched-fair-fix-sched_can_stop_tick-for-fair-tasks.patch b/queue-6.12/sched-fair-fix-sched_can_stop_tick-for-fair-tasks.patch new file mode 100644 index 00000000000..c05fff290db --- /dev/null +++ 
b/queue-6.12/sched-fair-fix-sched_can_stop_tick-for-fair-tasks.patch @@ -0,0 +1,42 @@ +From 4293c828ce43bf82f65abe9fd85d5138ddb58d99 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Dec 2024 18:45:56 +0100 +Subject: sched/fair: Fix sched_can_stop_tick() for fair tasks + +From: Vincent Guittot + +[ Upstream commit c1f43c342e1f2e32f0620bf2e972e2a9ea0a1e60 ] + +We can't stop the tick of a rq if there are at least 2 tasks enqueued in +the whole hierarchy and not only at the root cfs rq. + +rq->cfs.nr_running tracks the number of sched_entity at one level +whereas rq->cfs.h_nr_running tracks all queued tasks in the +hierarchy. + +Fixes: 11cc374f4643b ("sched_ext: Simplify scx_can_stop_tick() invocation in sched_can_stop_tick()") +Signed-off-by: Vincent Guittot +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dietmar Eggemann +Link: https://lore.kernel.org/r/20241202174606.4074512-2-vincent.guittot@linaro.org +Signed-off-by: Sasha Levin +--- + kernel/sched/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 6cc12777bb11..d07dc87787df 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -1300,7 +1300,7 @@ bool sched_can_stop_tick(struct rq *rq) + if (scx_enabled() && !scx_can_stop_tick(rq)) + return false; + +- if (rq->cfs.nr_running > 1) ++ if (rq->cfs.h_nr_running > 1) + return false; + + /* +-- +2.39.5 + diff --git a/queue-6.12/series b/queue-6.12/series index 19e6f6cda0b..9a2830f783a 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -1 +1,27 @@ net-sched-fix-ordering-of-qlen-adjustment.patch +net-stmmac-fix-tso-dma-api-usage-causing-oops.patch +firmware-arm_scmi-fix-i.mx-build-dependency.patch +firmware-arm_ffa-fix-the-race-around-setting-ffa_dev.patch +risc-v-kvm-fix-csr_write-csr_set-for-hvien-pmu-overf.patch +sched-fair-fix-next_buddy.patch +sched-fair-fix-sched_can_stop_tick-for-fair-tasks.patch +sched-eevdf-more-pelt-vs-delayed_dequeue.patch 
+p2sb-factor-out-p2sb_read_from_cache.patch +p2sb-introduce-the-global-flag-p2sb_hidden_by_bios.patch +p2sb-move-p2sb-hide-and-unhide-code-to-p2sb_scan_and.patch +p2sb-do-not-scan-and-remove-the-p2sb-device-when-it-.patch +i2c-pnx-fix-timeout-in-wait-functions.patch +s390-ipl-fix-never-less-than-zero-warning.patch +erofs-fix-psi-memstall-accounting.patch +sched-dlserver-fix-dlserver-double-enqueue.patch +sched-dlserver-fix-dlserver-time-accounting.patch +s390-mm-consider-kmsan-modules-metadata-for-paging-l.patch +erofs-add-erofs_sb_free-helper.patch +erofs-use-struct-erofs_device_info-for-the-primary-d.patch +erofs-reference-struct-erofs_device_info-for-erofs_m.patch +erofs-use-buffered-i-o-for-file-backed-mounts-by-def.patch +xfs-sb_spino_align-is-not-verified.patch +xfs-fix-sparse-inode-limits-on-runt-ag.patch +xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch +xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch +xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch diff --git a/queue-6.12/xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch b/queue-6.12/xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch new file mode 100644 index 00000000000..50a82621486 --- /dev/null +++ b/queue-6.12/xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch @@ -0,0 +1,114 @@ +From 289808d43a0780664c93ce21d6b74573775fbc3b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Dec 2024 11:50:52 -0800 +Subject: xfs: fix off-by-one error in fsmap's end_daddr usage + +From: Darrick J. Wong + +commit a440a28ddbdcb861150987b4d6e828631656b92f upstream. + +In commit ca6448aed4f10a, we created an "end_daddr" variable to fix +fsmap reporting when the end of the range requested falls in the middle +of an unknown (aka free on the rmapbt) region. Unfortunately, I didn't +notice that the code sets end_daddr to the last sector of the device +but then uses that quantity to compute the length of the synthesized +mapping. 
+ +Zizhi Wo later observed that when end_daddr isn't set, we still don't +report the last fsblock on a device because in that case (aka when +info->last is true), the info->high mapping that we pass to +xfs_getfsmap_group_helper has a startblock that points to the last +fsblock. This is also wrong because the code uses startblock to +compute the length of the synthesized mapping. + +Fix the second problem by setting end_daddr unconditionally, and fix the +first problem by setting start_daddr to one past the end of the range to +query. + +Cc: # v6.11 +Fixes: ca6448aed4f10a ("xfs: Fix missing interval for missing_owner in xfs fsmap") +Signed-off-by: "Darrick J. Wong" +Reported-by: Zizhi Wo +Reviewed-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_fsmap.c | 29 ++++++++++++++++++----------- + 1 file changed, 18 insertions(+), 11 deletions(-) + +diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c +index ae18ab86e608..8712b891defb 100644 +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -162,7 +162,8 @@ struct xfs_getfsmap_info { + xfs_daddr_t next_daddr; /* next daddr we expect */ + /* daddr of low fsmap key when we're using the rtbitmap */ + xfs_daddr_t low_daddr; +- xfs_daddr_t end_daddr; /* daddr of high fsmap key */ ++ /* daddr of high fsmap key, or the last daddr on the device */ ++ xfs_daddr_t end_daddr; + u64 missing_owner; /* owner of holes */ + u32 dev; /* device id */ + /* +@@ -306,7 +307,7 @@ xfs_getfsmap_helper( + * Note that if the btree query found a mapping, there won't be a gap. + */ + if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) +- rec_daddr = info->end_daddr; ++ rec_daddr = info->end_daddr + 1; + + /* Are we just counting mappings? 
*/ + if (info->head->fmh_count == 0) { +@@ -898,7 +899,10 @@ xfs_getfsmap( + struct xfs_trans *tp = NULL; + struct xfs_fsmap dkeys[2]; /* per-dev keys */ + struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS]; +- struct xfs_getfsmap_info info = { NULL }; ++ struct xfs_getfsmap_info info = { ++ .fsmap_recs = fsmap_recs, ++ .head = head, ++ }; + bool use_rmap; + int i; + int error = 0; +@@ -963,9 +967,6 @@ xfs_getfsmap( + + info.next_daddr = head->fmh_keys[0].fmr_physical + + head->fmh_keys[0].fmr_length; +- info.end_daddr = XFS_BUF_DADDR_NULL; +- info.fsmap_recs = fsmap_recs; +- info.head = head; + + /* For each device we support... */ + for (i = 0; i < XFS_GETFSMAP_DEVS; i++) { +@@ -978,17 +979,23 @@ xfs_getfsmap( + break; + + /* +- * If this device number matches the high key, we have +- * to pass the high key to the handler to limit the +- * query results. If the device number exceeds the +- * low key, zero out the low key so that we get +- * everything from the beginning. ++ * If this device number matches the high key, we have to pass ++ * the high key to the handler to limit the query results, and ++ * set the end_daddr so that we can synthesize records at the ++ * end of the query range or device. + */ + if (handlers[i].dev == head->fmh_keys[1].fmr_device) { + dkeys[1] = head->fmh_keys[1]; + info.end_daddr = min(handlers[i].nr_sectors - 1, + dkeys[1].fmr_physical); ++ } else { ++ info.end_daddr = handlers[i].nr_sectors - 1; + } ++ ++ /* ++ * If the device number exceeds the low key, zero out the low ++ * key so that we get everything from the beginning. 
++ */ + if (handlers[i].dev > head->fmh_keys[0].fmr_device) + memset(&dkeys[0], 0, sizeof(struct xfs_fsmap)); + +-- +2.39.5 + diff --git a/queue-6.12/xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch b/queue-6.12/xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch new file mode 100644 index 00000000000..09461d04dff --- /dev/null +++ b/queue-6.12/xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch @@ -0,0 +1,89 @@ +From 3720905f499187a244d9070293aed91dc03c4b45 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Dec 2024 11:51:07 -0800 +Subject: xfs: fix sb_spino_align checks for large fsblock sizes + +From: Darrick J. Wong + +commit 7f8a44f37229fc76bfcafa341a4b8862368ef44a upstream. + +For a sparse inodes filesystem, mkfs.xfs computes the values of +sb_spino_align and sb_inoalignmt with the following code: + + int cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; + + if (cfg->sb_feat.crcs_enabled) + cluster_size *= cfg->inodesize / XFS_DINODE_MIN_SIZE; + + sbp->sb_spino_align = cluster_size >> cfg->blocklog; + sbp->sb_inoalignmt = XFS_INODES_PER_CHUNK * + cfg->inodesize >> cfg->blocklog; + +On a V5 filesystem with 64k fsblocks and 512 byte inodes, this results +in cluster_size = 8192 * (512 / 256) = 16384. As a result, +sb_spino_align and sb_inoalignmt are both set to zero. 
Unfortunately, +this trips the new sb_spino_align check that was just added to +xfs_validate_sb_common, and the mkfs fails: + +# mkfs.xfs -f -b size=64k, /dev/sda +meta-data=/dev/sda isize=512 agcount=4, agsize=81136 blks + = sectsz=512 attr=2, projid32bit=1 + = crc=1 finobt=1, sparse=1, rmapbt=1 + = reflink=1 bigtime=1 inobtcount=1 nrext64=1 + = exchange=0 metadir=0 +data = bsize=65536 blocks=324544, imaxpct=25 + = sunit=0 swidth=0 blks +naming =version 2 bsize=65536 ascii-ci=0, ftype=1, parent=0 +log =internal log bsize=65536 blocks=5006, version=2 + = sectsz=512 sunit=0 blks, lazy-count=1 +realtime =none extsz=65536 blocks=0, rtextents=0 + = rgcount=0 rgsize=0 extents +Discarding blocks...Sparse inode alignment (0) is invalid. +Metadata corruption detected at 0x560ac5a80bbe, xfs_sb block 0x0/0x200 +libxfs_bwrite: write verifier failed on xfs_sb bno 0x0/0x1 +mkfs.xfs: Releasing dirty buffer to free list! +found dirty buffer (bulk) on free list! +Sparse inode alignment (0) is invalid. +Metadata corruption detected at 0x560ac5a80bbe, xfs_sb block 0x0/0x200 +libxfs_bwrite: write verifier failed on xfs_sb bno 0x0/0x1 +mkfs.xfs: writing AG headers failed, err=22 + +Prior to commit 59e43f5479cce1 this all worked fine, even if "sparse" +inodes are somewhat meaningless when everything fits in a single +fsblock. Adjust the checks to handle existing filesystems. + +Cc: # v6.13-rc1 +Fixes: 59e43f5479cce1 ("xfs: sb_spino_align is not verified") +Signed-off-by: "Darrick J. 
Wong" +Reviewed-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_sb.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c +index 9e0ae312bc80..e27b63281d01 100644 +--- a/fs/xfs/libxfs/xfs_sb.c ++++ b/fs/xfs/libxfs/xfs_sb.c +@@ -392,12 +392,13 @@ xfs_validate_sb_common( + return -EINVAL; + } + +- if (!sbp->sb_spino_align || +- sbp->sb_spino_align > sbp->sb_inoalignmt || +- (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) { ++ if (sbp->sb_spino_align && ++ (sbp->sb_spino_align > sbp->sb_inoalignmt || ++ (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0)) { + xfs_warn(mp, +- "Sparse inode alignment (%u) is invalid.", +- sbp->sb_spino_align); ++"Sparse inode alignment (%u) is invalid, must be integer factor of (%u).", ++ sbp->sb_spino_align, ++ sbp->sb_inoalignmt); + return -EINVAL; + } + } else if (sbp->sb_spino_align) { +-- +2.39.5 + diff --git a/queue-6.12/xfs-fix-sparse-inode-limits-on-runt-ag.patch b/queue-6.12/xfs-fix-sparse-inode-limits-on-runt-ag.patch new file mode 100644 index 00000000000..e5daa559a34 --- /dev/null +++ b/queue-6.12/xfs-fix-sparse-inode-limits-on-runt-ag.patch @@ -0,0 +1,89 @@ +From fe0558bded8b5cfdd3143a116b3ec5c965377716 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Dec 2024 11:50:36 -0800 +Subject: xfs: fix sparse inode limits on runt AG + +From: Dave Chinner + +commit 13325333582d4820d39b9e8f63d6a54e745585d9 upstream. + +The runt AG at the end of a filesystem is almost always smaller than +the mp->m_sb.sb_agblocks. Unfortunately, when setting the max_agbno +limit for the inode chunk allocation, we do not take this into +account. This means we can allocate a sparse inode chunk that +overlaps beyond the end of an AG. When we go to allocate an inode +from that sparse chunk, the irec fails validation because the +agbno of the start of the irec is beyond valid limits for the runt +AG. 
+ +Prevent this from happening by taking into account the size of the +runt AG when allocating inode chunks. Also convert the various +checks for valid inode chunk agbnos to use xfs_ag_block_count() +so that they will also catch such issues in the future. + +Fixes: 56d1115c9bc7 ("xfs: allocate sparse inode chunks on full chunk allocation failure") +Signed-off-by: Dave Chinner +Reviewed-by: Darrick J. Wong +Signed-off-by: Carlos Maiolino +[djwong: backport to stable because upstream maintainer ignored cc-stable] +Link: https://lore.kernel.org/linux-xfs/20241112231539.GG9438@frogsfrogsfrogs/ +Signed-off-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_ialloc.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c +index 271855227514..6258527315f2 100644 +--- a/fs/xfs/libxfs/xfs_ialloc.c ++++ b/fs/xfs/libxfs/xfs_ialloc.c +@@ -855,7 +855,8 @@ xfs_ialloc_ag_alloc( + * the end of the AG. 
+ */ + args.min_agbno = args.mp->m_sb.sb_inoalignmt; +- args.max_agbno = round_down(args.mp->m_sb.sb_agblocks, ++ args.max_agbno = round_down(xfs_ag_block_count(args.mp, ++ pag->pag_agno), + args.mp->m_sb.sb_inoalignmt) - + igeo->ialloc_blks; + +@@ -2332,9 +2333,9 @@ xfs_difree( + return -EINVAL; + } + agbno = XFS_AGINO_TO_AGBNO(mp, agino); +- if (agbno >= mp->m_sb.sb_agblocks) { +- xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", +- __func__, agbno, mp->m_sb.sb_agblocks); ++ if (agbno >= xfs_ag_block_count(mp, pag->pag_agno)) { ++ xfs_warn(mp, "%s: agbno >= xfs_ag_block_count (%d >= %d).", ++ __func__, agbno, xfs_ag_block_count(mp, pag->pag_agno)); + ASSERT(0); + return -EINVAL; + } +@@ -2457,7 +2458,7 @@ xfs_imap( + */ + agino = XFS_INO_TO_AGINO(mp, ino); + agbno = XFS_AGINO_TO_AGBNO(mp, agino); +- if (agbno >= mp->m_sb.sb_agblocks || ++ if (agbno >= xfs_ag_block_count(mp, pag->pag_agno) || + ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { + error = -EINVAL; + #ifdef DEBUG +@@ -2467,11 +2468,12 @@ xfs_imap( + */ + if (flags & XFS_IGET_UNTRUSTED) + return error; +- if (agbno >= mp->m_sb.sb_agblocks) { ++ if (agbno >= xfs_ag_block_count(mp, pag->pag_agno)) { + xfs_alert(mp, + "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", + __func__, (unsigned long long)agbno, +- (unsigned long)mp->m_sb.sb_agblocks); ++ (unsigned long)xfs_ag_block_count(mp, ++ pag->pag_agno)); + } + if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { + xfs_alert(mp, +-- +2.39.5 + diff --git a/queue-6.12/xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch b/queue-6.12/xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch new file mode 100644 index 00000000000..3b14a1c926d --- /dev/null +++ b/queue-6.12/xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch @@ -0,0 +1,84 @@ +From 6208be0894ce64483b9e968e9e5dbe3b4c7f31fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Dec 2024 11:51:23 -0800 +Subject: xfs: fix zero byte checking in the 
superblock scrubber + +From: Darrick J. Wong + +commit c004a793e0ec34047c3bd423bcd8966f5fac88dc upstream. + +The logic to check that the region past the end of the superblock is all +zeroes is wrong -- we don't want to check only the bytes past the end of +the maximally sized ondisk superblock structure as currently defined in +xfs_format.h; we want to check the bytes beyond the end of the ondisk as +defined by the feature bits. + +Port the superblock size logic from xfs_repair and then put it to use in +xfs_scrub. + +Cc: # v4.15 +Fixes: 21fb4cb1981ef7 ("xfs: scrub the secondary superblocks") +Signed-off-by: "Darrick J. Wong" +Reviewed-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + fs/xfs/scrub/agheader.c | 29 +++++++++++++++++++++++++++-- + 1 file changed, 27 insertions(+), 2 deletions(-) + +diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c +index da30f926cbe6..0f2f1852d58f 100644 +--- a/fs/xfs/scrub/agheader.c ++++ b/fs/xfs/scrub/agheader.c +@@ -59,6 +59,30 @@ xchk_superblock_xref( + /* scrub teardown will take care of sc->sa for us */ + } + ++/* ++ * Calculate the ondisk superblock size in bytes given the feature set of the ++ * mounted filesystem (aka the primary sb). This is subtlely different from ++ * the logic in xfs_repair, which computes the size of a secondary sb given the ++ * featureset listed in the secondary sb. 
++ */ ++STATIC size_t ++xchk_superblock_ondisk_size( ++ struct xfs_mount *mp) ++{ ++ if (xfs_has_metauuid(mp)) ++ return offsetofend(struct xfs_dsb, sb_meta_uuid); ++ if (xfs_has_crc(mp)) ++ return offsetofend(struct xfs_dsb, sb_lsn); ++ if (xfs_sb_version_hasmorebits(&mp->m_sb)) ++ return offsetofend(struct xfs_dsb, sb_bad_features2); ++ if (xfs_has_logv2(mp)) ++ return offsetofend(struct xfs_dsb, sb_logsunit); ++ if (xfs_has_sector(mp)) ++ return offsetofend(struct xfs_dsb, sb_logsectsize); ++ /* only support dirv2 or more recent */ ++ return offsetofend(struct xfs_dsb, sb_dirblklog); ++} ++ + /* + * Scrub the filesystem superblock. + * +@@ -75,6 +99,7 @@ xchk_superblock( + struct xfs_buf *bp; + struct xfs_dsb *sb; + struct xfs_perag *pag; ++ size_t sblen; + xfs_agnumber_t agno; + uint32_t v2_ok; + __be32 features_mask; +@@ -350,8 +375,8 @@ xchk_superblock( + } + + /* Everything else must be zero. */ +- if (memchr_inv(sb + 1, 0, +- BBTOB(bp->b_length) - sizeof(struct xfs_dsb))) ++ sblen = xchk_superblock_ondisk_size(mp); ++ if (memchr_inv((char *)sb + sblen, 0, BBTOB(bp->b_length) - sblen)) + xchk_block_set_corrupt(sc, bp); + + xchk_superblock_xref(sc, bp); +-- +2.39.5 + diff --git a/queue-6.12/xfs-sb_spino_align-is-not-verified.patch b/queue-6.12/xfs-sb_spino_align-is-not-verified.patch new file mode 100644 index 00000000000..74635dabf45 --- /dev/null +++ b/queue-6.12/xfs-sb_spino_align-is-not-verified.patch @@ -0,0 +1,52 @@ +From c5555322e43ec7aa41de5938da5bc8b4da32fba2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 18 Dec 2024 11:50:20 -0800 +Subject: xfs: sb_spino_align is not verified + +From: Dave Chinner + +commit 59e43f5479cce106d71c0b91a297c7ad1913176c upstream. + +It's just read in from the superblock and used without doing any +validity checks at all on the value. + +Fixes: fb4f2b4e5a82 ("xfs: add sparse inode chunk alignment superblock field") +Signed-off-by: Dave Chinner +Reviewed-by: Darrick J. 
Wong +Signed-off-by: Carlos Maiolino +[djwong: actually tag for 6.12 because upstream maintainer ignored cc-stable tag] +Link: https://lore.kernel.org/linux-xfs/20241024165544.GI21853@frogsfrogsfrogs/ +Signed-off-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c +index 02ebcbc4882f..9e0ae312bc80 100644 +--- a/fs/xfs/libxfs/xfs_sb.c ++++ b/fs/xfs/libxfs/xfs_sb.c +@@ -391,6 +391,20 @@ xfs_validate_sb_common( + sbp->sb_inoalignmt, align); + return -EINVAL; + } ++ ++ if (!sbp->sb_spino_align || ++ sbp->sb_spino_align > sbp->sb_inoalignmt || ++ (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) { ++ xfs_warn(mp, ++ "Sparse inode alignment (%u) is invalid.", ++ sbp->sb_spino_align); ++ return -EINVAL; ++ } ++ } else if (sbp->sb_spino_align) { ++ xfs_warn(mp, ++ "Sparse inode alignment (%u) should be zero.", ++ sbp->sb_spino_align); ++ return -EINVAL; + } + } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | + XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { +-- +2.39.5 +