--- /dev/null
+From e22342b717900cb58e6c9134ee441ae5b5023f0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 21:35:01 +0800
+Subject: erofs: add erofs_sb_free() helper
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit e2de3c1bf6a0c99b089bd706a62da8f988918858 ]
+
+Unify the common parts of erofs_fc_free() and erofs_kill_sb() as
+erofs_sb_free().
+
+Thus, fput() in erofs_fc_get_tree() is no longer needed, too.
+
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20241212133504.2047178-1-hsiangkao@linux.alibaba.com
+Stable-dep-of: 6422cde1b0d5 ("erofs: use buffered I/O for file-backed mounts by default")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/super.c | 36 +++++++++++++++++++-----------------
+ 1 file changed, 19 insertions(+), 17 deletions(-)
+
+diff --git a/fs/erofs/super.c b/fs/erofs/super.c
+index 2dd7d819572f..c40821346d50 100644
+--- a/fs/erofs/super.c
++++ b/fs/erofs/super.c
+@@ -718,16 +718,19 @@ static int erofs_fc_get_tree(struct fs_context *fc)
+ GET_TREE_BDEV_QUIET_LOOKUP : 0);
+ #ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
+ if (ret == -ENOTBLK) {
++ struct file *file;
++
+ if (!fc->source)
+ return invalf(fc, "No source specified");
+- sbi->fdev = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
+- if (IS_ERR(sbi->fdev))
+- return PTR_ERR(sbi->fdev);
++
++ file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
++ if (IS_ERR(file))
++ return PTR_ERR(file);
++ sbi->fdev = file;
+
+ if (S_ISREG(file_inode(sbi->fdev)->i_mode) &&
+ sbi->fdev->f_mapping->a_ops->read_folio)
+ return get_tree_nodev(fc, erofs_fc_fill_super);
+- fput(sbi->fdev);
+ }
+ #endif
+ return ret;
+@@ -778,19 +781,24 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs)
+ kfree(devs);
+ }
+
+-static void erofs_fc_free(struct fs_context *fc)
++static void erofs_sb_free(struct erofs_sb_info *sbi)
+ {
+- struct erofs_sb_info *sbi = fc->s_fs_info;
+-
+- if (!sbi)
+- return;
+-
+ erofs_free_dev_context(sbi->devs);
+ kfree(sbi->fsid);
+ kfree(sbi->domain_id);
++ if (sbi->fdev)
++ fput(sbi->fdev);
+ kfree(sbi);
+ }
+
++static void erofs_fc_free(struct fs_context *fc)
++{
++ struct erofs_sb_info *sbi = fc->s_fs_info;
++
++ if (sbi) /* free here if an error occurs before transferring to sb */
++ erofs_sb_free(sbi);
++}
++
+ static const struct fs_context_operations erofs_context_ops = {
+ .parse_param = erofs_fc_parse_param,
+ .get_tree = erofs_fc_get_tree,
+@@ -828,15 +836,9 @@ static void erofs_kill_sb(struct super_block *sb)
+ kill_anon_super(sb);
+ else
+ kill_block_super(sb);
+-
+- erofs_free_dev_context(sbi->devs);
+ fs_put_dax(sbi->dax_dev, NULL);
+ erofs_fscache_unregister_fs(sb);
+- kfree(sbi->fsid);
+- kfree(sbi->domain_id);
+- if (sbi->fdev)
+- fput(sbi->fdev);
+- kfree(sbi);
++ erofs_sb_free(sbi);
+ sb->s_fs_info = NULL;
+ }
+
+--
+2.39.5
+
--- /dev/null
+From b2236194259e71097c538f6e23ac5dc9e90e1cb1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Nov 2024 16:52:36 +0800
+Subject: erofs: fix PSI memstall accounting
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit 1a2180f6859c73c674809f9f82e36c94084682ba ]
+
+Max Kellermann recently reported psi_group_cpu.tasks[NR_MEMSTALL] is
+incorrect in the 6.11.9 kernel.
+
+The root cause appears to be that, since the problematic commit, bio
+can be NULL, causing psi_memstall_leave() to be skipped in
+z_erofs_submit_queue().
+
+Reported-by: Max Kellermann <max.kellermann@ionos.com>
+Closes: https://lore.kernel.org/r/CAKPOu+8tvSowiJADW2RuKyofL_CSkm_SuyZA7ME5vMLWmL6pqw@mail.gmail.com
+Fixes: 9e2f9d34dd12 ("erofs: handle overlapped pclusters out of crafted images properly")
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20241127085236.3538334-1-hsiangkao@linux.alibaba.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/zdata.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
+index a569ff9dfd04..1a00f061798a 100644
+--- a/fs/erofs/zdata.c
++++ b/fs/erofs/zdata.c
+@@ -1679,9 +1679,9 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
+ erofs_fscache_submit_bio(bio);
+ else
+ submit_bio(bio);
+- if (memstall)
+- psi_memstall_leave(&pflags);
+ }
++ if (memstall)
++ psi_memstall_leave(&pflags);
+
+ /*
+ * although background is preferred, no one is pending for submission.
+--
+2.39.5
+
--- /dev/null
+From 350f3baf7618fcdaf53e7a7a4e6d9dfc7b35784d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Dec 2024 07:54:01 +0800
+Subject: erofs: reference `struct erofs_device_info` for erofs_map_dev
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit f8d920a402aec3482931cb5f1539ed438740fc49 ]
+
+Record `m_sb` and `m_dif` to replace `m_fscache`, `m_daxdev`, `m_fp`
+and `m_dax_part_off` in order to simplify the codebase.
+
+Note that `m_bdev` is still left since it can be assigned from
+`sb->s_bdev` directly.
+
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20241212235401.2857246-1-hsiangkao@linux.alibaba.com
+Stable-dep-of: 6422cde1b0d5 ("erofs: use buffered I/O for file-backed mounts by default")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/data.c | 26 ++++++++++----------------
+ fs/erofs/fileio.c | 2 +-
+ fs/erofs/fscache.c | 4 ++--
+ fs/erofs/internal.h | 6 ++----
+ 4 files changed, 15 insertions(+), 23 deletions(-)
+
+diff --git a/fs/erofs/data.c b/fs/erofs/data.c
+index 365c988262b1..722151d3fee8 100644
+--- a/fs/erofs/data.c
++++ b/fs/erofs/data.c
+@@ -186,19 +186,13 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
+ }
+
+ static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
+- struct erofs_device_info *dif)
++ struct super_block *sb, struct erofs_device_info *dif)
+ {
++ map->m_sb = sb;
++ map->m_dif = dif;
+ map->m_bdev = NULL;
+- map->m_fp = NULL;
+- if (dif->file) {
+- if (S_ISBLK(file_inode(dif->file)->i_mode))
+- map->m_bdev = file_bdev(dif->file);
+- else
+- map->m_fp = dif->file;
+- }
+- map->m_daxdev = dif->dax_dev;
+- map->m_dax_part_off = dif->dax_part_off;
+- map->m_fscache = dif->fscache;
++ if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
++ map->m_bdev = file_bdev(dif->file);
+ }
+
+ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
+@@ -208,7 +202,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
+ erofs_off_t startoff, length;
+ int id;
+
+- erofs_fill_from_devinfo(map, &EROFS_SB(sb)->dif0);
++ erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0);
+ map->m_bdev = sb->s_bdev; /* use s_bdev for the primary device */
+ if (map->m_deviceid) {
+ down_read(&devs->rwsem);
+@@ -222,7 +216,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
+ up_read(&devs->rwsem);
+ return 0;
+ }
+- erofs_fill_from_devinfo(map, dif);
++ erofs_fill_from_devinfo(map, sb, dif);
+ up_read(&devs->rwsem);
+ } else if (devs->extra_devices && !devs->flatdev) {
+ down_read(&devs->rwsem);
+@@ -235,7 +229,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
+ if (map->m_pa >= startoff &&
+ map->m_pa < startoff + length) {
+ map->m_pa -= startoff;
+- erofs_fill_from_devinfo(map, dif);
++ erofs_fill_from_devinfo(map, sb, dif);
+ break;
+ }
+ }
+@@ -305,7 +299,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+
+ iomap->offset = map.m_la;
+ if (flags & IOMAP_DAX)
+- iomap->dax_dev = mdev.m_daxdev;
++ iomap->dax_dev = mdev.m_dif->dax_dev;
+ else
+ iomap->bdev = mdev.m_bdev;
+ iomap->length = map.m_llen;
+@@ -334,7 +328,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ iomap->type = IOMAP_MAPPED;
+ iomap->addr = mdev.m_pa;
+ if (flags & IOMAP_DAX)
+- iomap->addr += mdev.m_dax_part_off;
++ iomap->addr += mdev.m_dif->dax_part_off;
+ }
+ return 0;
+ }
+diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
+index 3af96b1e2c2a..a61b8faec651 100644
+--- a/fs/erofs/fileio.c
++++ b/fs/erofs/fileio.c
+@@ -67,7 +67,7 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
+ GFP_KERNEL | __GFP_NOFAIL);
+
+ bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
+- rq->iocb.ki_filp = mdev->m_fp;
++ rq->iocb.ki_filp = mdev->m_dif->file;
+ return rq;
+ }
+
+diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
+index ce7e38c82719..ce3d8737df85 100644
+--- a/fs/erofs/fscache.c
++++ b/fs/erofs/fscache.c
+@@ -198,7 +198,7 @@ struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
+
+ io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
+ bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
+- io->io.private = mdev->m_fscache->cookie;
++ io->io.private = mdev->m_dif->fscache->cookie;
+ io->io.end_io = erofs_fscache_bio_endio;
+ refcount_set(&io->io.ref, 1);
+ return &io->bio;
+@@ -316,7 +316,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
+ if (!io)
+ return -ENOMEM;
+ iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
+- ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
++ ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie,
+ mdev.m_pa + (pos - map.m_la), io);
+ erofs_fscache_req_io_put(io);
+
+diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
+index d70aa2410472..3108ece1d709 100644
+--- a/fs/erofs/internal.h
++++ b/fs/erofs/internal.h
+@@ -366,11 +366,9 @@ enum {
+ };
+
+ struct erofs_map_dev {
+- struct erofs_fscache *m_fscache;
++ struct super_block *m_sb;
++ struct erofs_device_info *m_dif;
+ struct block_device *m_bdev;
+- struct dax_device *m_daxdev;
+- struct file *m_fp;
+- u64 m_dax_part_off;
+
+ erofs_off_t m_pa;
+ unsigned int m_deviceid;
+--
+2.39.5
+
--- /dev/null
+From 11a8695f7aed6930bfc758b83fec24f1eb4ebcb8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 21:43:36 +0800
+Subject: erofs: use buffered I/O for file-backed mounts by default
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit 6422cde1b0d5a31b206b263417c1c2b3c80fe82c ]
+
+For many use cases (e.g. container images are just fetched from remote),
+performance will be impacted if underlay page cache is up-to-date but
+direct i/o flushes dirty pages first.
+
+Instead, let's use buffered I/O by default to keep in sync with loop
+devices and add a (re)mount option to explicitly give a try to use
+direct I/O if supported by the underlying files.
+
+The container startup time is improved as below:
+[workload] docker.io/library/wordpress:latest
+ unpack 1st run non-1st runs
+EROFS snapshotter buffered I/O file 4.586404265s 0.308s 0.198s
+EROFS snapshotter direct I/O file 4.581742849s 2.238s 0.222s
+EROFS snapshotter loop 4.596023152s 0.346s 0.201s
+Overlayfs snapshotter 5.382851037s 0.206s 0.214s
+
+Fixes: fb176750266a ("erofs: add file-backed mount support")
+Cc: Derek McGowan <derek@mcg.dev>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20241212134336.2059899-1-hsiangkao@linux.alibaba.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/fileio.c | 7 +++++--
+ fs/erofs/internal.h | 1 +
+ fs/erofs/super.c | 23 +++++++++++++++--------
+ 3 files changed, 21 insertions(+), 10 deletions(-)
+
+diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
+index a61b8faec651..33f8539dda4a 100644
+--- a/fs/erofs/fileio.c
++++ b/fs/erofs/fileio.c
+@@ -9,6 +9,7 @@ struct erofs_fileio_rq {
+ struct bio_vec bvecs[BIO_MAX_VECS];
+ struct bio bio;
+ struct kiocb iocb;
++ struct super_block *sb;
+ };
+
+ struct erofs_fileio {
+@@ -52,8 +53,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
+ rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+ rq->iocb.ki_ioprio = get_current_ioprio();
+ rq->iocb.ki_complete = erofs_fileio_ki_complete;
+- rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ?
+- IOCB_DIRECT : 0;
++ if (test_opt(&EROFS_SB(rq->sb)->opt, DIRECT_IO) &&
++ rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT)
++ rq->iocb.ki_flags = IOCB_DIRECT;
+ iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt,
+ rq->bio.bi_iter.bi_size);
+ ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter);
+@@ -68,6 +70,7 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
+
+ bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
+ rq->iocb.ki_filp = mdev->m_dif->file;
++ rq->sb = mdev->m_sb;
+ return rq;
+ }
+
+diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
+index 3108ece1d709..77e785a6dfa7 100644
+--- a/fs/erofs/internal.h
++++ b/fs/erofs/internal.h
+@@ -182,6 +182,7 @@ struct erofs_sb_info {
+ #define EROFS_MOUNT_POSIX_ACL 0x00000020
+ #define EROFS_MOUNT_DAX_ALWAYS 0x00000040
+ #define EROFS_MOUNT_DAX_NEVER 0x00000080
++#define EROFS_MOUNT_DIRECT_IO 0x00000100
+
+ #define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
+ #define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
+diff --git a/fs/erofs/super.c b/fs/erofs/super.c
+index 60f7bd43a5a4..5b279977c9d5 100644
+--- a/fs/erofs/super.c
++++ b/fs/erofs/super.c
+@@ -379,14 +379,8 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
+ }
+
+ enum {
+- Opt_user_xattr,
+- Opt_acl,
+- Opt_cache_strategy,
+- Opt_dax,
+- Opt_dax_enum,
+- Opt_device,
+- Opt_fsid,
+- Opt_domain_id,
++ Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
++ Opt_device, Opt_fsid, Opt_domain_id, Opt_directio,
+ Opt_err
+ };
+
+@@ -413,6 +407,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
+ fsparam_string("device", Opt_device),
+ fsparam_string("fsid", Opt_fsid),
+ fsparam_string("domain_id", Opt_domain_id),
++ fsparam_flag_no("directio", Opt_directio),
+ {}
+ };
+
+@@ -526,6 +521,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
+ errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
+ break;
+ #endif
++ case Opt_directio:
++#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
++ if (result.boolean)
++ set_opt(&sbi->opt, DIRECT_IO);
++ else
++ clear_opt(&sbi->opt, DIRECT_IO);
++#else
++ errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
++#endif
++ break;
+ default:
+ return -ENOPARAM;
+ }
+@@ -963,6 +968,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
+ seq_puts(seq, ",dax=always");
+ if (test_opt(opt, DAX_NEVER))
+ seq_puts(seq, ",dax=never");
++ if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO))
++ seq_puts(seq, ",directio");
+ #ifdef CONFIG_EROFS_FS_ONDEMAND
+ if (sbi->fsid)
+ seq_printf(seq, ",fsid=%s", sbi->fsid);
+--
+2.39.5
+
--- /dev/null
+From a4931361873eb6e0e8ab75aa98a1d3c7d78e46de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Dec 2024 20:53:08 +0800
+Subject: erofs: use `struct erofs_device_info` for the primary device
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit 7b00af2c5414dc01e0718deef7ead81102867636 ]
+
+Use `struct erofs_device_info` (as `dif0`) for the primary device
+instead of listing each field directly in `struct erofs_sb_info`.
+Note that `sb->s_bdev` is still used for the primary block device.
+
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20241216125310.930933-2-hsiangkao@linux.alibaba.com
+Stable-dep-of: 6422cde1b0d5 ("erofs: use buffered I/O for file-backed mounts by default")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/data.c | 12 ++++--------
+ fs/erofs/fscache.c | 6 +++---
+ fs/erofs/internal.h | 8 ++------
+ fs/erofs/super.c | 27 +++++++++++++--------------
+ 4 files changed, 22 insertions(+), 31 deletions(-)
+
+diff --git a/fs/erofs/data.c b/fs/erofs/data.c
+index fa51437e1d99..365c988262b1 100644
+--- a/fs/erofs/data.c
++++ b/fs/erofs/data.c
+@@ -63,10 +63,10 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
+
+ buf->file = NULL;
+ if (erofs_is_fileio_mode(sbi)) {
+- buf->file = sbi->fdev; /* some fs like FUSE needs it */
++ buf->file = sbi->dif0.file; /* some fs like FUSE needs it */
+ buf->mapping = buf->file->f_mapping;
+ } else if (erofs_is_fscache_mode(sb))
+- buf->mapping = sbi->s_fscache->inode->i_mapping;
++ buf->mapping = sbi->dif0.fscache->inode->i_mapping;
+ else
+ buf->mapping = sb->s_bdev->bd_mapping;
+ }
+@@ -208,12 +208,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
+ erofs_off_t startoff, length;
+ int id;
+
+- map->m_bdev = sb->s_bdev;
+- map->m_daxdev = EROFS_SB(sb)->dax_dev;
+- map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
+- map->m_fscache = EROFS_SB(sb)->s_fscache;
+- map->m_fp = EROFS_SB(sb)->fdev;
+-
++ erofs_fill_from_devinfo(map, &EROFS_SB(sb)->dif0);
++ map->m_bdev = sb->s_bdev; /* use s_bdev for the primary device */
+ if (map->m_deviceid) {
+ down_read(&devs->rwsem);
+ dif = idr_find(&devs->tree, map->m_deviceid - 1);
+diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
+index fda16eedafb5..ce7e38c82719 100644
+--- a/fs/erofs/fscache.c
++++ b/fs/erofs/fscache.c
+@@ -657,7 +657,7 @@ int erofs_fscache_register_fs(struct super_block *sb)
+ if (IS_ERR(fscache))
+ return PTR_ERR(fscache);
+
+- sbi->s_fscache = fscache;
++ sbi->dif0.fscache = fscache;
+ return 0;
+ }
+
+@@ -665,14 +665,14 @@ void erofs_fscache_unregister_fs(struct super_block *sb)
+ {
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+- erofs_fscache_unregister_cookie(sbi->s_fscache);
++ erofs_fscache_unregister_cookie(sbi->dif0.fscache);
+
+ if (sbi->domain)
+ erofs_fscache_domain_put(sbi->domain);
+ else
+ fscache_relinquish_volume(sbi->volume, NULL, false);
+
+- sbi->s_fscache = NULL;
++ sbi->dif0.fscache = NULL;
+ sbi->volume = NULL;
+ sbi->domain = NULL;
+ }
+diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
+index 9b03c8f323a7..d70aa2410472 100644
+--- a/fs/erofs/internal.h
++++ b/fs/erofs/internal.h
+@@ -113,6 +113,7 @@ struct erofs_xattr_prefix_item {
+ };
+
+ struct erofs_sb_info {
++ struct erofs_device_info dif0;
+ struct erofs_mount_opts opt; /* options */
+ #ifdef CONFIG_EROFS_FS_ZIP
+ /* list for all registered superblocks, mainly for shrinker */
+@@ -130,13 +131,9 @@ struct erofs_sb_info {
+
+ struct erofs_sb_lz4_info lz4;
+ #endif /* CONFIG_EROFS_FS_ZIP */
+- struct file *fdev;
+ struct inode *packed_inode;
+ struct erofs_dev_context *devs;
+- struct dax_device *dax_dev;
+- u64 dax_part_off;
+ u64 total_blocks;
+- u32 primarydevice_blocks;
+
+ u32 meta_blkaddr;
+ #ifdef CONFIG_EROFS_FS_XATTR
+@@ -172,7 +169,6 @@ struct erofs_sb_info {
+
+ /* fscache support */
+ struct fscache_volume *volume;
+- struct erofs_fscache *s_fscache;
+ struct erofs_domain *domain;
+ char *fsid;
+ char *domain_id;
+@@ -193,7 +189,7 @@ struct erofs_sb_info {
+
+ static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi)
+ {
+- return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->fdev;
++ return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->dif0.file;
+ }
+
+ static inline bool erofs_is_fscache_mode(struct super_block *sb)
+diff --git a/fs/erofs/super.c b/fs/erofs/super.c
+index c40821346d50..60f7bd43a5a4 100644
+--- a/fs/erofs/super.c
++++ b/fs/erofs/super.c
+@@ -218,7 +218,7 @@ static int erofs_scan_devices(struct super_block *sb,
+ struct erofs_device_info *dif;
+ int id, err = 0;
+
+- sbi->total_blocks = sbi->primarydevice_blocks;
++ sbi->total_blocks = sbi->dif0.blocks;
+ if (!erofs_sb_has_device_table(sbi))
+ ondisk_extradevs = 0;
+ else
+@@ -322,7 +322,7 @@ static int erofs_read_superblock(struct super_block *sb)
+ sbi->sb_size);
+ goto out;
+ }
+- sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
++ sbi->dif0.blocks = le32_to_cpu(dsb->blocks);
+ sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
+ #ifdef CONFIG_EROFS_FS_XATTR
+ sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
+@@ -617,9 +617,8 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
+ return -EINVAL;
+ }
+
+- sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
+- &sbi->dax_part_off,
+- NULL, NULL);
++ sbi->dif0.dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
++ &sbi->dif0.dax_part_off, NULL, NULL);
+ }
+
+ err = erofs_read_superblock(sb);
+@@ -642,7 +641,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
+ }
+
+ if (test_opt(&sbi->opt, DAX_ALWAYS)) {
+- if (!sbi->dax_dev) {
++ if (!sbi->dif0.dax_dev) {
+ errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
+ clear_opt(&sbi->opt, DAX_ALWAYS);
+ } else if (sbi->blkszbits != PAGE_SHIFT) {
+@@ -722,14 +721,13 @@ static int erofs_fc_get_tree(struct fs_context *fc)
+
+ if (!fc->source)
+ return invalf(fc, "No source specified");
+-
+ file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+- sbi->fdev = file;
++ sbi->dif0.file = file;
+
+- if (S_ISREG(file_inode(sbi->fdev)->i_mode) &&
+- sbi->fdev->f_mapping->a_ops->read_folio)
++ if (S_ISREG(file_inode(sbi->dif0.file)->i_mode) &&
++ sbi->dif0.file->f_mapping->a_ops->read_folio)
+ return get_tree_nodev(fc, erofs_fc_fill_super);
+ }
+ #endif
+@@ -786,8 +784,8 @@ static void erofs_sb_free(struct erofs_sb_info *sbi)
+ erofs_free_dev_context(sbi->devs);
+ kfree(sbi->fsid);
+ kfree(sbi->domain_id);
+- if (sbi->fdev)
+- fput(sbi->fdev);
++ if (sbi->dif0.file)
++ fput(sbi->dif0.file);
+ kfree(sbi);
+ }
+
+@@ -832,11 +830,12 @@ static void erofs_kill_sb(struct super_block *sb)
+ {
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+- if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) || sbi->fdev)
++ if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) ||
++ sbi->dif0.file)
+ kill_anon_super(sb);
+ else
+ kill_block_super(sb);
+- fs_put_dax(sbi->dax_dev, NULL);
++ fs_put_dax(sbi->dif0.dax_dev, NULL);
+ erofs_fscache_unregister_fs(sb);
+ erofs_sb_free(sbi);
+ sb->s_fs_info = NULL;
+--
+2.39.5
+
--- /dev/null
+From 0a8a7df983ed842d9bb150a8e59bc92a125ec624 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 14:31:08 +0000
+Subject: firmware: arm_ffa: Fix the race around setting ffa_dev->properties
+
+From: Levi Yun <yeoreum.yun@arm.com>
+
+[ Upstream commit 6fe437cfe2cdc797b03f63b338a13fac96ed6a08 ]
+
+Currently, ffa_dev->properties is set after the ffa_device_register()
+call returns in ffa_setup_partitions(). This could potentially result in
+a race where the partition's properties are accessed while probing
+struct ffa_device before they are set.
+
+Update ffa_device_register() to receive ffa_partition_info so that all
+the data from the partition information received from the firmware can
+be stored in the struct ffa_device before calling device_register()
+in ffa_device_register().
+
+Fixes: e781858488b9 ("firmware: arm_ffa: Add initial FFA bus support for device enumeration")
+Signed-off-by: Levi Yun <yeoreum.yun@arm.com>
+Message-Id: <20241203143109.1030514-2-yeoreum.yun@arm.com>
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_ffa/bus.c | 15 +++++++++++----
+ drivers/firmware/arm_ffa/driver.c | 7 +------
+ include/linux/arm_ffa.h | 13 ++++++++-----
+ 3 files changed, 20 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c
+index eb17d03b66fe..dfda5ffc14db 100644
+--- a/drivers/firmware/arm_ffa/bus.c
++++ b/drivers/firmware/arm_ffa/bus.c
+@@ -187,13 +187,18 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev)
+ return valid;
+ }
+
+-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
+- const struct ffa_ops *ops)
++struct ffa_device *
++ffa_device_register(const struct ffa_partition_info *part_info,
++ const struct ffa_ops *ops)
+ {
+ int id, ret;
++ uuid_t uuid;
+ struct device *dev;
+ struct ffa_device *ffa_dev;
+
++ if (!part_info)
++ return NULL;
++
+ id = ida_alloc_min(&ffa_bus_id, 1, GFP_KERNEL);
+ if (id < 0)
+ return NULL;
+@@ -210,9 +215,11 @@ struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
+ dev_set_name(&ffa_dev->dev, "arm-ffa-%d", id);
+
+ ffa_dev->id = id;
+- ffa_dev->vm_id = vm_id;
++ ffa_dev->vm_id = part_info->id;
++ ffa_dev->properties = part_info->properties;
+ ffa_dev->ops = ops;
+- uuid_copy(&ffa_dev->uuid, uuid);
++ import_uuid(&uuid, (u8 *)part_info->uuid);
++ uuid_copy(&ffa_dev->uuid, &uuid);
+
+ ret = device_register(&ffa_dev->dev);
+ if (ret) {
+diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
+index b14cbdae94e8..2c2ec3c35f15 100644
+--- a/drivers/firmware/arm_ffa/driver.c
++++ b/drivers/firmware/arm_ffa/driver.c
+@@ -1387,7 +1387,6 @@ static struct notifier_block ffa_bus_nb = {
+ static int ffa_setup_partitions(void)
+ {
+ int count, idx, ret;
+- uuid_t uuid;
+ struct ffa_device *ffa_dev;
+ struct ffa_dev_part_info *info;
+ struct ffa_partition_info *pbuf, *tpbuf;
+@@ -1406,23 +1405,19 @@ static int ffa_setup_partitions(void)
+
+ xa_init(&drv_info->partition_info);
+ for (idx = 0, tpbuf = pbuf; idx < count; idx++, tpbuf++) {
+- import_uuid(&uuid, (u8 *)tpbuf->uuid);
+-
+ /* Note that if the UUID will be uuid_null, that will require
+ * ffa_bus_notifier() to find the UUID of this partition id
+ * with help of ffa_device_match_uuid(). FF-A v1.1 and above
+ * provides UUID here for each partition as part of the
+ * discovery API and the same is passed.
+ */
+- ffa_dev = ffa_device_register(&uuid, tpbuf->id, &ffa_drv_ops);
++ ffa_dev = ffa_device_register(tpbuf, &ffa_drv_ops);
+ if (!ffa_dev) {
+ pr_err("%s: failed to register partition ID 0x%x\n",
+ __func__, tpbuf->id);
+ continue;
+ }
+
+- ffa_dev->properties = tpbuf->properties;
+-
+ if (drv_info->version > FFA_VERSION_1_0 &&
+ !(tpbuf->properties & FFA_PARTITION_AARCH64_EXEC))
+ ffa_mode_32bit_set(ffa_dev);
+diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
+index a28e2a6a13d0..74169dd0f659 100644
+--- a/include/linux/arm_ffa.h
++++ b/include/linux/arm_ffa.h
+@@ -166,9 +166,12 @@ static inline void *ffa_dev_get_drvdata(struct ffa_device *fdev)
+ return dev_get_drvdata(&fdev->dev);
+ }
+
++struct ffa_partition_info;
++
+ #if IS_REACHABLE(CONFIG_ARM_FFA_TRANSPORT)
+-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
+- const struct ffa_ops *ops);
++struct ffa_device *
++ffa_device_register(const struct ffa_partition_info *part_info,
++ const struct ffa_ops *ops);
+ void ffa_device_unregister(struct ffa_device *ffa_dev);
+ int ffa_driver_register(struct ffa_driver *driver, struct module *owner,
+ const char *mod_name);
+@@ -176,9 +179,9 @@ void ffa_driver_unregister(struct ffa_driver *driver);
+ bool ffa_device_is_valid(struct ffa_device *ffa_dev);
+
+ #else
+-static inline
+-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
+- const struct ffa_ops *ops)
++static inline struct ffa_device *
++ffa_device_register(const struct ffa_partition_info *part_info,
++ const struct ffa_ops *ops)
+ {
+ return NULL;
+ }
+--
+2.39.5
+
--- /dev/null
+From 6430d3a2c3b25314895a943f3de02cb7ebe747b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Nov 2024 00:05:18 +0100
+Subject: firmware: arm_scmi: Fix i.MX build dependency
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit 514b2262ade48a0503ac6aa03c3bfb8c5be69b21 ]
+
+The newly added SCMI vendor driver references functions in the
+protocol driver but needs a Kconfig dependency to ensure it can link.
+Essentially, the Kconfig dependency needs to be reversed to match the
+link-time dependency:
+
+ | arm-linux-gnueabi-ld: sound/soc/fsl/fsl_mqs.o: in function `fsl_mqs_sm_write':
+ | fsl_mqs.c:(.text+0x1aa): undefined reference to `scmi_imx_misc_ctrl_set'
+ | arm-linux-gnueabi-ld: sound/soc/fsl/fsl_mqs.o: in function `fsl_mqs_sm_read':
+ | fsl_mqs.c:(.text+0x1ee): undefined reference to `scmi_imx_misc_ctrl_get'
+
+This however only works after changing the dependency in the SND_SOC_FSL_MQS
+driver as well, which uses 'select IMX_SCMI_MISC_DRV' to turn on a
+driver it depends on. This is generally a bad idea, so the best solution
+is to change that into a dependency.
+
+To allow the ASoC driver to keep building with the SCMI support, this
+needs to be an optional dependency that enforces the link-time
+dependency if IMX_SCMI_MISC_DRV is a loadable module but does not
+depend on it if that is disabled.
+
+Fixes: 61c9f03e22fc ("firmware: arm_scmi: Add initial support for i.MX MISC protocol")
+Fixes: 101c9023594a ("ASoC: fsl_mqs: Support accessing registers by scmi interface")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Acked-by: Mark Brown <broonie@kernel.org>
+Acked-by: Shengjiu Wang <shengjiu.wang@gmail.com>
+Message-Id: <20241115230555.2435004-1-arnd@kernel.org>
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_scmi/vendors/imx/Kconfig | 1 +
+ drivers/firmware/imx/Kconfig | 1 -
+ sound/soc/fsl/Kconfig | 1 +
+ 3 files changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/firmware/arm_scmi/vendors/imx/Kconfig b/drivers/firmware/arm_scmi/vendors/imx/Kconfig
+index 2883ed24a84d..a01bf5e47301 100644
+--- a/drivers/firmware/arm_scmi/vendors/imx/Kconfig
++++ b/drivers/firmware/arm_scmi/vendors/imx/Kconfig
+@@ -15,6 +15,7 @@ config IMX_SCMI_BBM_EXT
+ config IMX_SCMI_MISC_EXT
+ tristate "i.MX SCMI MISC EXTENSION"
+ depends on ARM_SCMI_PROTOCOL || (COMPILE_TEST && OF)
++ depends on IMX_SCMI_MISC_DRV
+ default y if ARCH_MXC
+ help
+ This enables i.MX System MISC control logic such as gpio expander
+diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig
+index 477d3f32d99a..907cd149c40a 100644
+--- a/drivers/firmware/imx/Kconfig
++++ b/drivers/firmware/imx/Kconfig
+@@ -25,7 +25,6 @@ config IMX_SCU
+
+ config IMX_SCMI_MISC_DRV
+ tristate "IMX SCMI MISC Protocol driver"
+- depends on IMX_SCMI_MISC_EXT || COMPILE_TEST
+ default y if ARCH_MXC
+ help
+ The System Controller Management Interface firmware (SCMI FW) is
+diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
+index e283751abfef..678540b78280 100644
+--- a/sound/soc/fsl/Kconfig
++++ b/sound/soc/fsl/Kconfig
+@@ -29,6 +29,7 @@ config SND_SOC_FSL_SAI
+ config SND_SOC_FSL_MQS
+ tristate "Medium Quality Sound (MQS) module support"
+ depends on SND_SOC_FSL_SAI
++ depends on IMX_SCMI_MISC_DRV || !IMX_SCMI_MISC_DRV
+ select REGMAP_MMIO
+ help
+ Say Y if you want to add Medium Quality Sound (MQS)
+--
+2.39.5
+
--- /dev/null
+From 0fdafff1cad66f14ff6ce0e4434ab26ae0d1e4e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 7 Dec 2024 00:19:34 +0100
+Subject: i2c: pnx: Fix timeout in wait functions
+
+From: Vladimir Riabchun <ferr.lambarginio@gmail.com>
+
+[ Upstream commit 7363f2d4c18557c99c536b70489187bb4e05c412 ]
+
+Since commit f63b94be6942 ("i2c: pnx: Fix potential deadlock warning
+from del_timer_sync() call in isr") jiffies are stored in
+i2c_pnx_algo_data.timeout, but wait_timeout and wait_reset are still
+using it as milliseconds. Convert jiffies back to milliseconds to wait
+for the expected amount of time.
+
+Fixes: f63b94be6942 ("i2c: pnx: Fix potential deadlock warning from del_timer_sync() call in isr")
+Signed-off-by: Vladimir Riabchun <ferr.lambarginio@gmail.com>
+Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/i2c/busses/i2c-pnx.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
+index 1dafadda73af..135300f3b534 100644
+--- a/drivers/i2c/busses/i2c-pnx.c
++++ b/drivers/i2c/busses/i2c-pnx.c
+@@ -95,7 +95,7 @@ enum {
+
+ static inline int wait_timeout(struct i2c_pnx_algo_data *data)
+ {
+- long timeout = data->timeout;
++ long timeout = jiffies_to_msecs(data->timeout);
+ while (timeout > 0 &&
+ (ioread32(I2C_REG_STS(data)) & mstatus_active)) {
+ mdelay(1);
+@@ -106,7 +106,7 @@ static inline int wait_timeout(struct i2c_pnx_algo_data *data)
+
+ static inline int wait_reset(struct i2c_pnx_algo_data *data)
+ {
+- long timeout = data->timeout;
++ long timeout = jiffies_to_msecs(data->timeout);
+ while (timeout > 0 &&
+ (ioread32(I2C_REG_CTL(data)) & mcntrl_reset)) {
+ mdelay(1);
+--
+2.39.5
+
--- /dev/null
+From 7db091e222bec2ba7fbad34cbcf8a5195bfdf7b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Dec 2024 12:40:11 +0000
+Subject: net: stmmac: fix TSO DMA API usage causing oops
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit 4c49f38e20a57f8abaebdf95b369295b153d1f8e ]
+
+Commit 66600fac7a98 ("net: stmmac: TSO: Fix unbalanced DMA map/unmap
+for non-paged SKB data") moved the assignment of tx_skbuff_dma[]'s
+members to be later in stmmac_tso_xmit().
+
+The buf (dma cookie) and len stored in this structure are passed to
+dma_unmap_single() by stmmac_tx_clean(). The DMA API requires that
+the dma cookie passed to dma_unmap_single() is the same as the value
+returned from dma_map_single(). However, by moving the assignment
+later, this is not the case when priv->dma_cap.addr64 > 32 as "des"
+is offset by proto_hdr_len.
+
+This causes problems such as:
+
+ dwc-eth-dwmac 2490000.ethernet eth0: Tx DMA map failed
+
+and with DMA_API_DEBUG enabled:
+
+ DMA-API: dwc-eth-dwmac 2490000.ethernet: device driver tries to free DMA memory it has not allocated [device address=0x000000ffffcf65c0] [size=66 bytes]
+
+Fix this by maintaining "des" as the original DMA cookie, and use
+tso_des to pass the offset DMA cookie to stmmac_tso_allocator().
+
+Full details of the crashes can be found at:
+https://lore.kernel.org/all/d8112193-0386-4e14-b516-37c2d838171a@nvidia.com/
+https://lore.kernel.org/all/klkzp5yn5kq5efgtrow6wbvnc46bcqfxs65nz3qy77ujr5turc@bwwhelz2l4dw/
+
+Reported-by: Jon Hunter <jonathanh@nvidia.com>
+Reported-by: Thierry Reding <thierry.reding@gmail.com>
+Fixes: 66600fac7a98 ("net: stmmac: TSO: Fix unbalanced DMA map/unmap for non-paged SKB data")
+Tested-by: Jon Hunter <jonathanh@nvidia.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Reviewed-by: Furong Xu <0x1207@gmail.com>
+Link: https://patch.msgid.link/E1tJXcx-006N4Z-PC@rmk-PC.armlinux.org.uk
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+index 766213ee82c1..cf7b59b8cc64 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -4220,8 +4220,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
+ struct stmmac_txq_stats *txq_stats;
+ struct stmmac_tx_queue *tx_q;
+ u32 pay_len, mss, queue;
++ dma_addr_t tso_des, des;
+ u8 proto_hdr_len, hdr;
+- dma_addr_t des;
+ bool set_ic;
+ int i;
+
+@@ -4317,14 +4317,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
+
+ /* If needed take extra descriptors to fill the remaining payload */
+ tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
++ tso_des = des;
+ } else {
+ stmmac_set_desc_addr(priv, first, des);
+ tmp_pay_len = pay_len;
+- des += proto_hdr_len;
++ tso_des = des + proto_hdr_len;
+ pay_len = 0;
+ }
+
+- stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue);
++ stmmac_tso_allocator(priv, tso_des, tmp_pay_len, (nfrags == 0), queue);
+
+ /* In case two or more DMA transmit descriptors are allocated for this
+ * non-paged SKB data, the DMA buffer address should be saved to
+--
+2.39.5
+
--- /dev/null
+From 95a23eb5715a3a7d024f9e4171b17925524c53ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Nov 2024 09:28:36 +0900
+Subject: p2sb: Do not scan and remove the P2SB device when it is unhidden
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+[ Upstream commit 360c400d0f568636c1b98d1d5f9f49aa3d420c70 ]
+
+When drivers access P2SB device resources, they call p2sb_bar(). Before
+the commit 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls
+during PCI device probe"), p2sb_bar() obtained the resources and then
+called pci_stop_and_remove_bus_device() for clean up. Then the P2SB
+device disappeared. The commit 5913320eb0b3 introduced the P2SB device
+resource cache feature in the boot process. During the resource cache,
+pci_stop_and_remove_bus_device() is called for the P2SB device, then the
+P2SB device disappears regardless of whether p2sb_bar() is called or
+not. Such P2SB device disappearance caused a confusion [1]. To avoid the
+confusion, avoid the pci_stop_and_remove_bus_device() call when the BIOS
+does not hide the P2SB device.
+
+For that purpose, cache the P2SB device resources only if the BIOS hides
+the P2SB device. Call p2sb_scan_and_cache() only if p2sb_hidden_by_bios
+is true. This allows removing two branches from p2sb_scan_and_cache().
+When p2sb_bar() is called, get the resources from the cache if the P2SB
+device is hidden. Otherwise, read the resources from the unhidden P2SB
+device.
+
+Reported-by: Daniel Walker (danielwa) <danielwa@cisco.com>
+Closes: https://lore.kernel.org/lkml/ZzTI+biIUTvFT6NC@goliath/ [1]
+Fixes: 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe")
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20241128002836.373745-5-shinichiro.kawasaki@wdc.com
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/p2sb.c | 42 +++++++++++++++++++++++++++++--------
+ 1 file changed, 33 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
+index 0bc6b21c4c20..c56650b9ff96 100644
+--- a/drivers/platform/x86/p2sb.c
++++ b/drivers/platform/x86/p2sb.c
+@@ -100,10 +100,8 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
+ /*
+ * The BIOS prevents the P2SB device from being enumerated by the PCI
+ * subsystem, so we need to unhide and hide it back to lookup the BAR.
+- * Unhide the P2SB device here, if needed.
+ */
+- if (p2sb_hidden_by_bios)
+- pci_bus_write_config_dword(bus, devfn, P2SBC, 0);
++ pci_bus_write_config_dword(bus, devfn, P2SBC, 0);
+
+ /* Scan the P2SB device and cache its BAR0 */
+ p2sb_scan_and_cache_devfn(bus, devfn);
+@@ -112,9 +110,7 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
+ if (devfn == P2SB_DEVFN_GOLDMONT)
+ p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT);
+
+- /* Hide the P2SB device, if it was hidden */
+- if (p2sb_hidden_by_bios)
+- pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE);
++ pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE);
+
+ if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res))
+ return -ENOENT;
+@@ -141,7 +137,7 @@ static int p2sb_cache_resources(void)
+ u32 value = P2SBC_HIDE;
+ struct pci_bus *bus;
+ u16 class;
+- int ret;
++ int ret = 0;
+
+ /* Get devfn for P2SB device itself */
+ p2sb_get_devfn(&devfn_p2sb);
+@@ -167,7 +163,12 @@ static int p2sb_cache_resources(void)
+ pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
+ p2sb_hidden_by_bios = value & P2SBC_HIDE;
+
+- ret = p2sb_scan_and_cache(bus, devfn_p2sb);
++ /*
++ * If the BIOS does not hide the P2SB device then its resources
++ * are accesilble. Cache them only if the P2SB device is hidden.
++ */
++ if (p2sb_hidden_by_bios)
++ ret = p2sb_scan_and_cache(bus, devfn_p2sb);
+
+ pci_unlock_rescan_remove();
+
+@@ -190,6 +191,26 @@ static int p2sb_read_from_cache(struct pci_bus *bus, unsigned int devfn,
+ return 0;
+ }
+
++static int p2sb_read_from_dev(struct pci_bus *bus, unsigned int devfn,
++ struct resource *mem)
++{
++ struct pci_dev *pdev;
++ int ret = 0;
++
++ pdev = pci_get_slot(bus, devfn);
++ if (!pdev)
++ return -ENODEV;
++
++ if (p2sb_valid_resource(pci_resource_n(pdev, 0)))
++ p2sb_read_bar0(pdev, mem);
++ else
++ ret = -ENOENT;
++
++ pci_dev_put(pdev);
++
++ return ret;
++}
++
+ /**
+ * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR
+ * @bus: PCI bus to communicate with
+@@ -213,7 +234,10 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+ if (!devfn)
+ p2sb_get_devfn(&devfn);
+
+- return p2sb_read_from_cache(bus, devfn, mem);
++ if (p2sb_hidden_by_bios)
++ return p2sb_read_from_cache(bus, devfn, mem);
++
++ return p2sb_read_from_dev(bus, devfn, mem);
+ }
+ EXPORT_SYMBOL_GPL(p2sb_bar);
+
+--
+2.39.5
+
--- /dev/null
+From 27398c9595ae269e34ca424295794a655b0c9519 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Nov 2024 09:28:33 +0900
+Subject: p2sb: Factor out p2sb_read_from_cache()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+[ Upstream commit 9244524d60ddea55f4df54c51200e8fef2032447 ]
+
+To prepare for the following fix, factor out the code to read the P2SB
+resource from the cache to the new function p2sb_read_from_cache().
+
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20241128002836.373745-2-shinichiro.kawasaki@wdc.com
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Stable-dep-of: 360c400d0f56 ("p2sb: Do not scan and remove the P2SB device when it is unhidden")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/p2sb.c | 28 +++++++++++++++++-----------
+ 1 file changed, 17 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
+index 31f38309b389..aa34b8a69bc1 100644
+--- a/drivers/platform/x86/p2sb.c
++++ b/drivers/platform/x86/p2sb.c
+@@ -171,6 +171,22 @@ static int p2sb_cache_resources(void)
+ return ret;
+ }
+
++static int p2sb_read_from_cache(struct pci_bus *bus, unsigned int devfn,
++ struct resource *mem)
++{
++ struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)];
++
++ if (cache->bus_dev_id != bus->dev.id)
++ return -ENODEV;
++
++ if (!p2sb_valid_resource(&cache->res))
++ return -ENOENT;
++
++ memcpy(mem, &cache->res, sizeof(*mem));
++
++ return 0;
++}
++
+ /**
+ * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR
+ * @bus: PCI bus to communicate with
+@@ -187,8 +203,6 @@ static int p2sb_cache_resources(void)
+ */
+ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+ {
+- struct p2sb_res_cache *cache;
+-
+ bus = p2sb_get_bus(bus);
+ if (!bus)
+ return -ENODEV;
+@@ -196,15 +210,7 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+ if (!devfn)
+ p2sb_get_devfn(&devfn);
+
+- cache = &p2sb_resources[PCI_FUNC(devfn)];
+- if (cache->bus_dev_id != bus->dev.id)
+- return -ENODEV;
+-
+- if (!p2sb_valid_resource(&cache->res))
+- return -ENOENT;
+-
+- memcpy(mem, &cache->res, sizeof(*mem));
+- return 0;
++ return p2sb_read_from_cache(bus, devfn, mem);
+ }
+ EXPORT_SYMBOL_GPL(p2sb_bar);
+
+--
+2.39.5
+
--- /dev/null
+From 1da75876350e6c54be662f7dbf00e4b2261190e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Nov 2024 09:28:34 +0900
+Subject: p2sb: Introduce the global flag p2sb_hidden_by_bios
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+[ Upstream commit ae3e6ebc5ab046d434c05c58a3e3f7e94441fec2 ]
+
+To prepare for the following fix, introduce the global flag
+p2sb_hidden_by_bios. Check if the BIOS hides the P2SB device and store
+the result in the flag. This allows the check result to be referenced
+across functions.
+
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20241128002836.373745-3-shinichiro.kawasaki@wdc.com
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Stable-dep-of: 360c400d0f56 ("p2sb: Do not scan and remove the P2SB device when it is unhidden")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/p2sb.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
+index aa34b8a69bc1..273ac90c8fbd 100644
+--- a/drivers/platform/x86/p2sb.c
++++ b/drivers/platform/x86/p2sb.c
+@@ -42,6 +42,7 @@ struct p2sb_res_cache {
+ };
+
+ static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE];
++static bool p2sb_hidden_by_bios;
+
+ static void p2sb_get_devfn(unsigned int *devfn)
+ {
+@@ -157,13 +158,14 @@ static int p2sb_cache_resources(void)
+ * Unhide the P2SB device here, if needed.
+ */
+ pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
+- if (value & P2SBC_HIDE)
++ p2sb_hidden_by_bios = value & P2SBC_HIDE;
++ if (p2sb_hidden_by_bios)
+ pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0);
+
+ ret = p2sb_scan_and_cache(bus, devfn_p2sb);
+
+ /* Hide the P2SB device, if it was hidden */
+- if (value & P2SBC_HIDE)
++ if (p2sb_hidden_by_bios)
+ pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE);
+
+ pci_unlock_rescan_remove();
+--
+2.39.5
+
--- /dev/null
+From 2db2be13b4ed140c36ac1a0b515d000ee3274e6d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Nov 2024 09:28:35 +0900
+Subject: p2sb: Move P2SB hide and unhide code to p2sb_scan_and_cache()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+[ Upstream commit 0286070c74ee48391fc07f7f617460479472d221 ]
+
+To prepare for the following fix, move the code to hide and unhide the
+P2SB device from p2sb_cache_resources() to p2sb_scan_and_cache().
+
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20241128002836.373745-4-shinichiro.kawasaki@wdc.com
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Stable-dep-of: 360c400d0f56 ("p2sb: Do not scan and remove the P2SB device when it is unhidden")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/p2sb.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
+index 273ac90c8fbd..0bc6b21c4c20 100644
+--- a/drivers/platform/x86/p2sb.c
++++ b/drivers/platform/x86/p2sb.c
+@@ -97,6 +97,14 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn)
+
+ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
+ {
++ /*
++ * The BIOS prevents the P2SB device from being enumerated by the PCI
++ * subsystem, so we need to unhide and hide it back to lookup the BAR.
++ * Unhide the P2SB device here, if needed.
++ */
++ if (p2sb_hidden_by_bios)
++ pci_bus_write_config_dword(bus, devfn, P2SBC, 0);
++
+ /* Scan the P2SB device and cache its BAR0 */
+ p2sb_scan_and_cache_devfn(bus, devfn);
+
+@@ -104,6 +112,10 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
+ if (devfn == P2SB_DEVFN_GOLDMONT)
+ p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT);
+
++ /* Hide the P2SB device, if it was hidden */
++ if (p2sb_hidden_by_bios)
++ pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE);
++
+ if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res))
+ return -ENOENT;
+
+@@ -152,22 +164,11 @@ static int p2sb_cache_resources(void)
+ */
+ pci_lock_rescan_remove();
+
+- /*
+- * The BIOS prevents the P2SB device from being enumerated by the PCI
+- * subsystem, so we need to unhide and hide it back to lookup the BAR.
+- * Unhide the P2SB device here, if needed.
+- */
+ pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
+ p2sb_hidden_by_bios = value & P2SBC_HIDE;
+- if (p2sb_hidden_by_bios)
+- pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0);
+
+ ret = p2sb_scan_and_cache(bus, devfn_p2sb);
+
+- /* Hide the P2SB device, if it was hidden */
+- if (p2sb_hidden_by_bios)
+- pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE);
+-
+ pci_unlock_rescan_remove();
+
+ return ret;
+--
+2.39.5
+
--- /dev/null
+From e357b868a26c8404b85881e96039e0d7dbf67171 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Nov 2024 04:18:40 +0000
+Subject: RISC-V: KVM: Fix csr_write -> csr_set for HVIEN PMU overflow bit
+
+From: Michael Neuling <michaelneuling@tenstorrent.com>
+
+[ Upstream commit ea6398a5af81e3e7fb3da5d261694d479a321fd9 ]
+
+This doesn't cause a problem currently as HVIEN isn't used elsewhere
+yet. Found by inspection.
+
+Signed-off-by: Michael Neuling <michaelneuling@tenstorrent.com>
+Fixes: 16b0bde9a37c ("RISC-V: KVM: Add perf sampling support for guests")
+Reviewed-by: Atish Patra <atishp@rivosinc.com>
+Reviewed-by: Anup Patel <anup@brainfault.org>
+Link: https://lore.kernel.org/r/20241127041840.419940-1-michaelneuling@tenstorrent.com
+Signed-off-by: Anup Patel <anup@brainfault.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kvm/aia.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
+index 2967d305c442..9f3b527596de 100644
+--- a/arch/riscv/kvm/aia.c
++++ b/arch/riscv/kvm/aia.c
+@@ -552,7 +552,7 @@ void kvm_riscv_aia_enable(void)
+ csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
+ /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */
+ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+- csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF));
++ csr_set(CSR_HVIEN, BIT(IRQ_PMU_OVF));
+ }
+
+ void kvm_riscv_aia_disable(void)
+--
+2.39.5
+
--- /dev/null
+From 0d7624d8f519497f54bf832e96a0d8646cc8fe01 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Dec 2024 17:43:48 +0100
+Subject: s390/ipl: Fix never less than zero warning
+
+From: Alexander Gordeev <agordeev@linux.ibm.com>
+
+[ Upstream commit 5fa49dd8e521a42379e5e41fcf2c92edaaec0a8b ]
+
+DEFINE_IPL_ATTR_STR_RW() macro produces "unsigned 'len' is never less
+than zero." warning when sys_vmcmd_on_*_store() callbacks are defined.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202412081614.5uel8F6W-lkp@intel.com/
+Fixes: 247576bf624a ("s390/ipl: Do not accept z/VM CP diag X'008' cmds longer than max length")
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/ipl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
+index f17bb7bf9392..5fa203f4bc6b 100644
+--- a/arch/s390/kernel/ipl.c
++++ b/arch/s390/kernel/ipl.c
+@@ -270,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
+ if (len >= sizeof(_value)) \
+ return -E2BIG; \
+ len = strscpy(_value, buf, sizeof(_value)); \
+- if (len < 0) \
++ if ((ssize_t)len < 0) \
+ return len; \
+ strim(_value); \
+ return len; \
+--
+2.39.5
+
--- /dev/null
+From 025f5a1045222edc3b8a26a391636ca98d5dbcdf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Dec 2024 12:35:34 +0100
+Subject: s390/mm: Consider KMSAN modules metadata for paging levels
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+[ Upstream commit 282da38b465395c930687974627c24f47ddce5ff ]
+
+The calculation determining whether to use three- or four-level paging
+didn't account for KMSAN modules metadata. Include this metadata in the
+virtual memory size calculation to ensure correct paging mode selection
+and avoid potentially unnecessary physical memory size limitations.
+
+Fixes: 65ca73f9fb36 ("s390/mm: define KMSAN metadata for vmalloc and modules")
+Acked-by: Heiko Carstens <hca@linux.ibm.com>
+Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/boot/startup.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
+index c8f149ad77e5..c2ee0745f59e 100644
+--- a/arch/s390/boot/startup.c
++++ b/arch/s390/boot/startup.c
+@@ -231,6 +231,8 @@ static unsigned long get_vmem_size(unsigned long identity_size,
+ vsize = round_up(SZ_2G + max_mappable, rte_size) +
+ round_up(vmemmap_size, rte_size) +
+ FIXMAP_SIZE + MODULES_LEN + KASLR_LEN;
++ if (IS_ENABLED(CONFIG_KMSAN))
++ vsize += MODULES_LEN * 2;
+ return size_add(vsize, vmalloc_size);
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 44c8bb66dcae7fbaaa7077cd0e045d83aac1b658 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 22:22:36 -0500
+Subject: sched/dlserver: Fix dlserver double enqueue
+
+From: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
+
+[ Upstream commit b53127db1dbf7f1047cf35c10922d801dcd40324 ]
+
+dlserver can get dequeued during a dlserver pick_task due to the delayed
+dequeue feature and this can lead to issues with dlserver logic as it
+still thinks that dlserver is on the runqueue. The dlserver throttling
+and replenish logic gets confused and can lead to double enqueue of
+dlserver.
+
+Double enqueue of dlserver could happen due to a couple of reasons:
+
+Case 1
+------
+
+Delayed dequeue feature[1] can cause dlserver being stopped during a
+pick initiated by dlserver:
+ __pick_next_task
+ pick_task_dl -> server_pick_task
+ pick_task_fair
+ pick_next_entity (if (sched_delayed))
+ dequeue_entities
+ dl_server_stop
+
+server_pick_task goes ahead with update_curr_dl_se without knowing that
+dlserver is dequeued and this confuses the logic and may lead to
+unintended enqueue while the server is stopped.
+
+Case 2
+------
+A race condition between a task dequeue on one cpu and same task's enqueue
+on this cpu by a remote cpu while the lock is released causing dlserver
+double enqueue.
+
+One cpu would be in the schedule() and releasing RQ-lock:
+
+current->state = TASK_INTERRUPTIBLE();
+ schedule();
+ deactivate_task()
+ dl_stop_server();
+ pick_next_task()
+ pick_next_task_fair()
+ sched_balance_newidle()
+ rq_unlock(this_rq)
+
+at which point another CPU can take our RQ-lock and do:
+
+ try_to_wake_up()
+ ttwu_queue()
+ rq_lock()
+ ...
+ activate_task()
+ dl_server_start() --> first enqueue
+ wakeup_preempt() := check_preempt_wakeup_fair()
+ update_curr()
+ update_curr_task()
+ if (current->dl_server)
+ dl_server_update()
+ enqueue_dl_entity() --> second enqueue
+
+This bug was not apparent as the enqueue in dl_server_start doesn't
+usually happen because of the defer logic. But as a side effect of the
+first case (dequeue during dlserver pick), dl_throttled and dl_yielded
+will be set; this messes up the time accounting of dlserver, which then
+leads to an enqueue in dl_server_start.
+
+Have an explicit flag representing the status of dlserver to avoid the
+confusion. This is set in dl_server_start and reset in dl_server_stop.
+
+Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers")
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: "Vineeth Pillai (Google)" <vineeth@bitbyteword.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Marcel Ziswiler <marcel.ziswiler@codethink.co.uk> # ROCK 5B
+Link: https://lkml.kernel.org/r/20241213032244.877029-1-vineeth@bitbyteword.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched.h | 7 +++++++
+ kernel/sched/deadline.c | 8 ++++++--
+ kernel/sched/sched.h | 5 +++++
+ 3 files changed, 18 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index bb343136ddd0..c14446c6164d 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -656,6 +656,12 @@ struct sched_dl_entity {
+ * @dl_defer_armed tells if the deferrable server is waiting
+ * for the replenishment timer to activate it.
+ *
++ * @dl_server_active tells if the dlserver is active(started).
++ * dlserver is started on first cfs enqueue on an idle runqueue
++ * and is stopped when a dequeue results in 0 cfs tasks on the
++ * runqueue. In other words, dlserver is active only when cpu's
++ * runqueue has atleast one cfs task.
++ *
+ * @dl_defer_running tells if the deferrable server is actually
+ * running, skipping the defer phase.
+ */
+@@ -664,6 +670,7 @@ struct sched_dl_entity {
+ unsigned int dl_non_contending : 1;
+ unsigned int dl_overrun : 1;
+ unsigned int dl_server : 1;
++ unsigned int dl_server_active : 1;
+ unsigned int dl_defer : 1;
+ unsigned int dl_defer_armed : 1;
+ unsigned int dl_defer_running : 1;
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index fc6f41ac33eb..a17c23b53049 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1647,6 +1647,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
+ if (!dl_se->dl_runtime)
+ return;
+
++ dl_se->dl_server_active = 1;
+ enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+ if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl))
+ resched_curr(dl_se->rq);
+@@ -1661,6 +1662,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
+ hrtimer_try_to_cancel(&dl_se->dl_timer);
+ dl_se->dl_defer_armed = 0;
+ dl_se->dl_throttled = 0;
++ dl_se->dl_server_active = 0;
+ }
+
+ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
+@@ -2420,8 +2422,10 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
+ if (dl_server(dl_se)) {
+ p = dl_se->server_pick_task(dl_se);
+ if (!p) {
+- dl_se->dl_yielded = 1;
+- update_curr_dl_se(rq, dl_se, 0);
++ if (dl_server_active(dl_se)) {
++ dl_se->dl_yielded = 1;
++ update_curr_dl_se(rq, dl_se, 0);
++ }
+ goto again;
+ }
+ rq->dl_server = dl_se;
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index c53696275ca1..f2ef520513c4 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -398,6 +398,11 @@ extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq
+ extern int dl_server_apply_params(struct sched_dl_entity *dl_se,
+ u64 runtime, u64 period, bool init);
+
++static inline bool dl_server_active(struct sched_dl_entity *dl_se)
++{
++ return dl_se->dl_server_active;
++}
++
+ #ifdef CONFIG_CGROUP_SCHED
+
+ extern struct list_head task_groups;
+--
+2.39.5
+
--- /dev/null
+From c54ff10a7b8301b633e1aa2769b493f1ab170b56 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 22:22:37 -0500
+Subject: sched/dlserver: Fix dlserver time accounting
+
+From: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
+
+[ Upstream commit c7f7e9c73178e0e342486fd31e7f363ef60e3f83 ]
+
+dlserver time is accounted when:
+ - dlserver is active and the dlserver proxies the cfs task.
+ - dlserver is active but deferred and cfs task runs after being picked
+ through the normal fair class pick.
+
+dl_server_update is called in two places to make sure that both the
+above times are accounted for. But it doesn't check if dlserver is
+active or not. Now that we have this dl_server_active flag, we can
+consolidate dl_server_update into one place and all we need to check is
+whether dlserver is active or not. When dlserver is active there are only
+two possible conditions:
+ - dlserver is deferred.
+ - cfs task is running on behalf of dlserver.
+
+Fixes: a110a81c52a9 ("sched/deadline: Deferrable dl server")
+Signed-off-by: "Vineeth Pillai (Google)" <vineeth@bitbyteword.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Marcel Ziswiler <marcel.ziswiler@codethink.co.uk> # ROCK 5B
+Link: https://lore.kernel.org/r/20241213032244.877029-2-vineeth@bitbyteword.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 93142f9077c7..1ca96c99872f 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1159,8 +1159,6 @@ static inline void update_curr_task(struct task_struct *p, s64 delta_exec)
+ trace_sched_stat_runtime(p, delta_exec);
+ account_group_exec_runtime(p, delta_exec);
+ cgroup_account_cputime(p, delta_exec);
+- if (p->dl_server)
+- dl_server_update(p->dl_server, delta_exec);
+ }
+
+ static inline bool did_preempt_short(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+@@ -1237,11 +1235,16 @@ static void update_curr(struct cfs_rq *cfs_rq)
+ update_curr_task(p, delta_exec);
+
+ /*
+- * Any fair task that runs outside of fair_server should
+- * account against fair_server such that it can account for
+- * this time and possibly avoid running this period.
++ * If the fair_server is active, we need to account for the
++ * fair_server time whether or not the task is running on
++ * behalf of fair_server or not:
++ * - If the task is running on behalf of fair_server, we need
++ * to limit its time based on the assigned runtime.
++ * - Fair task that runs outside of fair_server should account
++ * against fair_server such that it can account for this time
++ * and possibly avoid running this period.
+ */
+- if (p->dl_server != &rq->fair_server)
++ if (dl_server_active(&rq->fair_server))
+ dl_server_update(&rq->fair_server, delta_exec);
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 6d6343fb58e0c45c04860781661503dcde5fbd6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Dec 2024 18:45:57 +0100
+Subject: sched/eevdf: More PELT vs DELAYED_DEQUEUE
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 76f2f783294d7d55c2564e2dfb0a7279ba0bc264 ]
+
+Vincent and Dietmar noted that while
+commit fc1892becd56 ("sched/eevdf: Fixup PELT vs DELAYED_DEQUEUE") fixes
+the entity runnable stats, it does not adjust the cfs_rq runnable stats,
+which are based off of h_nr_running.
+
+Track h_nr_delayed such that we can discount those and adjust the
+signal.
+
+Fixes: fc1892becd56 ("sched/eevdf: Fixup PELT vs DELAYED_DEQUEUE")
+Closes: https://lore.kernel.org/lkml/a9a45193-d0c6-4ba2-a822-464ad30b550e@arm.com/
+Closes: https://lore.kernel.org/lkml/CAKfTPtCNUvWE_GX5LyvTF-WdxUT=ZgvZZv-4t=eWntg5uOFqiQ@mail.gmail.com/
+[ Fixes checkpatch warnings and rebased ]
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reported-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Reported-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Link: https://lore.kernel.org/r/20241202174606.4074512-3-vincent.guittot@linaro.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/debug.c | 1 +
+ kernel/sched/fair.c | 51 +++++++++++++++++++++++++++++++++++++++-----
+ kernel/sched/pelt.c | 2 +-
+ kernel/sched/sched.h | 8 +++++--
+ 4 files changed, 54 insertions(+), 8 deletions(-)
+
+diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
+index f4035c7a0fa1..82b165bf48c4 100644
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -844,6 +844,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
+ SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
+ SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
++ SEQ_printf(m, " .%-30s: %d\n", "h_nr_delayed", cfs_rq->h_nr_delayed);
+ SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running",
+ cfs_rq->idle_nr_running);
+ SEQ_printf(m, " .%-30s: %d\n", "idle_h_nr_running",
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index c467e389cd6f..93142f9077c7 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5471,9 +5471,33 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+
+ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+
+-static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
++static void set_delayed(struct sched_entity *se)
++{
++ se->sched_delayed = 1;
++ for_each_sched_entity(se) {
++ struct cfs_rq *cfs_rq = cfs_rq_of(se);
++
++ cfs_rq->h_nr_delayed++;
++ if (cfs_rq_throttled(cfs_rq))
++ break;
++ }
++}
++
++static void clear_delayed(struct sched_entity *se)
+ {
+ se->sched_delayed = 0;
++ for_each_sched_entity(se) {
++ struct cfs_rq *cfs_rq = cfs_rq_of(se);
++
++ cfs_rq->h_nr_delayed--;
++ if (cfs_rq_throttled(cfs_rq))
++ break;
++ }
++}
++
++static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
++{
++ clear_delayed(se);
+ if (sched_feat(DELAY_ZERO) && se->vlag > 0)
+ se->vlag = 0;
+ }
+@@ -5502,7 +5526,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ if (sched_feat(DELAY_DEQUEUE) && delay &&
+ !entity_eligible(cfs_rq, se)) {
+ update_load_avg(cfs_rq, se, 0);
+- se->sched_delayed = 1;
++ set_delayed(se);
+ return false;
+ }
+ }
+@@ -5920,7 +5944,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+ struct rq *rq = rq_of(cfs_rq);
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+ struct sched_entity *se;
+- long task_delta, idle_task_delta, dequeue = 1;
++ long task_delta, idle_task_delta, delayed_delta, dequeue = 1;
+ long rq_h_nr_running = rq->cfs.h_nr_running;
+
+ raw_spin_lock(&cfs_b->lock);
+@@ -5953,6 +5977,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+
+ task_delta = cfs_rq->h_nr_running;
+ idle_task_delta = cfs_rq->idle_h_nr_running;
++ delayed_delta = cfs_rq->h_nr_delayed;
+ for_each_sched_entity(se) {
+ struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+ int flags;
+@@ -5976,6 +6001,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+
+ qcfs_rq->h_nr_running -= task_delta;
+ qcfs_rq->idle_h_nr_running -= idle_task_delta;
++ qcfs_rq->h_nr_delayed -= delayed_delta;
+
+ if (qcfs_rq->load.weight) {
+ /* Avoid re-evaluating load for this entity: */
+@@ -5998,6 +6024,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+
+ qcfs_rq->h_nr_running -= task_delta;
+ qcfs_rq->idle_h_nr_running -= idle_task_delta;
++ qcfs_rq->h_nr_delayed -= delayed_delta;
+ }
+
+ /* At this point se is NULL and we are at root level*/
+@@ -6023,7 +6050,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+ struct rq *rq = rq_of(cfs_rq);
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+ struct sched_entity *se;
+- long task_delta, idle_task_delta;
++ long task_delta, idle_task_delta, delayed_delta;
+ long rq_h_nr_running = rq->cfs.h_nr_running;
+
+ se = cfs_rq->tg->se[cpu_of(rq)];
+@@ -6059,6 +6086,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+
+ task_delta = cfs_rq->h_nr_running;
+ idle_task_delta = cfs_rq->idle_h_nr_running;
++ delayed_delta = cfs_rq->h_nr_delayed;
+ for_each_sched_entity(se) {
+ struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+
+@@ -6076,6 +6104,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+
+ qcfs_rq->h_nr_running += task_delta;
+ qcfs_rq->idle_h_nr_running += idle_task_delta;
++ qcfs_rq->h_nr_delayed += delayed_delta;
+
+ /* end evaluation on encountering a throttled cfs_rq */
+ if (cfs_rq_throttled(qcfs_rq))
+@@ -6093,6 +6122,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+
+ qcfs_rq->h_nr_running += task_delta;
+ qcfs_rq->idle_h_nr_running += idle_task_delta;
++ qcfs_rq->h_nr_delayed += delayed_delta;
+
+ /* end evaluation on encountering a throttled cfs_rq */
+ if (cfs_rq_throttled(qcfs_rq))
+@@ -6946,7 +6976,7 @@ requeue_delayed_entity(struct sched_entity *se)
+ }
+
+ update_load_avg(cfs_rq, se, 0);
+- se->sched_delayed = 0;
++ clear_delayed(se);
+ }
+
+ /*
+@@ -6960,6 +6990,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
+ int idle_h_nr_running = task_has_idle_policy(p);
++ int h_nr_delayed = 0;
+ int task_new = !(flags & ENQUEUE_WAKEUP);
+ int rq_h_nr_running = rq->cfs.h_nr_running;
+ u64 slice = 0;
+@@ -6986,6 +7017,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ if (p->in_iowait)
+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
+
++ if (task_new)
++ h_nr_delayed = !!se->sched_delayed;
++
+ for_each_sched_entity(se) {
+ if (se->on_rq) {
+ if (se->sched_delayed)
+@@ -7008,6 +7042,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+
+ cfs_rq->h_nr_running++;
+ cfs_rq->idle_h_nr_running += idle_h_nr_running;
++ cfs_rq->h_nr_delayed += h_nr_delayed;
+
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = 1;
+@@ -7031,6 +7066,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+
+ cfs_rq->h_nr_running++;
+ cfs_rq->idle_h_nr_running += idle_h_nr_running;
++ cfs_rq->h_nr_delayed += h_nr_delayed;
+
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = 1;
+@@ -7093,6 +7129,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+ struct task_struct *p = NULL;
+ int idle_h_nr_running = 0;
+ int h_nr_running = 0;
++ int h_nr_delayed = 0;
+ struct cfs_rq *cfs_rq;
+ u64 slice = 0;
+
+@@ -7100,6 +7137,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+ p = task_of(se);
+ h_nr_running = 1;
+ idle_h_nr_running = task_has_idle_policy(p);
++ if (!task_sleep && !task_delayed)
++ h_nr_delayed = !!se->sched_delayed;
+ } else {
+ cfs_rq = group_cfs_rq(se);
+ slice = cfs_rq_min_slice(cfs_rq);
+@@ -7117,6 +7156,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+
+ cfs_rq->h_nr_running -= h_nr_running;
+ cfs_rq->idle_h_nr_running -= idle_h_nr_running;
++ cfs_rq->h_nr_delayed -= h_nr_delayed;
+
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = h_nr_running;
+@@ -7155,6 +7195,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+
+ cfs_rq->h_nr_running -= h_nr_running;
+ cfs_rq->idle_h_nr_running -= idle_h_nr_running;
++ cfs_rq->h_nr_delayed -= h_nr_delayed;
+
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = h_nr_running;
+diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
+index a9c65d97b3ca..171a802420a1 100644
+--- a/kernel/sched/pelt.c
++++ b/kernel/sched/pelt.c
+@@ -321,7 +321,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
+ {
+ if (___update_load_sum(now, &cfs_rq->avg,
+ scale_load_down(cfs_rq->load.weight),
+- cfs_rq->h_nr_running,
++ cfs_rq->h_nr_running - cfs_rq->h_nr_delayed,
+ cfs_rq->curr != NULL)) {
+
+ ___update_load_avg(&cfs_rq->avg, 1);
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index c03b3d7b320e..c53696275ca1 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -649,6 +649,7 @@ struct cfs_rq {
+ unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
+ unsigned int idle_nr_running; /* SCHED_IDLE */
+ unsigned int idle_h_nr_running; /* SCHED_IDLE */
++ unsigned int h_nr_delayed;
+
+ s64 avg_vruntime;
+ u64 avg_load;
+@@ -898,8 +899,11 @@ struct dl_rq {
+
+ static inline void se_update_runnable(struct sched_entity *se)
+ {
+- if (!entity_is_task(se))
+- se->runnable_weight = se->my_q->h_nr_running;
++ if (!entity_is_task(se)) {
++ struct cfs_rq *cfs_rq = se->my_q;
++
++ se->runnable_weight = cfs_rq->h_nr_running - cfs_rq->h_nr_delayed;
++ }
+ }
+
+ static inline long se_runnable(struct sched_entity *se)
+--
+2.39.5
+
--- /dev/null
+From 4ad329aa5f61d1ce6c81094d25ee4a35dc4e5ebb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Nov 2024 12:59:54 +0530
+Subject: sched/fair: Fix NEXT_BUDDY
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit 493afbd187c4c9cc1642792c0d9ba400c3d6d90d ]
+
+Adam reports that enabling NEXT_BUDDY instantly triggers a WARN in
+pick_next_entity().
+
+Moving clear_buddies() up before the delayed dequeue bits ensures
+no ->next buddy becomes delayed. Further ensure no new ->next buddy
+ever starts as delayed.
+
+Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue")
+Reported-by: Adam Li <adamli@os.amperecomputing.com>
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Adam Li <adamli@os.amperecomputing.com>
+Link: https://lkml.kernel.org/r/670a0d54-e398-4b1f-8a6e-90784e2fdf89@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 782ce70ebd1b..c467e389cd6f 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5484,6 +5484,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ bool sleep = flags & DEQUEUE_SLEEP;
+
+ update_curr(cfs_rq);
++ clear_buddies(cfs_rq, se);
+
+ if (flags & DEQUEUE_DELAYED) {
+ SCHED_WARN_ON(!se->sched_delayed);
+@@ -5500,8 +5501,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+
+ if (sched_feat(DELAY_DEQUEUE) && delay &&
+ !entity_eligible(cfs_rq, se)) {
+- if (cfs_rq->next == se)
+- cfs_rq->next = NULL;
+ update_load_avg(cfs_rq, se, 0);
+ se->sched_delayed = 1;
+ return false;
+@@ -5526,8 +5525,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+
+ update_stats_dequeue_fair(cfs_rq, se, flags);
+
+- clear_buddies(cfs_rq, se);
+-
+ update_entity_lag(cfs_rq, se);
+ if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
+ se->deadline -= se->vruntime;
+@@ -8786,7 +8783,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
+ if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
+ return;
+
+- if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) {
++ if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) {
+ set_next_buddy(pse);
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 4293c828ce43bf82f65abe9fd85d5138ddb58d99 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Dec 2024 18:45:56 +0100
+Subject: sched/fair: Fix sched_can_stop_tick() for fair tasks
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+[ Upstream commit c1f43c342e1f2e32f0620bf2e972e2a9ea0a1e60 ]
+
+We can't stop the tick of a rq if there are at least 2 tasks enqueued in
+the whole hierarchy and not only at the root cfs rq.
+
+rq->cfs.nr_running tracks the number of sched_entity at one level
+whereas rq->cfs.h_nr_running tracks all queued tasks in the
+hierarchy.
+
+Fixes: 11cc374f4643b ("sched_ext: Simplify scx_can_stop_tick() invocation in sched_can_stop_tick()")
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Link: https://lore.kernel.org/r/20241202174606.4074512-2-vincent.guittot@linaro.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 6cc12777bb11..d07dc87787df 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1300,7 +1300,7 @@ bool sched_can_stop_tick(struct rq *rq)
+ if (scx_enabled() && !scx_can_stop_tick(rq))
+ return false;
+
+- if (rq->cfs.nr_running > 1)
++ if (rq->cfs.h_nr_running > 1)
+ return false;
+
+ /*
+--
+2.39.5
+
net-sched-fix-ordering-of-qlen-adjustment.patch
+net-stmmac-fix-tso-dma-api-usage-causing-oops.patch
+firmware-arm_scmi-fix-i.mx-build-dependency.patch
+firmware-arm_ffa-fix-the-race-around-setting-ffa_dev.patch
+risc-v-kvm-fix-csr_write-csr_set-for-hvien-pmu-overf.patch
+sched-fair-fix-next_buddy.patch
+sched-fair-fix-sched_can_stop_tick-for-fair-tasks.patch
+sched-eevdf-more-pelt-vs-delayed_dequeue.patch
+p2sb-factor-out-p2sb_read_from_cache.patch
+p2sb-introduce-the-global-flag-p2sb_hidden_by_bios.patch
+p2sb-move-p2sb-hide-and-unhide-code-to-p2sb_scan_and.patch
+p2sb-do-not-scan-and-remove-the-p2sb-device-when-it-.patch
+i2c-pnx-fix-timeout-in-wait-functions.patch
+s390-ipl-fix-never-less-than-zero-warning.patch
+erofs-fix-psi-memstall-accounting.patch
+sched-dlserver-fix-dlserver-double-enqueue.patch
+sched-dlserver-fix-dlserver-time-accounting.patch
+s390-mm-consider-kmsan-modules-metadata-for-paging-l.patch
+erofs-add-erofs_sb_free-helper.patch
+erofs-use-struct-erofs_device_info-for-the-primary-d.patch
+erofs-reference-struct-erofs_device_info-for-erofs_m.patch
+erofs-use-buffered-i-o-for-file-backed-mounts-by-def.patch
+xfs-sb_spino_align-is-not-verified.patch
+xfs-fix-sparse-inode-limits-on-runt-ag.patch
+xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch
+xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch
+xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch
--- /dev/null
+From 289808d43a0780664c93ce21d6b74573775fbc3b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:50:52 -0800
+Subject: xfs: fix off-by-one error in fsmap's end_daddr usage
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit a440a28ddbdcb861150987b4d6e828631656b92f upstream.
+
+In commit ca6448aed4f10a, we created an "end_daddr" variable to fix
+fsmap reporting when the end of the range requested falls in the middle
+of an unknown (aka free on the rmapbt) region. Unfortunately, I didn't
+notice that the code sets end_daddr to the last sector of the device
+but then uses that quantity to compute the length of the synthesized
+mapping.
+
+Zizhi Wo later observed that when end_daddr isn't set, we still don't
+report the last fsblock on a device because in that case (aka when
+info->last is true), the info->high mapping that we pass to
+xfs_getfsmap_group_helper has a startblock that points to the last
+fsblock. This is also wrong because the code uses startblock to
+compute the length of the synthesized mapping.
+
+Fix the second problem by setting end_daddr unconditionally, and fix the
+first problem by setting start_daddr to one past the end of the range to
+query.
+
+Cc: <stable@vger.kernel.org> # v6.11
+Fixes: ca6448aed4f10a ("xfs: Fix missing interval for missing_owner in xfs fsmap")
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Reported-by: Zizhi Wo <wozizhi@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_fsmap.c | 29 ++++++++++++++++++-----------
+ 1 file changed, 18 insertions(+), 11 deletions(-)
+
+diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
+index ae18ab86e608..8712b891defb 100644
+--- a/fs/xfs/xfs_fsmap.c
++++ b/fs/xfs/xfs_fsmap.c
+@@ -162,7 +162,8 @@ struct xfs_getfsmap_info {
+ xfs_daddr_t next_daddr; /* next daddr we expect */
+ /* daddr of low fsmap key when we're using the rtbitmap */
+ xfs_daddr_t low_daddr;
+- xfs_daddr_t end_daddr; /* daddr of high fsmap key */
++ /* daddr of high fsmap key, or the last daddr on the device */
++ xfs_daddr_t end_daddr;
+ u64 missing_owner; /* owner of holes */
+ u32 dev; /* device id */
+ /*
+@@ -306,7 +307,7 @@ xfs_getfsmap_helper(
+ * Note that if the btree query found a mapping, there won't be a gap.
+ */
+ if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
+- rec_daddr = info->end_daddr;
++ rec_daddr = info->end_daddr + 1;
+
+ /* Are we just counting mappings? */
+ if (info->head->fmh_count == 0) {
+@@ -898,7 +899,10 @@ xfs_getfsmap(
+ struct xfs_trans *tp = NULL;
+ struct xfs_fsmap dkeys[2]; /* per-dev keys */
+ struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS];
+- struct xfs_getfsmap_info info = { NULL };
++ struct xfs_getfsmap_info info = {
++ .fsmap_recs = fsmap_recs,
++ .head = head,
++ };
+ bool use_rmap;
+ int i;
+ int error = 0;
+@@ -963,9 +967,6 @@ xfs_getfsmap(
+
+ info.next_daddr = head->fmh_keys[0].fmr_physical +
+ head->fmh_keys[0].fmr_length;
+- info.end_daddr = XFS_BUF_DADDR_NULL;
+- info.fsmap_recs = fsmap_recs;
+- info.head = head;
+
+ /* For each device we support... */
+ for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
+@@ -978,17 +979,23 @@ xfs_getfsmap(
+ break;
+
+ /*
+- * If this device number matches the high key, we have
+- * to pass the high key to the handler to limit the
+- * query results. If the device number exceeds the
+- * low key, zero out the low key so that we get
+- * everything from the beginning.
++ * If this device number matches the high key, we have to pass
++ * the high key to the handler to limit the query results, and
++ * set the end_daddr so that we can synthesize records at the
++ * end of the query range or device.
+ */
+ if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
+ dkeys[1] = head->fmh_keys[1];
+ info.end_daddr = min(handlers[i].nr_sectors - 1,
+ dkeys[1].fmr_physical);
++ } else {
++ info.end_daddr = handlers[i].nr_sectors - 1;
+ }
++
++ /*
++ * If the device number exceeds the low key, zero out the low
++ * key so that we get everything from the beginning.
++ */
+ if (handlers[i].dev > head->fmh_keys[0].fmr_device)
+ memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
+
+--
+2.39.5
+
--- /dev/null
+From 3720905f499187a244d9070293aed91dc03c4b45 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:51:07 -0800
+Subject: xfs: fix sb_spino_align checks for large fsblock sizes
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit 7f8a44f37229fc76bfcafa341a4b8862368ef44a upstream.
+
+For a sparse inodes filesystem, mkfs.xfs computes the values of
+sb_spino_align and sb_inoalignmt with the following code:
+
+ int cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
+
+ if (cfg->sb_feat.crcs_enabled)
+ cluster_size *= cfg->inodesize / XFS_DINODE_MIN_SIZE;
+
+ sbp->sb_spino_align = cluster_size >> cfg->blocklog;
+ sbp->sb_inoalignmt = XFS_INODES_PER_CHUNK *
+ cfg->inodesize >> cfg->blocklog;
+
+On a V5 filesystem with 64k fsblocks and 512 byte inodes, this results
+in cluster_size = 8192 * (512 / 256) = 16384. As a result,
+sb_spino_align and sb_inoalignmt are both set to zero. Unfortunately,
+this trips the new sb_spino_align check that was just added to
+xfs_validate_sb_common, and the mkfs fails:
+
+# mkfs.xfs -f -b size=64k, /dev/sda
+meta-data=/dev/sda isize=512 agcount=4, agsize=81136 blks
+ = sectsz=512 attr=2, projid32bit=1
+ = crc=1 finobt=1, sparse=1, rmapbt=1
+ = reflink=1 bigtime=1 inobtcount=1 nrext64=1
+ = exchange=0 metadir=0
+data = bsize=65536 blocks=324544, imaxpct=25
+ = sunit=0 swidth=0 blks
+naming =version 2 bsize=65536 ascii-ci=0, ftype=1, parent=0
+log =internal log bsize=65536 blocks=5006, version=2
+ = sectsz=512 sunit=0 blks, lazy-count=1
+realtime =none extsz=65536 blocks=0, rtextents=0
+ = rgcount=0 rgsize=0 extents
+Discarding blocks...Sparse inode alignment (0) is invalid.
+Metadata corruption detected at 0x560ac5a80bbe, xfs_sb block 0x0/0x200
+libxfs_bwrite: write verifier failed on xfs_sb bno 0x0/0x1
+mkfs.xfs: Releasing dirty buffer to free list!
+found dirty buffer (bulk) on free list!
+Sparse inode alignment (0) is invalid.
+Metadata corruption detected at 0x560ac5a80bbe, xfs_sb block 0x0/0x200
+libxfs_bwrite: write verifier failed on xfs_sb bno 0x0/0x1
+mkfs.xfs: writing AG headers failed, err=22
+
+Prior to commit 59e43f5479cce1 this all worked fine, even if "sparse"
+inodes are somewhat meaningless when everything fits in a single
+fsblock. Adjust the checks to handle existing filesystems.
+
+Cc: <stable@vger.kernel.org> # v6.13-rc1
+Fixes: 59e43f5479cce1 ("xfs: sb_spino_align is not verified")
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_sb.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
+index 9e0ae312bc80..e27b63281d01 100644
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -392,12 +392,13 @@ xfs_validate_sb_common(
+ return -EINVAL;
+ }
+
+- if (!sbp->sb_spino_align ||
+- sbp->sb_spino_align > sbp->sb_inoalignmt ||
+- (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) {
++ if (sbp->sb_spino_align &&
++ (sbp->sb_spino_align > sbp->sb_inoalignmt ||
++ (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0)) {
+ xfs_warn(mp,
+- "Sparse inode alignment (%u) is invalid.",
+- sbp->sb_spino_align);
++"Sparse inode alignment (%u) is invalid, must be integer factor of (%u).",
++ sbp->sb_spino_align,
++ sbp->sb_inoalignmt);
+ return -EINVAL;
+ }
+ } else if (sbp->sb_spino_align) {
+--
+2.39.5
+
--- /dev/null
+From fe0558bded8b5cfdd3143a116b3ec5c965377716 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:50:36 -0800
+Subject: xfs: fix sparse inode limits on runt AG
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 13325333582d4820d39b9e8f63d6a54e745585d9 upstream.
+
+The runt AG at the end of a filesystem is almost always smaller than
+the mp->m_sb.sb_agblocks. Unfortunately, when setting the max_agbno
+limit for the inode chunk allocation, we do not take this into
+account. This means we can allocate a sparse inode chunk that
+overlaps beyond the end of an AG. When we go to allocate an inode
+from that sparse chunk, the irec fails validation because the
+agbno of the start of the irec is beyond valid limits for the runt
+AG.
+
+Prevent this from happening by taking into account the size of the
+runt AG when allocating inode chunks. Also convert the various
+checks for valid inode chunk agbnos to use xfs_ag_block_count()
+so that they will also catch such issues in the future.
+
+Fixes: 56d1115c9bc7 ("xfs: allocate sparse inode chunks on full chunk allocation failure")
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+[djwong: backport to stable because upstream maintainer ignored cc-stable]
+Link: https://lore.kernel.org/linux-xfs/20241112231539.GG9438@frogsfrogsfrogs/
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_ialloc.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
+index 271855227514..6258527315f2 100644
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -855,7 +855,8 @@ xfs_ialloc_ag_alloc(
+ * the end of the AG.
+ */
+ args.min_agbno = args.mp->m_sb.sb_inoalignmt;
+- args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
++ args.max_agbno = round_down(xfs_ag_block_count(args.mp,
++ pag->pag_agno),
+ args.mp->m_sb.sb_inoalignmt) -
+ igeo->ialloc_blks;
+
+@@ -2332,9 +2333,9 @@ xfs_difree(
+ return -EINVAL;
+ }
+ agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+- if (agbno >= mp->m_sb.sb_agblocks) {
+- xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
+- __func__, agbno, mp->m_sb.sb_agblocks);
++ if (agbno >= xfs_ag_block_count(mp, pag->pag_agno)) {
++ xfs_warn(mp, "%s: agbno >= xfs_ag_block_count (%d >= %d).",
++ __func__, agbno, xfs_ag_block_count(mp, pag->pag_agno));
+ ASSERT(0);
+ return -EINVAL;
+ }
+@@ -2457,7 +2458,7 @@ xfs_imap(
+ */
+ agino = XFS_INO_TO_AGINO(mp, ino);
+ agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+- if (agbno >= mp->m_sb.sb_agblocks ||
++ if (agbno >= xfs_ag_block_count(mp, pag->pag_agno) ||
+ ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+ error = -EINVAL;
+ #ifdef DEBUG
+@@ -2467,11 +2468,12 @@ xfs_imap(
+ */
+ if (flags & XFS_IGET_UNTRUSTED)
+ return error;
+- if (agbno >= mp->m_sb.sb_agblocks) {
++ if (agbno >= xfs_ag_block_count(mp, pag->pag_agno)) {
+ xfs_alert(mp,
+ "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
+ __func__, (unsigned long long)agbno,
+- (unsigned long)mp->m_sb.sb_agblocks);
++ (unsigned long)xfs_ag_block_count(mp,
++ pag->pag_agno));
+ }
+ if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+ xfs_alert(mp,
+--
+2.39.5
+
--- /dev/null
+From 6208be0894ce64483b9e968e9e5dbe3b4c7f31fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:51:23 -0800
+Subject: xfs: fix zero byte checking in the superblock scrubber
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit c004a793e0ec34047c3bd423bcd8966f5fac88dc upstream.
+
+The logic to check that the region past the end of the superblock is all
+zeroes is wrong -- we don't want to check only the bytes past the end of
+the maximally sized ondisk superblock structure as currently defined in
+xfs_format.h; we want to check the bytes beyond the end of the ondisk
+superblock as defined by the feature bits.
+
+Port the superblock size logic from xfs_repair and then put it to use in
+xfs_scrub.
+
+Cc: <stable@vger.kernel.org> # v4.15
+Fixes: 21fb4cb1981ef7 ("xfs: scrub the secondary superblocks")
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/scrub/agheader.c | 29 +++++++++++++++++++++++++++--
+ 1 file changed, 27 insertions(+), 2 deletions(-)
+
+diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
+index da30f926cbe6..0f2f1852d58f 100644
+--- a/fs/xfs/scrub/agheader.c
++++ b/fs/xfs/scrub/agheader.c
+@@ -59,6 +59,30 @@ xchk_superblock_xref(
+ /* scrub teardown will take care of sc->sa for us */
+ }
+
++/*
++ * Calculate the ondisk superblock size in bytes given the feature set of the
++ * mounted filesystem (aka the primary sb). This is subtly different from
++ * the logic in xfs_repair, which computes the size of a secondary sb given the
++ * featureset listed in the secondary sb.
++ */
++STATIC size_t
++xchk_superblock_ondisk_size(
++ struct xfs_mount *mp)
++{
++ if (xfs_has_metauuid(mp))
++ return offsetofend(struct xfs_dsb, sb_meta_uuid);
++ if (xfs_has_crc(mp))
++ return offsetofend(struct xfs_dsb, sb_lsn);
++ if (xfs_sb_version_hasmorebits(&mp->m_sb))
++ return offsetofend(struct xfs_dsb, sb_bad_features2);
++ if (xfs_has_logv2(mp))
++ return offsetofend(struct xfs_dsb, sb_logsunit);
++ if (xfs_has_sector(mp))
++ return offsetofend(struct xfs_dsb, sb_logsectsize);
++ /* only support dirv2 or more recent */
++ return offsetofend(struct xfs_dsb, sb_dirblklog);
++}
++
+ /*
+ * Scrub the filesystem superblock.
+ *
+@@ -75,6 +99,7 @@ xchk_superblock(
+ struct xfs_buf *bp;
+ struct xfs_dsb *sb;
+ struct xfs_perag *pag;
++ size_t sblen;
+ xfs_agnumber_t agno;
+ uint32_t v2_ok;
+ __be32 features_mask;
+@@ -350,8 +375,8 @@ xchk_superblock(
+ }
+
+ /* Everything else must be zero. */
+- if (memchr_inv(sb + 1, 0,
+- BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
++ sblen = xchk_superblock_ondisk_size(mp);
++ if (memchr_inv((char *)sb + sblen, 0, BBTOB(bp->b_length) - sblen))
+ xchk_block_set_corrupt(sc, bp);
+
+ xchk_superblock_xref(sc, bp);
+--
+2.39.5
+
--- /dev/null
+From c5555322e43ec7aa41de5938da5bc8b4da32fba2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:50:20 -0800
+Subject: xfs: sb_spino_align is not verified
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 59e43f5479cce106d71c0b91a297c7ad1913176c upstream.
+
+It's just read in from the superblock and used without doing any
+validity checks at all on the value.
+
+Fixes: fb4f2b4e5a82 ("xfs: add sparse inode chunk alignment superblock field")
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+[djwong: actually tag for 6.12 because upstream maintainer ignored cc-stable tag]
+Link: https://lore.kernel.org/linux-xfs/20241024165544.GI21853@frogsfrogsfrogs/
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
+index 02ebcbc4882f..9e0ae312bc80 100644
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -391,6 +391,20 @@ xfs_validate_sb_common(
+ sbp->sb_inoalignmt, align);
+ return -EINVAL;
+ }
++
++ if (!sbp->sb_spino_align ||
++ sbp->sb_spino_align > sbp->sb_inoalignmt ||
++ (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) {
++ xfs_warn(mp,
++ "Sparse inode alignment (%u) is invalid.",
++ sbp->sb_spino_align);
++ return -EINVAL;
++ }
++ } else if (sbp->sb_spino_align) {
++ xfs_warn(mp,
++ "Sparse inode alignment (%u) should be zero.",
++ sbp->sb_spino_align);
++ return -EINVAL;
+ }
+ } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
+ XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
+--
+2.39.5
+