Fixes for 6.12

author Sasha Levin <sashal@kernel.org>

Thu, 19 Dec 2024 19:23:46 +0000 (14:23 -0500)

committer Sasha Levin <sashal@kernel.org>

Thu, 19 Dec 2024 19:23:46 +0000 (14:23 -0500)
author Sasha Levin <sashal@kernel.org>
Thu, 19 Dec 2024 19:23:46 +0000 (14:23 -0500)
committer Sasha Levin <sashal@kernel.org>
Thu, 19 Dec 2024 19:23:46 +0000 (14:23 -0500)
diff --git a/queue-6.12/erofs-add-erofs_sb_free-helper.patch b/queue-6.12/erofs-add-erofs_sb_free-helper.patch

new file mode 100644 (file)

index 0000000..3f718ce
--- /dev/null
+++ b/queue-6.12/erofs-add-erofs_sb_free-helper.patch
@@ -0,0 +1,102 @@
+From e22342b717900cb58e6c9134ee441ae5b5023f0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 21:35:01 +0800
+Subject: erofs: add erofs_sb_free() helper
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit e2de3c1bf6a0c99b089bd706a62da8f988918858 ]
+
+Unify the common parts of erofs_fc_free() and erofs_kill_sb() as
+erofs_sb_free().
+
+Thus, fput() in erofs_fc_get_tree() is no longer needed, too.
+
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20241212133504.2047178-1-hsiangkao@linux.alibaba.com
+Stable-dep-of: 6422cde1b0d5 ("erofs: use buffered I/O for file-backed mounts by default")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/super.c | 36 +++++++++++++++++++-----------------
+ 1 file changed, 19 insertions(+), 17 deletions(-)
+
+diff --git a/fs/erofs/super.c b/fs/erofs/super.c
+index 2dd7d819572f..c40821346d50 100644
+--- a/fs/erofs/super.c
++++ b/fs/erofs/super.c
+@@ -718,16 +718,19 @@ static int erofs_fc_get_tree(struct fs_context *fc)
+                       GET_TREE_BDEV_QUIET_LOOKUP : 0);
+ #ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
+       if (ret == -ENOTBLK) {
++              struct file *file;
++
+               if (!fc->source)
+                       return invalf(fc, "No source specified");
+-              sbi->fdev = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
+-              if (IS_ERR(sbi->fdev))
+-                      return PTR_ERR(sbi->fdev);
++
++              file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
++              if (IS_ERR(file))
++                      return PTR_ERR(file);
++              sbi->fdev = file;
+ 
+               if (S_ISREG(file_inode(sbi->fdev)->i_mode) &&
+                   sbi->fdev->f_mapping->a_ops->read_folio)
+                       return get_tree_nodev(fc, erofs_fc_fill_super);
+-              fput(sbi->fdev);
+       }
+ #endif
+       return ret;
+@@ -778,19 +781,24 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs)
+       kfree(devs);
+ }
+ 
+-static void erofs_fc_free(struct fs_context *fc)
++static void erofs_sb_free(struct erofs_sb_info *sbi)
+ {
+-      struct erofs_sb_info *sbi = fc->s_fs_info;
+-
+-      if (!sbi)
+-              return;
+-
+       erofs_free_dev_context(sbi->devs);
+       kfree(sbi->fsid);
+       kfree(sbi->domain_id);
++      if (sbi->fdev)
++              fput(sbi->fdev);
+       kfree(sbi);
+ }
+ 
++static void erofs_fc_free(struct fs_context *fc)
++{
++      struct erofs_sb_info *sbi = fc->s_fs_info;
++
++      if (sbi) /* free here if an error occurs before transferring to sb */
++              erofs_sb_free(sbi);
++}
++
+ static const struct fs_context_operations erofs_context_ops = {
+       .parse_param    = erofs_fc_parse_param,
+       .get_tree       = erofs_fc_get_tree,
+@@ -828,15 +836,9 @@ static void erofs_kill_sb(struct super_block *sb)
+               kill_anon_super(sb);
+       else
+               kill_block_super(sb);
+-
+-      erofs_free_dev_context(sbi->devs);
+       fs_put_dax(sbi->dax_dev, NULL);
+       erofs_fscache_unregister_fs(sb);
+-      kfree(sbi->fsid);
+-      kfree(sbi->domain_id);
+-      if (sbi->fdev)
+-              fput(sbi->fdev);
+-      kfree(sbi);
++      erofs_sb_free(sbi);
+       sb->s_fs_info = NULL;
+ }
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.12/erofs-fix-psi-memstall-accounting.patch b/queue-6.12/erofs-fix-psi-memstall-accounting.patch

new file mode 100644 (file)

index 0000000..ad8f84d
--- /dev/null
+++ b/queue-6.12/erofs-fix-psi-memstall-accounting.patch
@@ -0,0 +1,46 @@
+From b2236194259e71097c538f6e23ac5dc9e90e1cb1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Nov 2024 16:52:36 +0800
+Subject: erofs: fix PSI memstall accounting
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit 1a2180f6859c73c674809f9f82e36c94084682ba ]
+
+Max Kellermann recently reported psi_group_cpu.tasks[NR_MEMSTALL] is
+incorrect in the 6.11.9 kernel.
+
+The root cause appears to be that, since the problematic commit, bio
+can be NULL, causing psi_memstall_leave() to be skipped in
+z_erofs_submit_queue().
+
+Reported-by: Max Kellermann <max.kellermann@ionos.com>
+Closes: https://lore.kernel.org/r/CAKPOu+8tvSowiJADW2RuKyofL_CSkm_SuyZA7ME5vMLWmL6pqw@mail.gmail.com
+Fixes: 9e2f9d34dd12 ("erofs: handle overlapped pclusters out of crafted images properly")
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20241127085236.3538334-1-hsiangkao@linux.alibaba.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/zdata.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
+index a569ff9dfd04..1a00f061798a 100644
+--- a/fs/erofs/zdata.c
++++ b/fs/erofs/zdata.c
+@@ -1679,9 +1679,9 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
+                       erofs_fscache_submit_bio(bio);
+               else
+                       submit_bio(bio);
+-              if (memstall)
+-                      psi_memstall_leave(&pflags);
+       }
++      if (memstall)
++              psi_memstall_leave(&pflags);
+ 
+       /*
+        * although background is preferred, no one is pending for submission.
+-- 
+2.39.5
+
diff --git a/queue-6.12/erofs-reference-struct-erofs_device_info-for-erofs_m.patch b/queue-6.12/erofs-reference-struct-erofs_device_info-for-erofs_m.patch

new file mode 100644 (file)

index 0000000..c922618
--- /dev/null
+++ b/queue-6.12/erofs-reference-struct-erofs_device_info-for-erofs_m.patch
@@ -0,0 +1,157 @@
+From 350f3baf7618fcdaf53e7a7a4e6d9dfc7b35784d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Dec 2024 07:54:01 +0800
+Subject: erofs: reference `struct erofs_device_info` for erofs_map_dev
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit f8d920a402aec3482931cb5f1539ed438740fc49 ]
+
+Record `m_sb` and `m_dif` to replace `m_fscache`, `m_daxdev`, `m_fp`
+and `m_dax_part_off` in order to simplify the codebase.
+
+Note that `m_bdev` is still left since it can be assigned from
+`sb->s_bdev` directly.
+
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20241212235401.2857246-1-hsiangkao@linux.alibaba.com
+Stable-dep-of: 6422cde1b0d5 ("erofs: use buffered I/O for file-backed mounts by default")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/data.c     | 26 ++++++++++----------------
+ fs/erofs/fileio.c   |  2 +-
+ fs/erofs/fscache.c  |  4 ++--
+ fs/erofs/internal.h |  6 ++----
+ 4 files changed, 15 insertions(+), 23 deletions(-)
+
+diff --git a/fs/erofs/data.c b/fs/erofs/data.c
+index 365c988262b1..722151d3fee8 100644
+--- a/fs/erofs/data.c
++++ b/fs/erofs/data.c
+@@ -186,19 +186,13 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
+ }
+ 
+ static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
+-                                  struct erofs_device_info *dif)
++              struct super_block *sb, struct erofs_device_info *dif)
+ {
++      map->m_sb = sb;
++      map->m_dif = dif;
+       map->m_bdev = NULL;
+-      map->m_fp = NULL;
+-      if (dif->file) {
+-              if (S_ISBLK(file_inode(dif->file)->i_mode))
+-                      map->m_bdev = file_bdev(dif->file);
+-              else
+-                      map->m_fp = dif->file;
+-      }
+-      map->m_daxdev = dif->dax_dev;
+-      map->m_dax_part_off = dif->dax_part_off;
+-      map->m_fscache = dif->fscache;
++      if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
++              map->m_bdev = file_bdev(dif->file);
+ }
+ 
+ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
+@@ -208,7 +202,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
+       erofs_off_t startoff, length;
+       int id;
+ 
+-      erofs_fill_from_devinfo(map, &EROFS_SB(sb)->dif0);
++      erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0);
+       map->m_bdev = sb->s_bdev;       /* use s_bdev for the primary device */
+       if (map->m_deviceid) {
+               down_read(&devs->rwsem);
+@@ -222,7 +216,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
+                       up_read(&devs->rwsem);
+                       return 0;
+               }
+-              erofs_fill_from_devinfo(map, dif);
++              erofs_fill_from_devinfo(map, sb, dif);
+               up_read(&devs->rwsem);
+       } else if (devs->extra_devices && !devs->flatdev) {
+               down_read(&devs->rwsem);
+@@ -235,7 +229,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
+                       if (map->m_pa >= startoff &&
+                           map->m_pa < startoff + length) {
+                               map->m_pa -= startoff;
+-                              erofs_fill_from_devinfo(map, dif);
++                              erofs_fill_from_devinfo(map, sb, dif);
+                               break;
+                       }
+               }
+@@ -305,7 +299,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ 
+       iomap->offset = map.m_la;
+       if (flags & IOMAP_DAX)
+-              iomap->dax_dev = mdev.m_daxdev;
++              iomap->dax_dev = mdev.m_dif->dax_dev;
+       else
+               iomap->bdev = mdev.m_bdev;
+       iomap->length = map.m_llen;
+@@ -334,7 +328,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+               iomap->type = IOMAP_MAPPED;
+               iomap->addr = mdev.m_pa;
+               if (flags & IOMAP_DAX)
+-                      iomap->addr += mdev.m_dax_part_off;
++                      iomap->addr += mdev.m_dif->dax_part_off;
+       }
+       return 0;
+ }
+diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
+index 3af96b1e2c2a..a61b8faec651 100644
+--- a/fs/erofs/fileio.c
++++ b/fs/erofs/fileio.c
+@@ -67,7 +67,7 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
+                                            GFP_KERNEL | __GFP_NOFAIL);
+ 
+       bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
+-      rq->iocb.ki_filp = mdev->m_fp;
++      rq->iocb.ki_filp = mdev->m_dif->file;
+       return rq;
+ }
+ 
+diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
+index ce7e38c82719..ce3d8737df85 100644
+--- a/fs/erofs/fscache.c
++++ b/fs/erofs/fscache.c
+@@ -198,7 +198,7 @@ struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
+ 
+       io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
+       bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
+-      io->io.private = mdev->m_fscache->cookie;
++      io->io.private = mdev->m_dif->fscache->cookie;
+       io->io.end_io = erofs_fscache_bio_endio;
+       refcount_set(&io->io.ref, 1);
+       return &io->bio;
+@@ -316,7 +316,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
+       if (!io)
+               return -ENOMEM;
+       iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
+-      ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
++      ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie,
+                       mdev.m_pa + (pos - map.m_la), io);
+       erofs_fscache_req_io_put(io);
+ 
+diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
+index d70aa2410472..3108ece1d709 100644
+--- a/fs/erofs/internal.h
++++ b/fs/erofs/internal.h
+@@ -366,11 +366,9 @@ enum {
+ };
+ 
+ struct erofs_map_dev {
+-      struct erofs_fscache *m_fscache;
++      struct super_block *m_sb;
++      struct erofs_device_info *m_dif;
+       struct block_device *m_bdev;
+-      struct dax_device *m_daxdev;
+-      struct file *m_fp;
+-      u64 m_dax_part_off;
+ 
+       erofs_off_t m_pa;
+       unsigned int m_deviceid;
+-- 
+2.39.5
+
diff --git a/queue-6.12/erofs-use-buffered-i-o-for-file-backed-mounts-by-def.patch b/queue-6.12/erofs-use-buffered-i-o-for-file-backed-mounts-by-def.patch

new file mode 100644 (file)

index 0000000..111c06a
--- /dev/null
+++ b/queue-6.12/erofs-use-buffered-i-o-for-file-backed-mounts-by-def.patch
@@ -0,0 +1,139 @@
+From 11a8695f7aed6930bfc758b83fec24f1eb4ebcb8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 21:43:36 +0800
+Subject: erofs: use buffered I/O for file-backed mounts by default
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit 6422cde1b0d5a31b206b263417c1c2b3c80fe82c ]
+
+For many use cases (e.g. container images are just fetched from remote),
+performance will be impacted if underlay page cache is up-to-date but
+direct i/o flushes dirty pages first.
+
+Instead, let's use buffered I/O by default to keep in sync with loop
+devices and add a (re)mount option to explicitly give a try to use
+direct I/O if supported by the underlying files.
+
+The container startup time is improved as below:
+[workload] docker.io/library/wordpress:latest
+                                     unpack        1st run  non-1st runs
+EROFS snapshotter buffered I/O file  4.586404265s  0.308s   0.198s
+EROFS snapshotter direct I/O file    4.581742849s  2.238s   0.222s
+EROFS snapshotter loop               4.596023152s  0.346s   0.201s
+Overlayfs snapshotter                5.382851037s  0.206s   0.214s
+
+Fixes: fb176750266a ("erofs: add file-backed mount support")
+Cc: Derek McGowan <derek@mcg.dev>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20241212134336.2059899-1-hsiangkao@linux.alibaba.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/fileio.c   |  7 +++++--
+ fs/erofs/internal.h |  1 +
+ fs/erofs/super.c    | 23 +++++++++++++++--------
+ 3 files changed, 21 insertions(+), 10 deletions(-)
+
+diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
+index a61b8faec651..33f8539dda4a 100644
+--- a/fs/erofs/fileio.c
++++ b/fs/erofs/fileio.c
+@@ -9,6 +9,7 @@ struct erofs_fileio_rq {
+       struct bio_vec bvecs[BIO_MAX_VECS];
+       struct bio bio;
+       struct kiocb iocb;
++      struct super_block *sb;
+ };
+ 
+ struct erofs_fileio {
+@@ -52,8 +53,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
+       rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+       rq->iocb.ki_ioprio = get_current_ioprio();
+       rq->iocb.ki_complete = erofs_fileio_ki_complete;
+-      rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ?
+-                              IOCB_DIRECT : 0;
++      if (test_opt(&EROFS_SB(rq->sb)->opt, DIRECT_IO) &&
++          rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT)
++              rq->iocb.ki_flags = IOCB_DIRECT;
+       iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt,
+                     rq->bio.bi_iter.bi_size);
+       ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter);
+@@ -68,6 +70,7 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
+ 
+       bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
+       rq->iocb.ki_filp = mdev->m_dif->file;
++      rq->sb = mdev->m_sb;
+       return rq;
+ }
+ 
+diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
+index 3108ece1d709..77e785a6dfa7 100644
+--- a/fs/erofs/internal.h
++++ b/fs/erofs/internal.h
+@@ -182,6 +182,7 @@ struct erofs_sb_info {
+ #define EROFS_MOUNT_POSIX_ACL         0x00000020
+ #define EROFS_MOUNT_DAX_ALWAYS                0x00000040
+ #define EROFS_MOUNT_DAX_NEVER         0x00000080
++#define EROFS_MOUNT_DIRECT_IO         0x00000100
+ 
+ #define clear_opt(opt, option)        ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
+ #define set_opt(opt, option)  ((opt)->mount_opt |= EROFS_MOUNT_##option)
+diff --git a/fs/erofs/super.c b/fs/erofs/super.c
+index 60f7bd43a5a4..5b279977c9d5 100644
+--- a/fs/erofs/super.c
++++ b/fs/erofs/super.c
+@@ -379,14 +379,8 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
+ }
+ 
+ enum {
+-      Opt_user_xattr,
+-      Opt_acl,
+-      Opt_cache_strategy,
+-      Opt_dax,
+-      Opt_dax_enum,
+-      Opt_device,
+-      Opt_fsid,
+-      Opt_domain_id,
++      Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
++      Opt_device, Opt_fsid, Opt_domain_id, Opt_directio,
+       Opt_err
+ };
+ 
+@@ -413,6 +407,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
+       fsparam_string("device",        Opt_device),
+       fsparam_string("fsid",          Opt_fsid),
+       fsparam_string("domain_id",     Opt_domain_id),
++      fsparam_flag_no("directio",     Opt_directio),
+       {}
+ };
+ 
+@@ -526,6 +521,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
+               errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
+               break;
+ #endif
++      case Opt_directio:
++#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
++              if (result.boolean)
++                      set_opt(&sbi->opt, DIRECT_IO);
++              else
++                      clear_opt(&sbi->opt, DIRECT_IO);
++#else
++              errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
++#endif
++              break;
+       default:
+               return -ENOPARAM;
+       }
+@@ -963,6 +968,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
+               seq_puts(seq, ",dax=always");
+       if (test_opt(opt, DAX_NEVER))
+               seq_puts(seq, ",dax=never");
++      if (erofs_is_fileio_mode(sbi) && test_opt(opt, DIRECT_IO))
++              seq_puts(seq, ",directio");
+ #ifdef CONFIG_EROFS_FS_ONDEMAND
+       if (sbi->fsid)
+               seq_printf(seq, ",fsid=%s", sbi->fsid);
+-- 
+2.39.5
+
diff --git a/queue-6.12/erofs-use-struct-erofs_device_info-for-the-primary-d.patch b/queue-6.12/erofs-use-struct-erofs_device_info-for-the-primary-d.patch

new file mode 100644 (file)

index 0000000..cc7b261
--- /dev/null
+++ b/queue-6.12/erofs-use-struct-erofs_device_info-for-the-primary-d.patch
@@ -0,0 +1,219 @@
+From a4931361873eb6e0e8ab75aa98a1d3c7d78e46de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Dec 2024 20:53:08 +0800
+Subject: erofs: use `struct erofs_device_info` for the primary device
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit 7b00af2c5414dc01e0718deef7ead81102867636 ]
+
+Instead of just listing each one directly in `struct erofs_sb_info`
+except that we still use `sb->s_bdev` for the primary block device.
+
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20241216125310.930933-2-hsiangkao@linux.alibaba.com
+Stable-dep-of: 6422cde1b0d5 ("erofs: use buffered I/O for file-backed mounts by default")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/data.c     | 12 ++++--------
+ fs/erofs/fscache.c  |  6 +++---
+ fs/erofs/internal.h |  8 ++------
+ fs/erofs/super.c    | 27 +++++++++++++--------------
+ 4 files changed, 22 insertions(+), 31 deletions(-)
+
+diff --git a/fs/erofs/data.c b/fs/erofs/data.c
+index fa51437e1d99..365c988262b1 100644
+--- a/fs/erofs/data.c
++++ b/fs/erofs/data.c
+@@ -63,10 +63,10 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
+ 
+       buf->file = NULL;
+       if (erofs_is_fileio_mode(sbi)) {
+-              buf->file = sbi->fdev;          /* some fs like FUSE needs it */
++              buf->file = sbi->dif0.file;     /* some fs like FUSE needs it */
+               buf->mapping = buf->file->f_mapping;
+       } else if (erofs_is_fscache_mode(sb))
+-              buf->mapping = sbi->s_fscache->inode->i_mapping;
++              buf->mapping = sbi->dif0.fscache->inode->i_mapping;
+       else
+               buf->mapping = sb->s_bdev->bd_mapping;
+ }
+@@ -208,12 +208,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
+       erofs_off_t startoff, length;
+       int id;
+ 
+-      map->m_bdev = sb->s_bdev;
+-      map->m_daxdev = EROFS_SB(sb)->dax_dev;
+-      map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
+-      map->m_fscache = EROFS_SB(sb)->s_fscache;
+-      map->m_fp = EROFS_SB(sb)->fdev;
+-
++      erofs_fill_from_devinfo(map, &EROFS_SB(sb)->dif0);
++      map->m_bdev = sb->s_bdev;       /* use s_bdev for the primary device */
+       if (map->m_deviceid) {
+               down_read(&devs->rwsem);
+               dif = idr_find(&devs->tree, map->m_deviceid - 1);
+diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
+index fda16eedafb5..ce7e38c82719 100644
+--- a/fs/erofs/fscache.c
++++ b/fs/erofs/fscache.c
+@@ -657,7 +657,7 @@ int erofs_fscache_register_fs(struct super_block *sb)
+       if (IS_ERR(fscache))
+               return PTR_ERR(fscache);
+ 
+-      sbi->s_fscache = fscache;
++      sbi->dif0.fscache = fscache;
+       return 0;
+ }
+ 
+@@ -665,14 +665,14 @@ void erofs_fscache_unregister_fs(struct super_block *sb)
+ {
+       struct erofs_sb_info *sbi = EROFS_SB(sb);
+ 
+-      erofs_fscache_unregister_cookie(sbi->s_fscache);
++      erofs_fscache_unregister_cookie(sbi->dif0.fscache);
+ 
+       if (sbi->domain)
+               erofs_fscache_domain_put(sbi->domain);
+       else
+               fscache_relinquish_volume(sbi->volume, NULL, false);
+ 
+-      sbi->s_fscache = NULL;
++      sbi->dif0.fscache = NULL;
+       sbi->volume = NULL;
+       sbi->domain = NULL;
+ }
+diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
+index 9b03c8f323a7..d70aa2410472 100644
+--- a/fs/erofs/internal.h
++++ b/fs/erofs/internal.h
+@@ -113,6 +113,7 @@ struct erofs_xattr_prefix_item {
+ };
+ 
+ struct erofs_sb_info {
++      struct erofs_device_info dif0;
+       struct erofs_mount_opts opt;    /* options */
+ #ifdef CONFIG_EROFS_FS_ZIP
+       /* list for all registered superblocks, mainly for shrinker */
+@@ -130,13 +131,9 @@ struct erofs_sb_info {
+ 
+       struct erofs_sb_lz4_info lz4;
+ #endif        /* CONFIG_EROFS_FS_ZIP */
+-      struct file *fdev;
+       struct inode *packed_inode;
+       struct erofs_dev_context *devs;
+-      struct dax_device *dax_dev;
+-      u64 dax_part_off;
+       u64 total_blocks;
+-      u32 primarydevice_blocks;
+ 
+       u32 meta_blkaddr;
+ #ifdef CONFIG_EROFS_FS_XATTR
+@@ -172,7 +169,6 @@ struct erofs_sb_info {
+ 
+       /* fscache support */
+       struct fscache_volume *volume;
+-      struct erofs_fscache *s_fscache;
+       struct erofs_domain *domain;
+       char *fsid;
+       char *domain_id;
+@@ -193,7 +189,7 @@ struct erofs_sb_info {
+ 
+ static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi)
+ {
+-      return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->fdev;
++      return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->dif0.file;
+ }
+ 
+ static inline bool erofs_is_fscache_mode(struct super_block *sb)
+diff --git a/fs/erofs/super.c b/fs/erofs/super.c
+index c40821346d50..60f7bd43a5a4 100644
+--- a/fs/erofs/super.c
++++ b/fs/erofs/super.c
+@@ -218,7 +218,7 @@ static int erofs_scan_devices(struct super_block *sb,
+       struct erofs_device_info *dif;
+       int id, err = 0;
+ 
+-      sbi->total_blocks = sbi->primarydevice_blocks;
++      sbi->total_blocks = sbi->dif0.blocks;
+       if (!erofs_sb_has_device_table(sbi))
+               ondisk_extradevs = 0;
+       else
+@@ -322,7 +322,7 @@ static int erofs_read_superblock(struct super_block *sb)
+                         sbi->sb_size);
+               goto out;
+       }
+-      sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
++      sbi->dif0.blocks = le32_to_cpu(dsb->blocks);
+       sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
+ #ifdef CONFIG_EROFS_FS_XATTR
+       sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
+@@ -617,9 +617,8 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
+                       return -EINVAL;
+               }
+ 
+-              sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
+-                                                &sbi->dax_part_off,
+-                                                NULL, NULL);
++              sbi->dif0.dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
++                              &sbi->dif0.dax_part_off, NULL, NULL);
+       }
+ 
+       err = erofs_read_superblock(sb);
+@@ -642,7 +641,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
+       }
+ 
+       if (test_opt(&sbi->opt, DAX_ALWAYS)) {
+-              if (!sbi->dax_dev) {
++              if (!sbi->dif0.dax_dev) {
+                       errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
+                       clear_opt(&sbi->opt, DAX_ALWAYS);
+               } else if (sbi->blkszbits != PAGE_SHIFT) {
+@@ -722,14 +721,13 @@ static int erofs_fc_get_tree(struct fs_context *fc)
+ 
+               if (!fc->source)
+                       return invalf(fc, "No source specified");
+-
+               file = filp_open(fc->source, O_RDONLY | O_LARGEFILE, 0);
+               if (IS_ERR(file))
+                       return PTR_ERR(file);
+-              sbi->fdev = file;
++              sbi->dif0.file = file;
+ 
+-              if (S_ISREG(file_inode(sbi->fdev)->i_mode) &&
+-                  sbi->fdev->f_mapping->a_ops->read_folio)
++              if (S_ISREG(file_inode(sbi->dif0.file)->i_mode) &&
++                  sbi->dif0.file->f_mapping->a_ops->read_folio)
+                       return get_tree_nodev(fc, erofs_fc_fill_super);
+       }
+ #endif
+@@ -786,8 +784,8 @@ static void erofs_sb_free(struct erofs_sb_info *sbi)
+       erofs_free_dev_context(sbi->devs);
+       kfree(sbi->fsid);
+       kfree(sbi->domain_id);
+-      if (sbi->fdev)
+-              fput(sbi->fdev);
++      if (sbi->dif0.file)
++              fput(sbi->dif0.file);
+       kfree(sbi);
+ }
+ 
+@@ -832,11 +830,12 @@ static void erofs_kill_sb(struct super_block *sb)
+ {
+       struct erofs_sb_info *sbi = EROFS_SB(sb);
+ 
+-      if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) || sbi->fdev)
++      if ((IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) ||
++          sbi->dif0.file)
+               kill_anon_super(sb);
+       else
+               kill_block_super(sb);
+-      fs_put_dax(sbi->dax_dev, NULL);
++      fs_put_dax(sbi->dif0.dax_dev, NULL);
+       erofs_fscache_unregister_fs(sb);
+       erofs_sb_free(sbi);
+       sb->s_fs_info = NULL;
+-- 
+2.39.5
+
diff --git a/queue-6.12/firmware-arm_ffa-fix-the-race-around-setting-ffa_dev.patch b/queue-6.12/firmware-arm_ffa-fix-the-race-around-setting-ffa_dev.patch

new file mode 100644 (file)

index 0000000..24a1432
--- /dev/null
+++ b/queue-6.12/firmware-arm_ffa-fix-the-race-around-setting-ffa_dev.patch
@@ -0,0 +1,141 @@
+From 0a8a7df983ed842d9bb150a8e59bc92a125ec624 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Dec 2024 14:31:08 +0000
+Subject: firmware: arm_ffa: Fix the race around setting ffa_dev->properties
+
+From: Levi Yun <yeoreum.yun@arm.com>
+
+[ Upstream commit 6fe437cfe2cdc797b03f63b338a13fac96ed6a08 ]
+
+Currently, ffa_dev->properties is set after the ffa_device_register()
+call returns in ffa_setup_partitions(). This could potentially result in
+a race where the partition's properties are accessed while probing
+struct ffa_device before they are set.
+
+Update ffa_device_register() to receive ffa_partition_info so all
+the data from the partition information received from the firmware can
+be updated into the struct ffa_device before calling device_register()
+in ffa_device_register().
+
+Fixes: e781858488b9 ("firmware: arm_ffa: Add initial FFA bus support for device enumeration")
+Signed-off-by: Levi Yun <yeoreum.yun@arm.com>
+Message-Id: <20241203143109.1030514-2-yeoreum.yun@arm.com>
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_ffa/bus.c    | 15 +++++++++++----
+ drivers/firmware/arm_ffa/driver.c |  7 +------
+ include/linux/arm_ffa.h           | 13 ++++++++-----
+ 3 files changed, 20 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c
+index eb17d03b66fe..dfda5ffc14db 100644
+--- a/drivers/firmware/arm_ffa/bus.c
++++ b/drivers/firmware/arm_ffa/bus.c
+@@ -187,13 +187,18 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev)
+       return valid;
+ }
+ 
+-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
+-                                     const struct ffa_ops *ops)
++struct ffa_device *
++ffa_device_register(const struct ffa_partition_info *part_info,
++                  const struct ffa_ops *ops)
+ {
+       int id, ret;
++      uuid_t uuid;
+       struct device *dev;
+       struct ffa_device *ffa_dev;
+ 
++      if (!part_info)
++              return NULL;
++
+       id = ida_alloc_min(&ffa_bus_id, 1, GFP_KERNEL);
+       if (id < 0)
+               return NULL;
+@@ -210,9 +215,11 @@ struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
+       dev_set_name(&ffa_dev->dev, "arm-ffa-%d", id);
+ 
+       ffa_dev->id = id;
+-      ffa_dev->vm_id = vm_id;
++      ffa_dev->vm_id = part_info->id;
++      ffa_dev->properties = part_info->properties;
+       ffa_dev->ops = ops;
+-      uuid_copy(&ffa_dev->uuid, uuid);
++      import_uuid(&uuid, (u8 *)part_info->uuid);
++      uuid_copy(&ffa_dev->uuid, &uuid);
+ 
+       ret = device_register(&ffa_dev->dev);
+       if (ret) {
+diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
+index b14cbdae94e8..2c2ec3c35f15 100644
+--- a/drivers/firmware/arm_ffa/driver.c
++++ b/drivers/firmware/arm_ffa/driver.c
+@@ -1387,7 +1387,6 @@ static struct notifier_block ffa_bus_nb = {
+ static int ffa_setup_partitions(void)
+ {
+       int count, idx, ret;
+-      uuid_t uuid;
+       struct ffa_device *ffa_dev;
+       struct ffa_dev_part_info *info;
+       struct ffa_partition_info *pbuf, *tpbuf;
+@@ -1406,23 +1405,19 @@ static int ffa_setup_partitions(void)
+ 
+       xa_init(&drv_info->partition_info);
+       for (idx = 0, tpbuf = pbuf; idx < count; idx++, tpbuf++) {
+-              import_uuid(&uuid, (u8 *)tpbuf->uuid);
+-
+               /* Note that if the UUID will be uuid_null, that will require
+                * ffa_bus_notifier() to find the UUID of this partition id
+                * with help of ffa_device_match_uuid(). FF-A v1.1 and above
+                * provides UUID here for each partition as part of the
+                * discovery API and the same is passed.
+                */
+-              ffa_dev = ffa_device_register(&uuid, tpbuf->id, &ffa_drv_ops);
++              ffa_dev = ffa_device_register(tpbuf, &ffa_drv_ops);
+               if (!ffa_dev) {
+                       pr_err("%s: failed to register partition ID 0x%x\n",
+                              __func__, tpbuf->id);
+                       continue;
+               }
+ 
+-              ffa_dev->properties = tpbuf->properties;
+-
+               if (drv_info->version > FFA_VERSION_1_0 &&
+                   !(tpbuf->properties & FFA_PARTITION_AARCH64_EXEC))
+                       ffa_mode_32bit_set(ffa_dev);
+diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
+index a28e2a6a13d0..74169dd0f659 100644
+--- a/include/linux/arm_ffa.h
++++ b/include/linux/arm_ffa.h
+@@ -166,9 +166,12 @@ static inline void *ffa_dev_get_drvdata(struct ffa_device *fdev)
+       return dev_get_drvdata(&fdev->dev);
+ }
+ 
++struct ffa_partition_info;
++
+ #if IS_REACHABLE(CONFIG_ARM_FFA_TRANSPORT)
+-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
+-                                     const struct ffa_ops *ops);
++struct ffa_device *
++ffa_device_register(const struct ffa_partition_info *part_info,
++                  const struct ffa_ops *ops);
+ void ffa_device_unregister(struct ffa_device *ffa_dev);
+ int ffa_driver_register(struct ffa_driver *driver, struct module *owner,
+                       const char *mod_name);
+@@ -176,9 +179,9 @@ void ffa_driver_unregister(struct ffa_driver *driver);
+ bool ffa_device_is_valid(struct ffa_device *ffa_dev);
+ 
+ #else
+-static inline
+-struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
+-                                     const struct ffa_ops *ops)
++static inline struct ffa_device *
++ffa_device_register(const struct ffa_partition_info *part_info,
++                  const struct ffa_ops *ops)
+ {
+       return NULL;
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.12/firmware-arm_scmi-fix-i.mx-build-dependency.patch b/queue-6.12/firmware-arm_scmi-fix-i.mx-build-dependency.patch

new file mode 100644 (file)

index 0000000..bd6f8ce
--- /dev/null
+++ b/queue-6.12/firmware-arm_scmi-fix-i.mx-build-dependency.patch
@@ -0,0 +1,82 @@
+From 6430d3a2c3b25314895a943f3de02cb7ebe747b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Nov 2024 00:05:18 +0100
+Subject: firmware: arm_scmi: Fix i.MX build dependency
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit 514b2262ade48a0503ac6aa03c3bfb8c5be69b21 ]
+
+The newly added SCMI vendor driver references functions in the
+protocol driver but needs a Kconfig dependency to ensure it can link,
+essentially the Kconfig dependency needs to be reversed to match the
+link time dependency:
+
+  |  arm-linux-gnueabi-ld: sound/soc/fsl/fsl_mqs.o: in function `fsl_mqs_sm_write':
+  |    fsl_mqs.c:(.text+0x1aa): undefined reference to `scmi_imx_misc_ctrl_set'
+  |  arm-linux-gnueabi-ld: sound/soc/fsl/fsl_mqs.o: in function `fsl_mqs_sm_read':
+  |    fsl_mqs.c:(.text+0x1ee): undefined reference to `scmi_imx_misc_ctrl_get'
+
+This however only works after changing the dependency in the SND_SOC_FSL_MQS
+driver as well, which uses 'select IMX_SCMI_MISC_DRV' to turn on a
+driver it depends on. This is generally a bad idea, so the best solution
+is to change that into a dependency.
+
+To allow the ASoC driver to keep building with the SCMI support, this
+needs to be an optional dependency that enforces the link-time
+dependency if IMX_SCMI_MISC_DRV is a loadable module but not
+depend on it if that is disabled.
+
+Fixes: 61c9f03e22fc ("firmware: arm_scmi: Add initial support for i.MX MISC protocol")
+Fixes: 101c9023594a ("ASoC: fsl_mqs: Support accessing registers by scmi interface")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Acked-by: Mark Brown <broonie@kernel.org>
+Acked-by: Shengjiu Wang <shengjiu.wang@gmail.com>
+Message-Id: <20241115230555.2435004-1-arnd@kernel.org>
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_scmi/vendors/imx/Kconfig | 1 +
+ drivers/firmware/imx/Kconfig                  | 1 -
+ sound/soc/fsl/Kconfig                         | 1 +
+ 3 files changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/firmware/arm_scmi/vendors/imx/Kconfig b/drivers/firmware/arm_scmi/vendors/imx/Kconfig
+index 2883ed24a84d..a01bf5e47301 100644
+--- a/drivers/firmware/arm_scmi/vendors/imx/Kconfig
++++ b/drivers/firmware/arm_scmi/vendors/imx/Kconfig
+@@ -15,6 +15,7 @@ config IMX_SCMI_BBM_EXT
+ config IMX_SCMI_MISC_EXT
+       tristate "i.MX SCMI MISC EXTENSION"
+       depends on ARM_SCMI_PROTOCOL || (COMPILE_TEST && OF)
++      depends on IMX_SCMI_MISC_DRV
+       default y if ARCH_MXC
+       help
+         This enables i.MX System MISC control logic such as gpio expander
+diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig
+index 477d3f32d99a..907cd149c40a 100644
+--- a/drivers/firmware/imx/Kconfig
++++ b/drivers/firmware/imx/Kconfig
+@@ -25,7 +25,6 @@ config IMX_SCU
+ 
+ config IMX_SCMI_MISC_DRV
+       tristate "IMX SCMI MISC Protocol driver"
+-      depends on IMX_SCMI_MISC_EXT || COMPILE_TEST
+       default y if ARCH_MXC
+       help
+         The System Controller Management Interface firmware (SCMI FW) is
+diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
+index e283751abfef..678540b78280 100644
+--- a/sound/soc/fsl/Kconfig
++++ b/sound/soc/fsl/Kconfig
+@@ -29,6 +29,7 @@ config SND_SOC_FSL_SAI
+ config SND_SOC_FSL_MQS
+       tristate "Medium Quality Sound (MQS) module support"
+       depends on SND_SOC_FSL_SAI
++      depends on IMX_SCMI_MISC_DRV || !IMX_SCMI_MISC_DRV
+       select REGMAP_MMIO
+       help
+         Say Y if you want to add Medium Quality Sound (MQS)
+-- 
+2.39.5
+
diff --git a/queue-6.12/i2c-pnx-fix-timeout-in-wait-functions.patch b/queue-6.12/i2c-pnx-fix-timeout-in-wait-functions.patch

new file mode 100644 (file)

index 0000000..a4d6292
--- /dev/null
+++ b/queue-6.12/i2c-pnx-fix-timeout-in-wait-functions.patch
@@ -0,0 +1,48 @@
+From 0fdafff1cad66f14ff6ce0e4434ab26ae0d1e4e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 7 Dec 2024 00:19:34 +0100
+Subject: i2c: pnx: Fix timeout in wait functions
+
+From: Vladimir Riabchun <ferr.lambarginio@gmail.com>
+
+[ Upstream commit 7363f2d4c18557c99c536b70489187bb4e05c412 ]
+
+Since commit f63b94be6942 ("i2c: pnx: Fix potential deadlock warning
+from del_timer_sync() call in isr") jiffies are stored in
+i2c_pnx_algo_data.timeout, but wait_timeout and wait_reset are still
+using it as milliseconds. Convert jiffies back to milliseconds to wait
+for the expected amount of time.
+
+Fixes: f63b94be6942 ("i2c: pnx: Fix potential deadlock warning from del_timer_sync() call in isr")
+Signed-off-by: Vladimir Riabchun <ferr.lambarginio@gmail.com>
+Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/i2c/busses/i2c-pnx.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
+index 1dafadda73af..135300f3b534 100644
+--- a/drivers/i2c/busses/i2c-pnx.c
++++ b/drivers/i2c/busses/i2c-pnx.c
+@@ -95,7 +95,7 @@ enum {
+ 
+ static inline int wait_timeout(struct i2c_pnx_algo_data *data)
+ {
+-      long timeout = data->timeout;
++      long timeout = jiffies_to_msecs(data->timeout);
+       while (timeout > 0 &&
+                       (ioread32(I2C_REG_STS(data)) & mstatus_active)) {
+               mdelay(1);
+@@ -106,7 +106,7 @@ static inline int wait_timeout(struct i2c_pnx_algo_data *data)
+ 
+ static inline int wait_reset(struct i2c_pnx_algo_data *data)
+ {
+-      long timeout = data->timeout;
++      long timeout = jiffies_to_msecs(data->timeout);
+       while (timeout > 0 &&
+                       (ioread32(I2C_REG_CTL(data)) & mcntrl_reset)) {
+               mdelay(1);
+-- 
+2.39.5
+
diff --git a/queue-6.12/net-stmmac-fix-tso-dma-api-usage-causing-oops.patch b/queue-6.12/net-stmmac-fix-tso-dma-api-usage-causing-oops.patch

new file mode 100644 (file)

index 0000000..4b8cac0
--- /dev/null
+++ b/queue-6.12/net-stmmac-fix-tso-dma-api-usage-causing-oops.patch
@@ -0,0 +1,83 @@
+From 7db091e222bec2ba7fbad34cbcf8a5195bfdf7b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Dec 2024 12:40:11 +0000
+Subject: net: stmmac: fix TSO DMA API usage causing oops
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit 4c49f38e20a57f8abaebdf95b369295b153d1f8e ]
+
+Commit 66600fac7a98 ("net: stmmac: TSO: Fix unbalanced DMA map/unmap
+for non-paged SKB data") moved the assignment of tx_skbuff_dma[]'s
+members to be later in stmmac_tso_xmit().
+
+The buf (dma cookie) and len stored in this structure are passed to
+dma_unmap_single() by stmmac_tx_clean(). The DMA API requires that
+the dma cookie passed to dma_unmap_single() is the same as the value
+returned from dma_map_single(). However, by moving the assignment
+later, this is not the case when priv->dma_cap.addr64 > 32 as "des"
+is offset by proto_hdr_len.
+
+This causes problems such as:
+
+  dwc-eth-dwmac 2490000.ethernet eth0: Tx DMA map failed
+
+and with DMA_API_DEBUG enabled:
+
+  DMA-API: dwc-eth-dwmac 2490000.ethernet: device driver tries to free DMA memory it has not allocated [device address=0x000000ffffcf65c0] [size=66 bytes]
+
+Fix this by maintaining "des" as the original DMA cookie, and use
+tso_des to pass the offset DMA cookie to stmmac_tso_allocator().
+
+Full details of the crashes can be found at:
+https://lore.kernel.org/all/d8112193-0386-4e14-b516-37c2d838171a@nvidia.com/
+https://lore.kernel.org/all/klkzp5yn5kq5efgtrow6wbvnc46bcqfxs65nz3qy77ujr5turc@bwwhelz2l4dw/
+
+Reported-by: Jon Hunter <jonathanh@nvidia.com>
+Reported-by: Thierry Reding <thierry.reding@gmail.com>
+Fixes: 66600fac7a98 ("net: stmmac: TSO: Fix unbalanced DMA map/unmap for non-paged SKB data")
+Tested-by: Jon Hunter <jonathanh@nvidia.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Reviewed-by: Furong Xu <0x1207@gmail.com>
+Link: https://patch.msgid.link/E1tJXcx-006N4Z-PC@rmk-PC.armlinux.org.uk
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+index 766213ee82c1..cf7b59b8cc64 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -4220,8 +4220,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
+       struct stmmac_txq_stats *txq_stats;
+       struct stmmac_tx_queue *tx_q;
+       u32 pay_len, mss, queue;
++      dma_addr_t tso_des, des;
+       u8 proto_hdr_len, hdr;
+-      dma_addr_t des;
+       bool set_ic;
+       int i;
+ 
+@@ -4317,14 +4317,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
+ 
+               /* If needed take extra descriptors to fill the remaining payload */
+               tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
++              tso_des = des;
+       } else {
+               stmmac_set_desc_addr(priv, first, des);
+               tmp_pay_len = pay_len;
+-              des += proto_hdr_len;
++              tso_des = des + proto_hdr_len;
+               pay_len = 0;
+       }
+ 
+-      stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue);
++      stmmac_tso_allocator(priv, tso_des, tmp_pay_len, (nfrags == 0), queue);
+ 
+       /* In case two or more DMA transmit descriptors are allocated for this
+        * non-paged SKB data, the DMA buffer address should be saved to
+-- 
+2.39.5
+
diff --git a/queue-6.12/p2sb-do-not-scan-and-remove-the-p2sb-device-when-it-.patch b/queue-6.12/p2sb-do-not-scan-and-remove-the-p2sb-device-when-it-.patch

new file mode 100644 (file)

index 0000000..d11e947
--- /dev/null
+++ b/queue-6.12/p2sb-do-not-scan-and-remove-the-p2sb-device-when-it-.patch
@@ -0,0 +1,136 @@
+From 95a23eb5715a3a7d024f9e4171b17925524c53ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Nov 2024 09:28:36 +0900
+Subject: p2sb: Do not scan and remove the P2SB device when it is unhidden
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+[ Upstream commit 360c400d0f568636c1b98d1d5f9f49aa3d420c70 ]
+
+When drivers access P2SB device resources, they call p2sb_bar(). Before
+the commit 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls
+during PCI device probe"), p2sb_bar() obtained the resources and then
+called pci_stop_and_remove_bus_device() for clean up. Then the P2SB
+device disappeared. The commit 5913320eb0b3 introduced the P2SB device
+resource cache feature in the boot process. During the resource cache,
+pci_stop_and_remove_bus_device() is called for the P2SB device, then the
+P2SB device disappears regardless of whether p2sb_bar() is called or
+not. Such P2SB device disappearance caused a confusion [1]. To avoid the
+confusion, avoid the pci_stop_and_remove_bus_device() call when the BIOS
+does not hide the P2SB device.
+
+For that purpose, cache the P2SB device resources only if the BIOS hides
+the P2SB device. Call p2sb_scan_and_cache() only if p2sb_hidden_by_bios
+is true. This allows removing two branches from p2sb_scan_and_cache().
+When p2sb_bar() is called, get the resources from the cache if the P2SB
+device is hidden. Otherwise, read the resources from the unhidden P2SB
+device.
+
+Reported-by: Daniel Walker (danielwa) <danielwa@cisco.com>
+Closes: https://lore.kernel.org/lkml/ZzTI+biIUTvFT6NC@goliath/ [1]
+Fixes: 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe")
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20241128002836.373745-5-shinichiro.kawasaki@wdc.com
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/p2sb.c | 42 +++++++++++++++++++++++++++++--------
+ 1 file changed, 33 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
+index 0bc6b21c4c20..c56650b9ff96 100644
+--- a/drivers/platform/x86/p2sb.c
++++ b/drivers/platform/x86/p2sb.c
+@@ -100,10 +100,8 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
+       /*
+        * The BIOS prevents the P2SB device from being enumerated by the PCI
+        * subsystem, so we need to unhide and hide it back to lookup the BAR.
+-       * Unhide the P2SB device here, if needed.
+        */
+-      if (p2sb_hidden_by_bios)
+-              pci_bus_write_config_dword(bus, devfn, P2SBC, 0);
++      pci_bus_write_config_dword(bus, devfn, P2SBC, 0);
+ 
+       /* Scan the P2SB device and cache its BAR0 */
+       p2sb_scan_and_cache_devfn(bus, devfn);
+@@ -112,9 +110,7 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
+       if (devfn == P2SB_DEVFN_GOLDMONT)
+               p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT);
+ 
+-      /* Hide the P2SB device, if it was hidden */
+-      if (p2sb_hidden_by_bios)
+-              pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE);
++      pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE);
+ 
+       if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res))
+               return -ENOENT;
+@@ -141,7 +137,7 @@ static int p2sb_cache_resources(void)
+       u32 value = P2SBC_HIDE;
+       struct pci_bus *bus;
+       u16 class;
+-      int ret;
++      int ret = 0;
+ 
+       /* Get devfn for P2SB device itself */
+       p2sb_get_devfn(&devfn_p2sb);
+@@ -167,7 +163,12 @@ static int p2sb_cache_resources(void)
+       pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
+       p2sb_hidden_by_bios = value & P2SBC_HIDE;
+ 
+-      ret = p2sb_scan_and_cache(bus, devfn_p2sb);
++      /*
++       * If the BIOS does not hide the P2SB device then its resources
++       * are accessible. Cache them only if the P2SB device is hidden.
++       */
++      if (p2sb_hidden_by_bios)
++              ret = p2sb_scan_and_cache(bus, devfn_p2sb);
+ 
+       pci_unlock_rescan_remove();
+ 
+@@ -190,6 +191,26 @@ static int p2sb_read_from_cache(struct pci_bus *bus, unsigned int devfn,
+       return 0;
+ }
+ 
++static int p2sb_read_from_dev(struct pci_bus *bus, unsigned int devfn,
++                            struct resource *mem)
++{
++      struct pci_dev *pdev;
++      int ret = 0;
++
++      pdev = pci_get_slot(bus, devfn);
++      if (!pdev)
++              return -ENODEV;
++
++      if (p2sb_valid_resource(pci_resource_n(pdev, 0)))
++              p2sb_read_bar0(pdev, mem);
++      else
++              ret = -ENOENT;
++
++      pci_dev_put(pdev);
++
++      return ret;
++}
++
+ /**
+  * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR
+  * @bus: PCI bus to communicate with
+@@ -213,7 +234,10 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+       if (!devfn)
+               p2sb_get_devfn(&devfn);
+ 
+-      return p2sb_read_from_cache(bus, devfn, mem);
++      if (p2sb_hidden_by_bios)
++              return p2sb_read_from_cache(bus, devfn, mem);
++
++      return p2sb_read_from_dev(bus, devfn, mem);
+ }
+ EXPORT_SYMBOL_GPL(p2sb_bar);
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.12/p2sb-factor-out-p2sb_read_from_cache.patch b/queue-6.12/p2sb-factor-out-p2sb_read_from_cache.patch

new file mode 100644 (file)

index 0000000..f41dff3
--- /dev/null
+++ b/queue-6.12/p2sb-factor-out-p2sb_read_from_cache.patch
@@ -0,0 +1,82 @@
+From 27398c9595ae269e34ca424295794a655b0c9519 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Nov 2024 09:28:33 +0900
+Subject: p2sb: Factor out p2sb_read_from_cache()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+[ Upstream commit 9244524d60ddea55f4df54c51200e8fef2032447 ]
+
+To prepare for the following fix, factor out the code to read the P2SB
+resource from the cache to the new function p2sb_read_from_cache().
+
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20241128002836.373745-2-shinichiro.kawasaki@wdc.com
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Stable-dep-of: 360c400d0f56 ("p2sb: Do not scan and remove the P2SB device when it is unhidden")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/p2sb.c | 28 +++++++++++++++++-----------
+ 1 file changed, 17 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
+index 31f38309b389..aa34b8a69bc1 100644
+--- a/drivers/platform/x86/p2sb.c
++++ b/drivers/platform/x86/p2sb.c
+@@ -171,6 +171,22 @@ static int p2sb_cache_resources(void)
+       return ret;
+ }
+ 
++static int p2sb_read_from_cache(struct pci_bus *bus, unsigned int devfn,
++                              struct resource *mem)
++{
++      struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)];
++
++      if (cache->bus_dev_id != bus->dev.id)
++              return -ENODEV;
++
++      if (!p2sb_valid_resource(&cache->res))
++              return -ENOENT;
++
++      memcpy(mem, &cache->res, sizeof(*mem));
++
++      return 0;
++}
++
+ /**
+  * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR
+  * @bus: PCI bus to communicate with
+@@ -187,8 +203,6 @@ static int p2sb_cache_resources(void)
+  */
+ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+ {
+-      struct p2sb_res_cache *cache;
+-
+       bus = p2sb_get_bus(bus);
+       if (!bus)
+               return -ENODEV;
+@@ -196,15 +210,7 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
+       if (!devfn)
+               p2sb_get_devfn(&devfn);
+ 
+-      cache = &p2sb_resources[PCI_FUNC(devfn)];
+-      if (cache->bus_dev_id != bus->dev.id)
+-              return -ENODEV;
+-
+-      if (!p2sb_valid_resource(&cache->res))
+-              return -ENOENT;
+-
+-      memcpy(mem, &cache->res, sizeof(*mem));
+-      return 0;
++      return p2sb_read_from_cache(bus, devfn, mem);
+ }
+ EXPORT_SYMBOL_GPL(p2sb_bar);
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.12/p2sb-introduce-the-global-flag-p2sb_hidden_by_bios.patch b/queue-6.12/p2sb-introduce-the-global-flag-p2sb_hidden_by_bios.patch

new file mode 100644 (file)

index 0000000..40b0a5b
--- /dev/null
+++ b/queue-6.12/p2sb-introduce-the-global-flag-p2sb_hidden_by_bios.patch
@@ -0,0 +1,60 @@
+From 1da75876350e6c54be662f7dbf00e4b2261190e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Nov 2024 09:28:34 +0900
+Subject: p2sb: Introduce the global flag p2sb_hidden_by_bios
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+[ Upstream commit ae3e6ebc5ab046d434c05c58a3e3f7e94441fec2 ]
+
+To prepare for the following fix, introduce the global flag
+p2sb_hidden_by_bios. Check if the BIOS hides the P2SB device and store
+the result in the flag. This allows referring to the check result across
+functions.
+
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20241128002836.373745-3-shinichiro.kawasaki@wdc.com
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Stable-dep-of: 360c400d0f56 ("p2sb: Do not scan and remove the P2SB device when it is unhidden")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/p2sb.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
+index aa34b8a69bc1..273ac90c8fbd 100644
+--- a/drivers/platform/x86/p2sb.c
++++ b/drivers/platform/x86/p2sb.c
+@@ -42,6 +42,7 @@ struct p2sb_res_cache {
+ };
+ 
+ static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE];
++static bool p2sb_hidden_by_bios;
+ 
+ static void p2sb_get_devfn(unsigned int *devfn)
+ {
+@@ -157,13 +158,14 @@ static int p2sb_cache_resources(void)
+        * Unhide the P2SB device here, if needed.
+        */
+       pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
+-      if (value & P2SBC_HIDE)
++      p2sb_hidden_by_bios = value & P2SBC_HIDE;
++      if (p2sb_hidden_by_bios)
+               pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0);
+ 
+       ret = p2sb_scan_and_cache(bus, devfn_p2sb);
+ 
+       /* Hide the P2SB device, if it was hidden */
+-      if (value & P2SBC_HIDE)
++      if (p2sb_hidden_by_bios)
+               pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE);
+ 
+       pci_unlock_rescan_remove();
+-- 
+2.39.5
+
diff --git a/queue-6.12/p2sb-move-p2sb-hide-and-unhide-code-to-p2sb_scan_and.patch b/queue-6.12/p2sb-move-p2sb-hide-and-unhide-code-to-p2sb_scan_and.patch

new file mode 100644 (file)

index 0000000..30a5f89
--- /dev/null
+++ b/queue-6.12/p2sb-move-p2sb-hide-and-unhide-code-to-p2sb_scan_and.patch
@@ -0,0 +1,82 @@
+From 2db2be13b4ed140c36ac1a0b515d000ee3274e6d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Nov 2024 09:28:35 +0900
+Subject: p2sb: Move P2SB hide and unhide code to p2sb_scan_and_cache()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+[ Upstream commit 0286070c74ee48391fc07f7f617460479472d221 ]
+
+To prepare for the following fix, move the code to hide and unhide the
+P2SB device from p2sb_cache_resources() to p2sb_scan_and_cache().
+
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20241128002836.373745-4-shinichiro.kawasaki@wdc.com
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Stable-dep-of: 360c400d0f56 ("p2sb: Do not scan and remove the P2SB device when it is unhidden")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/p2sb.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
+index 273ac90c8fbd..0bc6b21c4c20 100644
+--- a/drivers/platform/x86/p2sb.c
++++ b/drivers/platform/x86/p2sb.c
+@@ -97,6 +97,14 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn)
+ 
+ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
+ {
++      /*
++       * The BIOS prevents the P2SB device from being enumerated by the PCI
++       * subsystem, so we need to unhide and hide it back to lookup the BAR.
++       * Unhide the P2SB device here, if needed.
++       */
++      if (p2sb_hidden_by_bios)
++              pci_bus_write_config_dword(bus, devfn, P2SBC, 0);
++
+       /* Scan the P2SB device and cache its BAR0 */
+       p2sb_scan_and_cache_devfn(bus, devfn);
+ 
+@@ -104,6 +112,10 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn)
+       if (devfn == P2SB_DEVFN_GOLDMONT)
+               p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT);
+ 
++      /* Hide the P2SB device, if it was hidden */
++      if (p2sb_hidden_by_bios)
++              pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE);
++
+       if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res))
+               return -ENOENT;
+ 
+@@ -152,22 +164,11 @@ static int p2sb_cache_resources(void)
+        */
+       pci_lock_rescan_remove();
+ 
+-      /*
+-       * The BIOS prevents the P2SB device from being enumerated by the PCI
+-       * subsystem, so we need to unhide and hide it back to lookup the BAR.
+-       * Unhide the P2SB device here, if needed.
+-       */
+       pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
+       p2sb_hidden_by_bios = value & P2SBC_HIDE;
+-      if (p2sb_hidden_by_bios)
+-              pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0);
+ 
+       ret = p2sb_scan_and_cache(bus, devfn_p2sb);
+ 
+-      /* Hide the P2SB device, if it was hidden */
+-      if (p2sb_hidden_by_bios)
+-              pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE);
+-
+       pci_unlock_rescan_remove();
+ 
+       return ret;
+-- 
+2.39.5
+
diff --git a/queue-6.12/risc-v-kvm-fix-csr_write-csr_set-for-hvien-pmu-overf.patch b/queue-6.12/risc-v-kvm-fix-csr_write-csr_set-for-hvien-pmu-overf.patch

new file mode 100644 (file)

index 0000000..67c91be
--- /dev/null
+++ b/queue-6.12/risc-v-kvm-fix-csr_write-csr_set-for-hvien-pmu-overf.patch
@@ -0,0 +1,39 @@
+From e357b868a26c8404b85881e96039e0d7dbf67171 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Nov 2024 04:18:40 +0000
+Subject: RISC-V: KVM: Fix csr_write -> csr_set for HVIEN PMU overflow bit
+
+From: Michael Neuling <michaelneuling@tenstorrent.com>
+
+[ Upstream commit ea6398a5af81e3e7fb3da5d261694d479a321fd9 ]
+
+This doesn't cause a problem currently as HVIEN isn't used elsewhere
+yet. Found by inspection.
+
+Signed-off-by: Michael Neuling <michaelneuling@tenstorrent.com>
+Fixes: 16b0bde9a37c ("RISC-V: KVM: Add perf sampling support for guests")
+Reviewed-by: Atish Patra <atishp@rivosinc.com>
+Reviewed-by: Anup Patel <anup@brainfault.org>
+Link: https://lore.kernel.org/r/20241127041840.419940-1-michaelneuling@tenstorrent.com
+Signed-off-by: Anup Patel <anup@brainfault.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kvm/aia.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
+index 2967d305c442..9f3b527596de 100644
+--- a/arch/riscv/kvm/aia.c
++++ b/arch/riscv/kvm/aia.c
+@@ -552,7 +552,7 @@ void kvm_riscv_aia_enable(void)
+       csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
+       /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */
+       if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+-              csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF));
++              csr_set(CSR_HVIEN, BIT(IRQ_PMU_OVF));
+ }
+ 
+ void kvm_riscv_aia_disable(void)
+-- 
+2.39.5
+
diff --git a/queue-6.12/s390-ipl-fix-never-less-than-zero-warning.patch b/queue-6.12/s390-ipl-fix-never-less-than-zero-warning.patch

new file mode 100644 (file)

index 0000000..2835ebb
--- /dev/null
+++ b/queue-6.12/s390-ipl-fix-never-less-than-zero-warning.patch
@@ -0,0 +1,38 @@
+From 0d7624d8f519497f54bf832e96a0d8646cc8fe01 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Dec 2024 17:43:48 +0100
+Subject: s390/ipl: Fix never less than zero warning
+
+From: Alexander Gordeev <agordeev@linux.ibm.com>
+
+[ Upstream commit 5fa49dd8e521a42379e5e41fcf2c92edaaec0a8b ]
+
+DEFINE_IPL_ATTR_STR_RW() macro produces "unsigned 'len' is never less
+than zero." warning when sys_vmcmd_on_*_store() callbacks are defined.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202412081614.5uel8F6W-lkp@intel.com/
+Fixes: 247576bf624a ("s390/ipl: Do not accept z/VM CP diag X'008' cmds longer than max length")
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/ipl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
+index f17bb7bf9392..5fa203f4bc6b 100644
+--- a/arch/s390/kernel/ipl.c
++++ b/arch/s390/kernel/ipl.c
+@@ -270,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,       \
+       if (len >= sizeof(_value))                                      \
+               return -E2BIG;                                          \
+       len = strscpy(_value, buf, sizeof(_value));                     \
+-      if (len < 0)                                                    \
++      if ((ssize_t)len < 0)                                           \
+               return len;                                             \
+       strim(_value);                                                  \
+       return len;                                                     \
+-- 
+2.39.5
+
diff --git a/queue-6.12/s390-mm-consider-kmsan-modules-metadata-for-paging-l.patch b/queue-6.12/s390-mm-consider-kmsan-modules-metadata-for-paging-l.patch

new file mode 100644 (file)

index 0000000..361a052
--- /dev/null
+++ b/queue-6.12/s390-mm-consider-kmsan-modules-metadata-for-paging-l.patch
@@ -0,0 +1,41 @@
+From 025f5a1045222edc3b8a26a391636ca98d5dbcdf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Dec 2024 12:35:34 +0100
+Subject: s390/mm: Consider KMSAN modules metadata for paging levels
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+[ Upstream commit 282da38b465395c930687974627c24f47ddce5ff ]
+
+The calculation determining whether to use three- or four-level paging
+didn't account for KMSAN modules metadata. Include this metadata in the
+virtual memory size calculation to ensure correct paging mode selection
+and avoid potentially unnecessary physical memory size limitations.
+
+Fixes: 65ca73f9fb36 ("s390/mm: define KMSAN metadata for vmalloc and modules")
+Acked-by: Heiko Carstens <hca@linux.ibm.com>
+Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/boot/startup.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
+index c8f149ad77e5..c2ee0745f59e 100644
+--- a/arch/s390/boot/startup.c
++++ b/arch/s390/boot/startup.c
+@@ -231,6 +231,8 @@ static unsigned long get_vmem_size(unsigned long identity_size,
+       vsize = round_up(SZ_2G + max_mappable, rte_size) +
+               round_up(vmemmap_size, rte_size) +
+               FIXMAP_SIZE + MODULES_LEN + KASLR_LEN;
++      if (IS_ENABLED(CONFIG_KMSAN))
++              vsize += MODULES_LEN * 2;
+       return size_add(vsize, vmalloc_size);
+ }
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.12/sched-dlserver-fix-dlserver-double-enqueue.patch b/queue-6.12/sched-dlserver-fix-dlserver-double-enqueue.patch

new file mode 100644 (file)

index 0000000..c90f634
--- /dev/null
+++ b/queue-6.12/sched-dlserver-fix-dlserver-double-enqueue.patch
@@ -0,0 +1,164 @@
+From 44c8bb66dcae7fbaaa7077cd0e045d83aac1b658 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 22:22:36 -0500
+Subject: sched/dlserver: Fix dlserver double enqueue
+
+From: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
+
+[ Upstream commit b53127db1dbf7f1047cf35c10922d801dcd40324 ]
+
+dlserver can get dequeued during a dlserver pick_task due to the delayed
+dequeue feature and this can lead to issues with dlserver logic as it
+still thinks that dlserver is on the runqueue. The dlserver throttling
+and replenish logic gets confused and can lead to double enqueue of
+dlserver.
+
+Double enqueue of dlserver could happen for a couple of reasons:
+
+Case 1
+------
+
+Delayed dequeue feature[1] can cause dlserver to be stopped during a
+pick initiated by dlserver:
+  __pick_next_task
+   pick_task_dl -> server_pick_task
+    pick_task_fair
+     pick_next_entity (if (sched_delayed))
+      dequeue_entities
+       dl_server_stop
+
+server_pick_task goes ahead with update_curr_dl_se without knowing that
+dlserver is dequeued and this confuses the logic and may lead to
+unintended enqueue while the server is stopped.
+
+Case 2
+------
+A race condition between a task dequeue on one cpu and same task's enqueue
+on this cpu by a remote cpu while the lock is released can cause a dlserver
+double enqueue.
+
+One cpu would be in the schedule() and releasing RQ-lock:
+
+current->state = TASK_INTERRUPTIBLE();
+        schedule();
+          deactivate_task()
+            dl_server_stop();
+          pick_next_task()
+            pick_next_task_fair()
+              sched_balance_newidle()
+                rq_unlock(this_rq)
+
+at which point another CPU can take our RQ-lock and do:
+
+        try_to_wake_up()
+          ttwu_queue()
+            rq_lock()
+            ...
+            activate_task()
+              dl_server_start() --> first enqueue
+            wakeup_preempt() := check_preempt_wakeup_fair()
+              update_curr()
+                update_curr_task()
+                  if (current->dl_server)
+                    dl_server_update()
+                      enqueue_dl_entity() --> second enqueue
+
+This bug was not apparent as the enqueue in dl_server_start doesn't
+usually happen because of the defer logic. But as a side effect of the
+first case (dequeue during dlserver pick), dl_throttled and dl_yielded will
+be set, which messes up the time accounting of dlserver and
+then leads to an enqueue in dl_server_start.
+
+Have an explicit flag representing the status of dlserver to avoid the
+confusion. This is set in dl_server_start and reset in dl_server_stop.
+
+Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers")
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: "Vineeth Pillai (Google)" <vineeth@bitbyteword.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Marcel Ziswiler <marcel.ziswiler@codethink.co.uk> # ROCK 5B
+Link: https://lkml.kernel.org/r/20241213032244.877029-1-vineeth@bitbyteword.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched.h   | 7 +++++++
+ kernel/sched/deadline.c | 8 ++++++--
+ kernel/sched/sched.h    | 5 +++++
+ 3 files changed, 18 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index bb343136ddd0..c14446c6164d 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -656,6 +656,12 @@ struct sched_dl_entity {
+        * @dl_defer_armed tells if the deferrable server is waiting
+        * for the replenishment timer to activate it.
+        *
++       * @dl_server_active tells if the dlserver is active(started).
++       * dlserver is started on first cfs enqueue on an idle runqueue
++       * and is stopped when a dequeue results in 0 cfs tasks on the
++       * runqueue. In other words, dlserver is active only when cpu's
++       * runqueue has at least one cfs task.
++       *
+        * @dl_defer_running tells if the deferrable server is actually
+        * running, skipping the defer phase.
+        */
+@@ -664,6 +670,7 @@ struct sched_dl_entity {
+       unsigned int                    dl_non_contending : 1;
+       unsigned int                    dl_overrun        : 1;
+       unsigned int                    dl_server         : 1;
++      unsigned int                    dl_server_active  : 1;
+       unsigned int                    dl_defer          : 1;
+       unsigned int                    dl_defer_armed    : 1;
+       unsigned int                    dl_defer_running  : 1;
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index fc6f41ac33eb..a17c23b53049 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1647,6 +1647,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
+       if (!dl_se->dl_runtime)
+               return;
+ 
++      dl_se->dl_server_active = 1;
+       enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+       if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl))
+               resched_curr(dl_se->rq);
+@@ -1661,6 +1662,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
+       hrtimer_try_to_cancel(&dl_se->dl_timer);
+       dl_se->dl_defer_armed = 0;
+       dl_se->dl_throttled = 0;
++      dl_se->dl_server_active = 0;
+ }
+ 
+ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
+@@ -2420,8 +2422,10 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
+       if (dl_server(dl_se)) {
+               p = dl_se->server_pick_task(dl_se);
+               if (!p) {
+-                      dl_se->dl_yielded = 1;
+-                      update_curr_dl_se(rq, dl_se, 0);
++                      if (dl_server_active(dl_se)) {
++                              dl_se->dl_yielded = 1;
++                              update_curr_dl_se(rq, dl_se, 0);
++                      }
+                       goto again;
+               }
+               rq->dl_server = dl_se;
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index c53696275ca1..f2ef520513c4 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -398,6 +398,11 @@ extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq
+ extern int dl_server_apply_params(struct sched_dl_entity *dl_se,
+                   u64 runtime, u64 period, bool init);
+ 
++static inline bool dl_server_active(struct sched_dl_entity *dl_se)
++{
++      return dl_se->dl_server_active;
++}
++
+ #ifdef CONFIG_CGROUP_SCHED
+ 
+ extern struct list_head task_groups;
+-- 
+2.39.5
+
diff --git a/queue-6.12/sched-dlserver-fix-dlserver-time-accounting.patch b/queue-6.12/sched-dlserver-fix-dlserver-time-accounting.patch

new file mode 100644 (file)

index 0000000..37db719
--- /dev/null
+++ b/queue-6.12/sched-dlserver-fix-dlserver-time-accounting.patch
@@ -0,0 +1,70 @@
+From c54ff10a7b8301b633e1aa2769b493f1ab170b56 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 22:22:37 -0500
+Subject: sched/dlserver: Fix dlserver time accounting
+
+From: Vineeth Pillai (Google) <vineeth@bitbyteword.org>
+
+[ Upstream commit c7f7e9c73178e0e342486fd31e7f363ef60e3f83 ]
+
+dlserver time is accounted when:
+ - dlserver is active and the dlserver proxies the cfs task.
+ - dlserver is active but deferred and cfs task runs after being picked
+   through the normal fair class pick.
+
+dl_server_update is called in two places to make sure that both the
+above times are accounted for. But it doesn't check if dlserver is
+active or not. Now that we have this dl_server_active flag, we can
+consolidate dl_server_update into one place and all we need to check is
+whether dlserver is active or not. When dlserver is active there are only
+two possible conditions:
+ - dlserver is deferred.
+ - cfs task is running on behalf of dlserver.
+
+Fixes: a110a81c52a9 ("sched/deadline: Deferrable dl server")
+Signed-off-by: "Vineeth Pillai (Google)" <vineeth@bitbyteword.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Marcel Ziswiler <marcel.ziswiler@codethink.co.uk> # ROCK 5B
+Link: https://lore.kernel.org/r/20241213032244.877029-2-vineeth@bitbyteword.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 93142f9077c7..1ca96c99872f 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1159,8 +1159,6 @@ static inline void update_curr_task(struct task_struct *p, s64 delta_exec)
+       trace_sched_stat_runtime(p, delta_exec);
+       account_group_exec_runtime(p, delta_exec);
+       cgroup_account_cputime(p, delta_exec);
+-      if (p->dl_server)
+-              dl_server_update(p->dl_server, delta_exec);
+ }
+ 
+ static inline bool did_preempt_short(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+@@ -1237,11 +1235,16 @@ static void update_curr(struct cfs_rq *cfs_rq)
+               update_curr_task(p, delta_exec);
+ 
+               /*
+-               * Any fair task that runs outside of fair_server should
+-               * account against fair_server such that it can account for
+-               * this time and possibly avoid running this period.
++               * If the fair_server is active, we need to account for the
++               * fair_server time whether or not the task is running on
++               * behalf of fair_server or not:
++               *  - If the task is running on behalf of fair_server, we need
++               *    to limit its time based on the assigned runtime.
++               *  - Fair task that runs outside of fair_server should account
++               *    against fair_server such that it can account for this time
++               *    and possibly avoid running this period.
+                */
+-              if (p->dl_server != &rq->fair_server)
++              if (dl_server_active(&rq->fair_server))
+                       dl_server_update(&rq->fair_server, delta_exec);
+       }
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.12/sched-eevdf-more-pelt-vs-delayed_dequeue.patch b/queue-6.12/sched-eevdf-more-pelt-vs-delayed_dequeue.patch

new file mode 100644 (file)

index 0000000..6106815
--- /dev/null
+++ b/queue-6.12/sched-eevdf-more-pelt-vs-delayed_dequeue.patch
@@ -0,0 +1,282 @@
+From 6d6343fb58e0c45c04860781661503dcde5fbd6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Dec 2024 18:45:57 +0100
+Subject: sched/eevdf: More PELT vs DELAYED_DEQUEUE
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 76f2f783294d7d55c2564e2dfb0a7279ba0bc264 ]
+
+Vincent and Dietmar noted that while
+commit fc1892becd56 ("sched/eevdf: Fixup PELT vs DELAYED_DEQUEUE") fixes
+the entity runnable stats, it does not adjust the cfs_rq runnable stats,
+which are based off of h_nr_running.
+
+Track h_nr_delayed such that we can discount those and adjust the
+signal.
+
+Fixes: fc1892becd56 ("sched/eevdf: Fixup PELT vs DELAYED_DEQUEUE")
+Closes: https://lore.kernel.org/lkml/a9a45193-d0c6-4ba2-a822-464ad30b550e@arm.com/
+Closes: https://lore.kernel.org/lkml/CAKfTPtCNUvWE_GX5LyvTF-WdxUT=ZgvZZv-4t=eWntg5uOFqiQ@mail.gmail.com/
+[ Fixes checkpatch warnings and rebased ]
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reported-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Reported-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Link: https://lore.kernel.org/r/20241202174606.4074512-3-vincent.guittot@linaro.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/debug.c |  1 +
+ kernel/sched/fair.c  | 51 +++++++++++++++++++++++++++++++++++++++-----
+ kernel/sched/pelt.c  |  2 +-
+ kernel/sched/sched.h |  8 +++++--
+ 4 files changed, 54 insertions(+), 8 deletions(-)
+
+diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
+index f4035c7a0fa1..82b165bf48c4 100644
+--- a/kernel/sched/debug.c
++++ b/kernel/sched/debug.c
+@@ -844,6 +844,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
+       SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
+       SEQ_printf(m, "  .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
++      SEQ_printf(m, "  .%-30s: %d\n", "h_nr_delayed", cfs_rq->h_nr_delayed);
+       SEQ_printf(m, "  .%-30s: %d\n", "idle_nr_running",
+                       cfs_rq->idle_nr_running);
+       SEQ_printf(m, "  .%-30s: %d\n", "idle_h_nr_running",
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index c467e389cd6f..93142f9077c7 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5471,9 +5471,33 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ 
+ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+ 
+-static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
++static void set_delayed(struct sched_entity *se)
++{
++      se->sched_delayed = 1;
++      for_each_sched_entity(se) {
++              struct cfs_rq *cfs_rq = cfs_rq_of(se);
++
++              cfs_rq->h_nr_delayed++;
++              if (cfs_rq_throttled(cfs_rq))
++                      break;
++      }
++}
++
++static void clear_delayed(struct sched_entity *se)
+ {
+       se->sched_delayed = 0;
++      for_each_sched_entity(se) {
++              struct cfs_rq *cfs_rq = cfs_rq_of(se);
++
++              cfs_rq->h_nr_delayed--;
++              if (cfs_rq_throttled(cfs_rq))
++                      break;
++      }
++}
++
++static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
++{
++      clear_delayed(se);
+       if (sched_feat(DELAY_ZERO) && se->vlag > 0)
+               se->vlag = 0;
+ }
+@@ -5502,7 +5526,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+               if (sched_feat(DELAY_DEQUEUE) && delay &&
+                   !entity_eligible(cfs_rq, se)) {
+                       update_load_avg(cfs_rq, se, 0);
+-                      se->sched_delayed = 1;
++                      set_delayed(se);
+                       return false;
+               }
+       }
+@@ -5920,7 +5944,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+       struct rq *rq = rq_of(cfs_rq);
+       struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+       struct sched_entity *se;
+-      long task_delta, idle_task_delta, dequeue = 1;
++      long task_delta, idle_task_delta, delayed_delta, dequeue = 1;
+       long rq_h_nr_running = rq->cfs.h_nr_running;
+ 
+       raw_spin_lock(&cfs_b->lock);
+@@ -5953,6 +5977,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+ 
+       task_delta = cfs_rq->h_nr_running;
+       idle_task_delta = cfs_rq->idle_h_nr_running;
++      delayed_delta = cfs_rq->h_nr_delayed;
+       for_each_sched_entity(se) {
+               struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+               int flags;
+@@ -5976,6 +6001,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+ 
+               qcfs_rq->h_nr_running -= task_delta;
+               qcfs_rq->idle_h_nr_running -= idle_task_delta;
++              qcfs_rq->h_nr_delayed -= delayed_delta;
+ 
+               if (qcfs_rq->load.weight) {
+                       /* Avoid re-evaluating load for this entity: */
+@@ -5998,6 +6024,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+ 
+               qcfs_rq->h_nr_running -= task_delta;
+               qcfs_rq->idle_h_nr_running -= idle_task_delta;
++              qcfs_rq->h_nr_delayed -= delayed_delta;
+       }
+ 
+       /* At this point se is NULL and we are at root level*/
+@@ -6023,7 +6050,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+       struct rq *rq = rq_of(cfs_rq);
+       struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+       struct sched_entity *se;
+-      long task_delta, idle_task_delta;
++      long task_delta, idle_task_delta, delayed_delta;
+       long rq_h_nr_running = rq->cfs.h_nr_running;
+ 
+       se = cfs_rq->tg->se[cpu_of(rq)];
+@@ -6059,6 +6086,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+ 
+       task_delta = cfs_rq->h_nr_running;
+       idle_task_delta = cfs_rq->idle_h_nr_running;
++      delayed_delta = cfs_rq->h_nr_delayed;
+       for_each_sched_entity(se) {
+               struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+ 
+@@ -6076,6 +6104,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+ 
+               qcfs_rq->h_nr_running += task_delta;
+               qcfs_rq->idle_h_nr_running += idle_task_delta;
++              qcfs_rq->h_nr_delayed += delayed_delta;
+ 
+               /* end evaluation on encountering a throttled cfs_rq */
+               if (cfs_rq_throttled(qcfs_rq))
+@@ -6093,6 +6122,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+ 
+               qcfs_rq->h_nr_running += task_delta;
+               qcfs_rq->idle_h_nr_running += idle_task_delta;
++              qcfs_rq->h_nr_delayed += delayed_delta;
+ 
+               /* end evaluation on encountering a throttled cfs_rq */
+               if (cfs_rq_throttled(qcfs_rq))
+@@ -6946,7 +6976,7 @@ requeue_delayed_entity(struct sched_entity *se)
+       }
+ 
+       update_load_avg(cfs_rq, se, 0);
+-      se->sched_delayed = 0;
++      clear_delayed(se);
+ }
+ 
+ /*
+@@ -6960,6 +6990,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+       struct cfs_rq *cfs_rq;
+       struct sched_entity *se = &p->se;
+       int idle_h_nr_running = task_has_idle_policy(p);
++      int h_nr_delayed = 0;
+       int task_new = !(flags & ENQUEUE_WAKEUP);
+       int rq_h_nr_running = rq->cfs.h_nr_running;
+       u64 slice = 0;
+@@ -6986,6 +7017,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+       if (p->in_iowait)
+               cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
+ 
++      if (task_new)
++              h_nr_delayed = !!se->sched_delayed;
++
+       for_each_sched_entity(se) {
+               if (se->on_rq) {
+                       if (se->sched_delayed)
+@@ -7008,6 +7042,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ 
+               cfs_rq->h_nr_running++;
+               cfs_rq->idle_h_nr_running += idle_h_nr_running;
++              cfs_rq->h_nr_delayed += h_nr_delayed;
+ 
+               if (cfs_rq_is_idle(cfs_rq))
+                       idle_h_nr_running = 1;
+@@ -7031,6 +7066,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+ 
+               cfs_rq->h_nr_running++;
+               cfs_rq->idle_h_nr_running += idle_h_nr_running;
++              cfs_rq->h_nr_delayed += h_nr_delayed;
+ 
+               if (cfs_rq_is_idle(cfs_rq))
+                       idle_h_nr_running = 1;
+@@ -7093,6 +7129,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+       struct task_struct *p = NULL;
+       int idle_h_nr_running = 0;
+       int h_nr_running = 0;
++      int h_nr_delayed = 0;
+       struct cfs_rq *cfs_rq;
+       u64 slice = 0;
+ 
+@@ -7100,6 +7137,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+               p = task_of(se);
+               h_nr_running = 1;
+               idle_h_nr_running = task_has_idle_policy(p);
++              if (!task_sleep && !task_delayed)
++                      h_nr_delayed = !!se->sched_delayed;
+       } else {
+               cfs_rq = group_cfs_rq(se);
+               slice = cfs_rq_min_slice(cfs_rq);
+@@ -7117,6 +7156,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+ 
+               cfs_rq->h_nr_running -= h_nr_running;
+               cfs_rq->idle_h_nr_running -= idle_h_nr_running;
++              cfs_rq->h_nr_delayed -= h_nr_delayed;
+ 
+               if (cfs_rq_is_idle(cfs_rq))
+                       idle_h_nr_running = h_nr_running;
+@@ -7155,6 +7195,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+ 
+               cfs_rq->h_nr_running -= h_nr_running;
+               cfs_rq->idle_h_nr_running -= idle_h_nr_running;
++              cfs_rq->h_nr_delayed -= h_nr_delayed;
+ 
+               if (cfs_rq_is_idle(cfs_rq))
+                       idle_h_nr_running = h_nr_running;
+diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
+index a9c65d97b3ca..171a802420a1 100644
+--- a/kernel/sched/pelt.c
++++ b/kernel/sched/pelt.c
+@@ -321,7 +321,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
+ {
+       if (___update_load_sum(now, &cfs_rq->avg,
+                               scale_load_down(cfs_rq->load.weight),
+-                              cfs_rq->h_nr_running,
++                              cfs_rq->h_nr_running - cfs_rq->h_nr_delayed,
+                               cfs_rq->curr != NULL)) {
+ 
+               ___update_load_avg(&cfs_rq->avg, 1);
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index c03b3d7b320e..c53696275ca1 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -649,6 +649,7 @@ struct cfs_rq {
+       unsigned int            h_nr_running;      /* SCHED_{NORMAL,BATCH,IDLE} */
+       unsigned int            idle_nr_running;   /* SCHED_IDLE */
+       unsigned int            idle_h_nr_running; /* SCHED_IDLE */
++      unsigned int            h_nr_delayed;
+ 
+       s64                     avg_vruntime;
+       u64                     avg_load;
+@@ -898,8 +899,11 @@ struct dl_rq {
+ 
+ static inline void se_update_runnable(struct sched_entity *se)
+ {
+-      if (!entity_is_task(se))
+-              se->runnable_weight = se->my_q->h_nr_running;
++      if (!entity_is_task(se)) {
++              struct cfs_rq *cfs_rq = se->my_q;
++
++              se->runnable_weight = cfs_rq->h_nr_running - cfs_rq->h_nr_delayed;
++      }
+ }
+ 
+ static inline long se_runnable(struct sched_entity *se)
+-- 
+2.39.5
+
diff --git a/queue-6.12/sched-fair-fix-next_buddy.patch b/queue-6.12/sched-fair-fix-next_buddy.patch

new file mode 100644 (file)

index 0000000..8c35cd2
--- /dev/null
+++ b/queue-6.12/sched-fair-fix-next_buddy.patch
@@ -0,0 +1,69 @@
+From 4ad329aa5f61d1ce6c81094d25ee4a35dc4e5ebb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Nov 2024 12:59:54 +0530
+Subject: sched/fair: Fix NEXT_BUDDY
+
+From: K Prateek Nayak <kprateek.nayak@amd.com>
+
+[ Upstream commit 493afbd187c4c9cc1642792c0d9ba400c3d6d90d ]
+
+Adam reports that enabling NEXT_BUDDY insta triggers a WARN in
+pick_next_entity().
+
+Moving clear_buddies() up before the delayed dequeue bits ensures
+no ->next buddy becomes delayed. Further ensure no new ->next buddy
+ever starts as delayed.
+
+Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue")
+Reported-by: Adam Li <adamli@os.amperecomputing.com>
+Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Adam Li <adamli@os.amperecomputing.com>
+Link: https://lkml.kernel.org/r/670a0d54-e398-4b1f-8a6e-90784e2fdf89@amd.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 782ce70ebd1b..c467e389cd6f 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5484,6 +5484,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+       bool sleep = flags & DEQUEUE_SLEEP;
+ 
+       update_curr(cfs_rq);
++      clear_buddies(cfs_rq, se);
+ 
+       if (flags & DEQUEUE_DELAYED) {
+               SCHED_WARN_ON(!se->sched_delayed);
+@@ -5500,8 +5501,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ 
+               if (sched_feat(DELAY_DEQUEUE) && delay &&
+                   !entity_eligible(cfs_rq, se)) {
+-                      if (cfs_rq->next == se)
+-                              cfs_rq->next = NULL;
+                       update_load_avg(cfs_rq, se, 0);
+                       se->sched_delayed = 1;
+                       return false;
+@@ -5526,8 +5525,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ 
+       update_stats_dequeue_fair(cfs_rq, se, flags);
+ 
+-      clear_buddies(cfs_rq, se);
+-
+       update_entity_lag(cfs_rq, se);
+       if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
+               se->deadline -= se->vruntime;
+@@ -8786,7 +8783,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
+       if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
+               return;
+ 
+-      if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) {
++      if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) {
+               set_next_buddy(pse);
+       }
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.12/sched-fair-fix-sched_can_stop_tick-for-fair-tasks.patch b/queue-6.12/sched-fair-fix-sched_can_stop_tick-for-fair-tasks.patch

new file mode 100644 (file)

index 0000000..c05fff2
--- /dev/null
+++ b/queue-6.12/sched-fair-fix-sched_can_stop_tick-for-fair-tasks.patch
@@ -0,0 +1,42 @@
+From 4293c828ce43bf82f65abe9fd85d5138ddb58d99 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Dec 2024 18:45:56 +0100
+Subject: sched/fair: Fix sched_can_stop_tick() for fair tasks
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+[ Upstream commit c1f43c342e1f2e32f0620bf2e972e2a9ea0a1e60 ]
+
+We can't stop the tick of a rq if there are at least 2 tasks enqueued in
+the whole hierarchy and not only at the root cfs rq.
+
+rq->cfs.nr_running tracks the number of sched_entity at one level
+whereas rq->cfs.h_nr_running tracks all queued tasks in the
+hierarchy.
+
+Fixes: 11cc374f4643b ("sched_ext: Simplify scx_can_stop_tick() invocation in sched_can_stop_tick()")
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Link: https://lore.kernel.org/r/20241202174606.4074512-2-vincent.guittot@linaro.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 6cc12777bb11..d07dc87787df 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1300,7 +1300,7 @@ bool sched_can_stop_tick(struct rq *rq)
+       if (scx_enabled() && !scx_can_stop_tick(rq))
+               return false;
+ 
+-      if (rq->cfs.nr_running > 1)
++      if (rq->cfs.h_nr_running > 1)
+               return false;
+ 
+       /*
+-- 
+2.39.5
+
diff --git a/queue-6.12/series b/queue-6.12/series

index 19e6f6cda0b627c330477e59035f2ed37140d7d8..9a2830f783aad65cee09d6f33feccf224d7af986 100644 (file)
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -1 +1,27 @@
  net-sched-fix-ordering-of-qlen-adjustment.patch
+net-stmmac-fix-tso-dma-api-usage-causing-oops.patch
+firmware-arm_scmi-fix-i.mx-build-dependency.patch
+firmware-arm_ffa-fix-the-race-around-setting-ffa_dev.patch
+risc-v-kvm-fix-csr_write-csr_set-for-hvien-pmu-overf.patch
+sched-fair-fix-next_buddy.patch
+sched-fair-fix-sched_can_stop_tick-for-fair-tasks.patch
+sched-eevdf-more-pelt-vs-delayed_dequeue.patch
+p2sb-factor-out-p2sb_read_from_cache.patch
+p2sb-introduce-the-global-flag-p2sb_hidden_by_bios.patch
+p2sb-move-p2sb-hide-and-unhide-code-to-p2sb_scan_and.patch
+p2sb-do-not-scan-and-remove-the-p2sb-device-when-it-.patch
+i2c-pnx-fix-timeout-in-wait-functions.patch
+s390-ipl-fix-never-less-than-zero-warning.patch
+erofs-fix-psi-memstall-accounting.patch
+sched-dlserver-fix-dlserver-double-enqueue.patch
+sched-dlserver-fix-dlserver-time-accounting.patch
+s390-mm-consider-kmsan-modules-metadata-for-paging-l.patch
+erofs-add-erofs_sb_free-helper.patch
+erofs-use-struct-erofs_device_info-for-the-primary-d.patch
+erofs-reference-struct-erofs_device_info-for-erofs_m.patch
+erofs-use-buffered-i-o-for-file-backed-mounts-by-def.patch
+xfs-sb_spino_align-is-not-verified.patch
+xfs-fix-sparse-inode-limits-on-runt-ag.patch
+xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch
+xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch
+xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch
diff --git a/queue-6.12/xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch b/queue-6.12/xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch

new file mode 100644 (file)

index 0000000..50a8262
--- /dev/null
+++ b/queue-6.12/xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch
@@ -0,0 +1,114 @@
+From 289808d43a0780664c93ce21d6b74573775fbc3b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:50:52 -0800
+Subject: xfs: fix off-by-one error in fsmap's end_daddr usage
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit a440a28ddbdcb861150987b4d6e828631656b92f upstream.
+
+In commit ca6448aed4f10a, we created an "end_daddr" variable to fix
+fsmap reporting when the end of the range requested falls in the middle
+of an unknown (aka free on the rmapbt) region.  Unfortunately, I didn't
+notice that the code sets end_daddr to the last sector of the device
+but then uses that quantity to compute the length of the synthesized
+mapping.
+
+Zizhi Wo later observed that when end_daddr isn't set, we still don't
+report the last fsblock on a device because in that case (aka when
+info->last is true), the info->high mapping that we pass to
+xfs_getfsmap_group_helper has a startblock that points to the last
+fsblock.  This is also wrong because the code uses startblock to
+compute the length of the synthesized mapping.
+
+Fix the second problem by setting end_daddr unconditionally, and fix the
+first problem by setting start_daddr to one past the end of the range to
+query.
+
+Cc: <stable@vger.kernel.org> # v6.11
+Fixes: ca6448aed4f10a ("xfs: Fix missing interval for missing_owner in xfs fsmap")
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Reported-by: Zizhi Wo <wozizhi@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_fsmap.c | 29 ++++++++++++++++++-----------
+ 1 file changed, 18 insertions(+), 11 deletions(-)
+
+diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
+index ae18ab86e608..8712b891defb 100644
+--- a/fs/xfs/xfs_fsmap.c
++++ b/fs/xfs/xfs_fsmap.c
+@@ -162,7 +162,8 @@ struct xfs_getfsmap_info {
+       xfs_daddr_t             next_daddr;     /* next daddr we expect */
+       /* daddr of low fsmap key when we're using the rtbitmap */
+       xfs_daddr_t             low_daddr;
+-      xfs_daddr_t             end_daddr;      /* daddr of high fsmap key */
++      /* daddr of high fsmap key, or the last daddr on the device */
++      xfs_daddr_t             end_daddr;
+       u64                     missing_owner;  /* owner of holes */
+       u32                     dev;            /* device id */
+       /*
+@@ -306,7 +307,7 @@ xfs_getfsmap_helper(
+        * Note that if the btree query found a mapping, there won't be a gap.
+        */
+       if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
+-              rec_daddr = info->end_daddr;
++              rec_daddr = info->end_daddr + 1;
+ 
+       /* Are we just counting mappings? */
+       if (info->head->fmh_count == 0) {
+@@ -898,7 +899,10 @@ xfs_getfsmap(
+       struct xfs_trans                *tp = NULL;
+       struct xfs_fsmap                dkeys[2];       /* per-dev keys */
+       struct xfs_getfsmap_dev         handlers[XFS_GETFSMAP_DEVS];
+-      struct xfs_getfsmap_info        info = { NULL };
++      struct xfs_getfsmap_info        info = {
++              .fsmap_recs             = fsmap_recs,
++              .head                   = head,
++      };
+       bool                            use_rmap;
+       int                             i;
+       int                             error = 0;
+@@ -963,9 +967,6 @@ xfs_getfsmap(
+ 
+       info.next_daddr = head->fmh_keys[0].fmr_physical +
+                         head->fmh_keys[0].fmr_length;
+-      info.end_daddr = XFS_BUF_DADDR_NULL;
+-      info.fsmap_recs = fsmap_recs;
+-      info.head = head;
+ 
+       /* For each device we support... */
+       for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
+@@ -978,17 +979,23 @@ xfs_getfsmap(
+                       break;
+ 
+               /*
+-               * If this device number matches the high key, we have
+-               * to pass the high key to the handler to limit the
+-               * query results.  If the device number exceeds the
+-               * low key, zero out the low key so that we get
+-               * everything from the beginning.
++               * If this device number matches the high key, we have to pass
++               * the high key to the handler to limit the query results, and
++               * set the end_daddr so that we can synthesize records at the
++               * end of the query range or device.
+                */
+               if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
+                       dkeys[1] = head->fmh_keys[1];
+                       info.end_daddr = min(handlers[i].nr_sectors - 1,
+                                            dkeys[1].fmr_physical);
++              } else {
++                      info.end_daddr = handlers[i].nr_sectors - 1;
+               }
++
++              /*
++               * If the device number exceeds the low key, zero out the low
++               * key so that we get everything from the beginning.
++               */
+               if (handlers[i].dev > head->fmh_keys[0].fmr_device)
+                       memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.12/xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch b/queue-6.12/xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch

new file mode 100644 (file)

index 0000000..09461d0
--- /dev/null
+++ b/queue-6.12/xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch
@@ -0,0 +1,89 @@
+From 3720905f499187a244d9070293aed91dc03c4b45 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:51:07 -0800
+Subject: xfs: fix sb_spino_align checks for large fsblock sizes
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit 7f8a44f37229fc76bfcafa341a4b8862368ef44a upstream.
+
+For a sparse inodes filesystem, mkfs.xfs computes the values of
+sb_spino_align and sb_inoalignmt with the following code:
+
+       int     cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
+
+       if (cfg->sb_feat.crcs_enabled)
+               cluster_size *= cfg->inodesize / XFS_DINODE_MIN_SIZE;
+
+       sbp->sb_spino_align = cluster_size >> cfg->blocklog;
+       sbp->sb_inoalignmt = XFS_INODES_PER_CHUNK *
+                       cfg->inodesize >> cfg->blocklog;
+
+On a V5 filesystem with 64k fsblocks and 512 byte inodes, this results
+in cluster_size = 8192 * (512 / 256) = 16384.  As a result,
+sb_spino_align and sb_inoalignmt are both set to zero.  Unfortunately,
+this trips the new sb_spino_align check that was just added to
+xfs_validate_sb_common, and the mkfs fails:
+
+# mkfs.xfs -f -b size=64k, /dev/sda
+meta-data=/dev/sda               isize=512    agcount=4, agsize=81136 blks
+         =                       sectsz=512   attr=2, projid32bit=1
+         =                       crc=1        finobt=1, sparse=1, rmapbt=1
+         =                       reflink=1    bigtime=1 inobtcount=1 nrext64=1
+         =                       exchange=0   metadir=0
+data     =                       bsize=65536  blocks=324544, imaxpct=25
+         =                       sunit=0      swidth=0 blks
+naming   =version 2              bsize=65536  ascii-ci=0, ftype=1, parent=0
+log      =internal log           bsize=65536  blocks=5006, version=2
+         =                       sectsz=512   sunit=0 blks, lazy-count=1
+realtime =none                   extsz=65536  blocks=0, rtextents=0
+         =                       rgcount=0    rgsize=0 extents
+Discarding blocks...Sparse inode alignment (0) is invalid.
+Metadata corruption detected at 0x560ac5a80bbe, xfs_sb block 0x0/0x200
+libxfs_bwrite: write verifier failed on xfs_sb bno 0x0/0x1
+mkfs.xfs: Releasing dirty buffer to free list!
+found dirty buffer (bulk) on free list!
+Sparse inode alignment (0) is invalid.
+Metadata corruption detected at 0x560ac5a80bbe, xfs_sb block 0x0/0x200
+libxfs_bwrite: write verifier failed on xfs_sb bno 0x0/0x1
+mkfs.xfs: writing AG headers failed, err=22
+
+Prior to commit 59e43f5479cce1 this all worked fine, even if "sparse"
+inodes are somewhat meaningless when everything fits in a single
+fsblock.  Adjust the checks to handle existing filesystems.
+
+Cc: <stable@vger.kernel.org> # v6.13-rc1
+Fixes: 59e43f5479cce1 ("xfs: sb_spino_align is not verified")
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_sb.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
+index 9e0ae312bc80..e27b63281d01 100644
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -392,12 +392,13 @@ xfs_validate_sb_common(
+                               return -EINVAL;
+                       }
+ 
+-                      if (!sbp->sb_spino_align ||
+-                          sbp->sb_spino_align > sbp->sb_inoalignmt ||
+-                          (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) {
++                      if (sbp->sb_spino_align &&
++                          (sbp->sb_spino_align > sbp->sb_inoalignmt ||
++                           (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0)) {
+                               xfs_warn(mp,
+-                              "Sparse inode alignment (%u) is invalid.",
+-                                      sbp->sb_spino_align);
++"Sparse inode alignment (%u) is invalid, must be integer factor of (%u).",
++                                      sbp->sb_spino_align,
++                                      sbp->sb_inoalignmt);
+                               return -EINVAL;
+                       }
+               } else if (sbp->sb_spino_align) {
+-- 
+2.39.5
+
diff --git a/queue-6.12/xfs-fix-sparse-inode-limits-on-runt-ag.patch b/queue-6.12/xfs-fix-sparse-inode-limits-on-runt-ag.patch

new file mode 100644 (file)

index 0000000..e5daa55
--- /dev/null
+++ b/queue-6.12/xfs-fix-sparse-inode-limits-on-runt-ag.patch
@@ -0,0 +1,89 @@
+From fe0558bded8b5cfdd3143a116b3ec5c965377716 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:50:36 -0800
+Subject: xfs: fix sparse inode limits on runt AG
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 13325333582d4820d39b9e8f63d6a54e745585d9 upstream.
+
+The runt AG at the end of a filesystem is almost always smaller than
+the mp->m_sb.sb_agblocks. Unfortunately, when setting the max_agbno
+limit for the inode chunk allocation, we do not take this into
+account. This means we can allocate a sparse inode chunk that
+overlaps beyond the end of an AG. When we go to allocate an inode
+from that sparse chunk, the irec fails validation because the
+agbno of the start of the irec is beyond valid limits for the runt
+AG.
+
+Prevent this from happening by taking into account the size of the
+runt AG when allocating inode chunks. Also convert the various
+checks for valid inode chunk agbnos to use xfs_ag_block_count()
+so that they will also catch such issues in the future.
+
+Fixes: 56d1115c9bc7 ("xfs: allocate sparse inode chunks on full chunk allocation failure")
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+[djwong: backport to stable because upstream maintainer ignored cc-stable]
+Link: https://lore.kernel.org/linux-xfs/20241112231539.GG9438@frogsfrogsfrogs/
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_ialloc.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
+index 271855227514..6258527315f2 100644
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -855,7 +855,8 @@ xfs_ialloc_ag_alloc(
+                * the end of the AG.
+                */
+               args.min_agbno = args.mp->m_sb.sb_inoalignmt;
+-              args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
++              args.max_agbno = round_down(xfs_ag_block_count(args.mp,
++                                                      pag->pag_agno),
+                                           args.mp->m_sb.sb_inoalignmt) -
+                                igeo->ialloc_blks;
+ 
+@@ -2332,9 +2333,9 @@ xfs_difree(
+               return -EINVAL;
+       }
+       agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+-      if (agbno >= mp->m_sb.sb_agblocks)  {
+-              xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
+-                      __func__, agbno, mp->m_sb.sb_agblocks);
++      if (agbno >= xfs_ag_block_count(mp, pag->pag_agno)) {
++              xfs_warn(mp, "%s: agbno >= xfs_ag_block_count (%d >= %d).",
++                      __func__, agbno, xfs_ag_block_count(mp, pag->pag_agno));
+               ASSERT(0);
+               return -EINVAL;
+       }
+@@ -2457,7 +2458,7 @@ xfs_imap(
+        */
+       agino = XFS_INO_TO_AGINO(mp, ino);
+       agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+-      if (agbno >= mp->m_sb.sb_agblocks ||
++      if (agbno >= xfs_ag_block_count(mp, pag->pag_agno) ||
+           ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+               error = -EINVAL;
+ #ifdef DEBUG
+@@ -2467,11 +2468,12 @@ xfs_imap(
+                */
+               if (flags & XFS_IGET_UNTRUSTED)
+                       return error;
+-              if (agbno >= mp->m_sb.sb_agblocks) {
++              if (agbno >= xfs_ag_block_count(mp, pag->pag_agno)) {
+                       xfs_alert(mp,
+               "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
+                               __func__, (unsigned long long)agbno,
+-                              (unsigned long)mp->m_sb.sb_agblocks);
++                              (unsigned long)xfs_ag_block_count(mp,
++                                                      pag->pag_agno));
+               }
+               if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+                       xfs_alert(mp,
+-- 
+2.39.5
+
diff --git a/queue-6.12/xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch b/queue-6.12/xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch

new file mode 100644 (file)

index 0000000..3b14a1c
--- /dev/null
+++ b/queue-6.12/xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch
@@ -0,0 +1,84 @@
+From 6208be0894ce64483b9e968e9e5dbe3b4c7f31fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:51:23 -0800
+Subject: xfs: fix zero byte checking in the superblock scrubber
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit c004a793e0ec34047c3bd423bcd8966f5fac88dc upstream.
+
+The logic to check that the region past the end of the superblock is all
+zeroes is wrong -- we don't want to check only the bytes past the end of
+the maximally sized ondisk superblock structure as currently defined in
+xfs_format.h; we want to check the bytes beyond the end of the ondisk
+superblock as defined by the feature bits.
+
+Port the superblock size logic from xfs_repair and then put it to use in
+xfs_scrub.
+
+Cc: <stable@vger.kernel.org> # v4.15
+Fixes: 21fb4cb1981ef7 ("xfs: scrub the secondary superblocks")
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/scrub/agheader.c | 29 +++++++++++++++++++++++++++--
+ 1 file changed, 27 insertions(+), 2 deletions(-)
+
+diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
+index da30f926cbe6..0f2f1852d58f 100644
+--- a/fs/xfs/scrub/agheader.c
++++ b/fs/xfs/scrub/agheader.c
+@@ -59,6 +59,30 @@ xchk_superblock_xref(
+       /* scrub teardown will take care of sc->sa for us */
+ }
+ 
++/*
++ * Calculate the ondisk superblock size in bytes given the feature set of the
++ * mounted filesystem (aka the primary sb).  This is subtly different from
++ * the logic in xfs_repair, which computes the size of a secondary sb given the
++ * featureset listed in the secondary sb.
++ */
++STATIC size_t
++xchk_superblock_ondisk_size(
++      struct xfs_mount        *mp)
++{
++      if (xfs_has_metauuid(mp))
++              return offsetofend(struct xfs_dsb, sb_meta_uuid);
++      if (xfs_has_crc(mp))
++              return offsetofend(struct xfs_dsb, sb_lsn);
++      if (xfs_sb_version_hasmorebits(&mp->m_sb))
++              return offsetofend(struct xfs_dsb, sb_bad_features2);
++      if (xfs_has_logv2(mp))
++              return offsetofend(struct xfs_dsb, sb_logsunit);
++      if (xfs_has_sector(mp))
++              return offsetofend(struct xfs_dsb, sb_logsectsize);
++      /* only support dirv2 or more recent */
++      return offsetofend(struct xfs_dsb, sb_dirblklog);
++}
++
+ /*
+  * Scrub the filesystem superblock.
+  *
+@@ -75,6 +99,7 @@ xchk_superblock(
+       struct xfs_buf          *bp;
+       struct xfs_dsb          *sb;
+       struct xfs_perag        *pag;
++      size_t                  sblen;
+       xfs_agnumber_t          agno;
+       uint32_t                v2_ok;
+       __be32                  features_mask;
+@@ -350,8 +375,8 @@ xchk_superblock(
+       }
+ 
+       /* Everything else must be zero. */
+-      if (memchr_inv(sb + 1, 0,
+-                      BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
++      sblen = xchk_superblock_ondisk_size(mp);
++      if (memchr_inv((char *)sb + sblen, 0, BBTOB(bp->b_length) - sblen))
+               xchk_block_set_corrupt(sc, bp);
+ 
+       xchk_superblock_xref(sc, bp);
+-- 
+2.39.5
+
diff --git a/queue-6.12/xfs-sb_spino_align-is-not-verified.patch b/queue-6.12/xfs-sb_spino_align-is-not-verified.patch

new file mode 100644 (file)

index 0000000..74635da
--- /dev/null
+++ b/queue-6.12/xfs-sb_spino_align-is-not-verified.patch
@@ -0,0 +1,52 @@
+From c5555322e43ec7aa41de5938da5bc8b4da32fba2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:50:20 -0800
+Subject: xfs: sb_spino_align is not verified
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 59e43f5479cce106d71c0b91a297c7ad1913176c upstream.
+
+It's just read in from the superblock and used without doing any
+validity checks at all on the value.
+
+Fixes: fb4f2b4e5a82 ("xfs: add sparse inode chunk alignment superblock field")
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+[djwong: actually tag for 6.12 because upstream maintainer ignored cc-stable tag]
+Link: https://lore.kernel.org/linux-xfs/20241024165544.GI21853@frogsfrogsfrogs/
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
+index 02ebcbc4882f..9e0ae312bc80 100644
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -391,6 +391,20 @@ xfs_validate_sb_common(
+                                        sbp->sb_inoalignmt, align);
+                               return -EINVAL;
+                       }
++
++                      if (!sbp->sb_spino_align ||
++                          sbp->sb_spino_align > sbp->sb_inoalignmt ||
++                          (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) {
++                              xfs_warn(mp,
++                              "Sparse inode alignment (%u) is invalid.",
++                                      sbp->sb_spino_align);
++                              return -EINVAL;
++                      }
++              } else if (sbp->sb_spino_align) {
++                      xfs_warn(mp,
++                              "Sparse inode alignment (%u) should be zero.",
++                              sbp->sb_spino_align);
++                      return -EINVAL;
+               }
+       } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
+                               XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
+-- 
+2.39.5
+
author	Sasha Levin <sashal@kernel.org>
	Thu, 19 Dec 2024 19:23:46 +0000 (14:23 -0500)
committer	Sasha Levin <sashal@kernel.org>
	Thu, 19 Dec 2024 19:23:46 +0000 (14:23 -0500)
queue-6.12/erofs-add-erofs_sb_free-helper.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/erofs-fix-psi-memstall-accounting.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/erofs-reference-struct-erofs_device_info-for-erofs_m.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/erofs-use-buffered-i-o-for-file-backed-mounts-by-def.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/erofs-use-struct-erofs_device_info-for-the-primary-d.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/firmware-arm_ffa-fix-the-race-around-setting-ffa_dev.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/firmware-arm_scmi-fix-i.mx-build-dependency.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/i2c-pnx-fix-timeout-in-wait-functions.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/net-stmmac-fix-tso-dma-api-usage-causing-oops.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/p2sb-do-not-scan-and-remove-the-p2sb-device-when-it-.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/p2sb-factor-out-p2sb_read_from_cache.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/p2sb-introduce-the-global-flag-p2sb_hidden_by_bios.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/p2sb-move-p2sb-hide-and-unhide-code-to-p2sb_scan_and.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/risc-v-kvm-fix-csr_write-csr_set-for-hvien-pmu-overf.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/s390-ipl-fix-never-less-than-zero-warning.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/s390-mm-consider-kmsan-modules-metadata-for-paging-l.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/sched-dlserver-fix-dlserver-double-enqueue.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/sched-dlserver-fix-dlserver-time-accounting.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/sched-eevdf-more-pelt-vs-delayed_dequeue.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/sched-fair-fix-next_buddy.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/sched-fair-fix-sched_can_stop_tick-for-fair-tasks.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/series		patch \| blob \| blame \| history
queue-6.12/xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/xfs-fix-sparse-inode-limits-on-runt-ag.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/xfs-sb_spino_align-is-not-verified.patch	[new file with mode: 0644]	patch \| blob