Fixes for 6.5

author Sasha Levin <sashal@kernel.org>

Mon, 16 Oct 2023 02:50:04 +0000 (22:50 -0400)

committer Sasha Levin <sashal@kernel.org>

Mon, 16 Oct 2023 02:50:04 +0000 (22:50 -0400)
author Sasha Levin <sashal@kernel.org>
Mon, 16 Oct 2023 02:50:04 +0000 (22:50 -0400)
committer Sasha Levin <sashal@kernel.org>
Mon, 16 Oct 2023 02:50:04 +0000 (22:50 -0400)
diff --git a/queue-6.5/dmaengine-idxd-use-spin_lock_irqsave-before-wait_eve.patch b/queue-6.5/dmaengine-idxd-use-spin_lock_irqsave-before-wait_eve.patch

new file mode 100644 (file)

index 0000000..d4f6099
--- /dev/null
+++ b/queue-6.5/dmaengine-idxd-use-spin_lock_irqsave-before-wait_eve.patch
@@ -0,0 +1,92 @@
+From bb77c7aa35e7d1ac1e93cc3c5fd4432f65b3e4cc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Sep 2023 14:06:19 +0800
+Subject: dmaengine: idxd: use spin_lock_irqsave before wait_event_lock_irq
+
+From: Rex Zhang <rex.zhang@intel.com>
+
+[ Upstream commit c0409dd3d151f661e7e57b901a81a02565df163c ]
+
+In idxd_cmd_exec(), wait_event_lock_irq() explicitly calls
+spin_unlock_irq()/spin_lock_irq(). If interrupts are enabled before
+entering wait_event_lock_irq(), they are left disabled after
+wait_event_lock_irq() returns. Later, wait_for_completion() may go to
+sleep with interrupts disabled, which might_sleep() warns about.
+
+Fix it by using spin_lock_irqsave() instead of the primitive spin_lock()
+to save the irq status before entering wait_event_lock_irq() and using
+spin_unlock_irqrestore() instead of the primitive spin_unlock() to restore
+the irq status before entering wait_for_completion().
+
+Before the change:
+idxd_cmd_exec() {
+interrupt is on
+spin_lock()                        // interrupt is on
+       wait_event_lock_irq()
+               spin_unlock_irq()  // interrupt is enabled
+               ...
+               spin_lock_irq()    // interrupt is disabled
+spin_unlock()                      // interrupt is still disabled
+wait_for_completion()              // report "BUG: sleeping function
+                                  // called from invalid context...
+                                  // in_atomic() irqs_disabled()"
+}
+
+After applying spin_lock_irqsave():
+idxd_cmd_exec() {
+interrupt is on
+spin_lock_irqsave()                // save the on state
+                                  // interrupt is disabled
+       wait_event_lock_irq()
+               spin_unlock_irq()  // interrupt is enabled
+               ...
+               spin_lock_irq()    // interrupt is disabled
+spin_unlock_irqrestore()           // interrupt is restored to on
+wait_for_completion()              // No Call trace
+}
+
+Fixes: f9f4082dbc56 ("dmaengine: idxd: remove interrupt disable for cmd_lock")
+Signed-off-by: Rex Zhang <rex.zhang@intel.com>
+Signed-off-by: Lijun Pan <lijun.pan@intel.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
+Link: https://lore.kernel.org/r/20230916060619.3744220-1-rex.zhang@intel.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/dma/idxd/device.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
+index 9a15f0d12c799..97b505f1115ab 100644
+--- a/drivers/dma/idxd/device.c
++++ b/drivers/dma/idxd/device.c
+@@ -492,6 +492,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
+       union idxd_command_reg cmd;
+       DECLARE_COMPLETION_ONSTACK(done);
+       u32 stat;
++      unsigned long flags;
+ 
+       if (idxd_device_is_halted(idxd)) {
+               dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
+@@ -505,7 +506,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
+       cmd.operand = operand;
+       cmd.int_req = 1;
+ 
+-      spin_lock(&idxd->cmd_lock);
++      spin_lock_irqsave(&idxd->cmd_lock, flags);
+       wait_event_lock_irq(idxd->cmd_waitq,
+                           !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags),
+                           idxd->cmd_lock);
+@@ -522,7 +523,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
+        * After command submitted, release lock and go to sleep until
+        * the command completes via interrupt.
+        */
+-      spin_unlock(&idxd->cmd_lock);
++      spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+       wait_for_completion(&done);
+       stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+       spin_lock(&idxd->cmd_lock);
+-- 
+2.40.1
+
diff --git a/queue-6.5/dmaengine-mediatek-fix-deadlock-caused-by-synchroniz.patch b/queue-6.5/dmaengine-mediatek-fix-deadlock-caused-by-synchroniz.patch

new file mode 100644 (file)

index 0000000..e66fcf2
--- /dev/null
+++ b/queue-6.5/dmaengine-mediatek-fix-deadlock-caused-by-synchroniz.patch
@@ -0,0 +1,53 @@
+From 45b31de64194cf4a883bbb605cc9b2cdec33ebd6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Aug 2023 11:25:11 +0800
+Subject: dmaengine: mediatek: Fix deadlock caused by synchronize_irq()
+
+From: Duoming Zhou <duoming@zju.edu.cn>
+
+[ Upstream commit 01f1ae2733e2bb4de92fefcea5fda847d92aede1 ]
+
+synchronize_irq(c->irq) does not return until the IRQ handler
+mtk_uart_apdma_irq_handler() has completed. If synchronize_irq() is
+called while holding a spin_lock that the IRQ handler also needs,
+the handler can never finish and a deadlock occurs.
+The process is shown below:
+
+          cpu0                        cpu1
+mtk_uart_apdma_device_pause() | mtk_uart_apdma_irq_handler()
+  spin_lock_irqsave()         |
+                              |   spin_lock_irqsave()
+  //hold the lock to wait     |
+  synchronize_irq()           |
+
+This patch reorders the synchronize_irq(c->irq) outside the spin_lock
+in order to mitigate the bug.
+
+Fixes: 9135408c3ace ("dmaengine: mediatek: Add MediaTek UART APDMA support")
+Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
+Reviewed-by: Eugen Hristev <eugen.hristev@collabora.com>
+Link: https://lore.kernel.org/r/20230806032511.45263-1-duoming@zju.edu.cn
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/dma/mediatek/mtk-uart-apdma.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
+index a1517ef1f4a01..0acf6a92a4ad3 100644
+--- a/drivers/dma/mediatek/mtk-uart-apdma.c
++++ b/drivers/dma/mediatek/mtk-uart-apdma.c
+@@ -451,9 +451,8 @@ static int mtk_uart_apdma_device_pause(struct dma_chan *chan)
+       mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B);
+       mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
+ 
+-      synchronize_irq(c->irq);
+-
+       spin_unlock_irqrestore(&c->vc.lock, flags);
++      synchronize_irq(c->irq);
+ 
+       return 0;
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/fs-factor-out-vfs_parse_monolithic_sep-helper.patch b/queue-6.5/fs-factor-out-vfs_parse_monolithic_sep-helper.patch

new file mode 100644 (file)

index 0000000..b8682ba
--- /dev/null
+++ b/queue-6.5/fs-factor-out-vfs_parse_monolithic_sep-helper.patch
@@ -0,0 +1,103 @@
+From a7311f1016b7c251e3d01c338e2da0aa9a0fed85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Oct 2023 15:24:17 +0300
+Subject: fs: factor out vfs_parse_monolithic_sep() helper
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit e001d1447cd4585d7f23a44ff668ba2bc624badb ]
+
+Factor out vfs_parse_monolithic_sep() from generic_parse_monolithic(),
+so filesystems could use it with a custom option separator callback.
+
+Acked-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Stable-dep-of: c34706acf40b ("ovl: fix regression in parsing of mount options with escaped comma")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs_context.c            | 34 +++++++++++++++++++++++++++++-----
+ include/linux/fs_context.h |  2 ++
+ 2 files changed, 31 insertions(+), 5 deletions(-)
+
+diff --git a/fs/fs_context.c b/fs/fs_context.c
+index a48a69caddce1..896e89acac5c2 100644
+--- a/fs/fs_context.c
++++ b/fs/fs_context.c
+@@ -192,17 +192,19 @@ int vfs_parse_fs_string(struct fs_context *fc, const char *key,
+ EXPORT_SYMBOL(vfs_parse_fs_string);
+ 
+ /**
+- * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
++ * vfs_parse_monolithic_sep - Parse key[=val][,key[=val]]* mount data
+  * @fc: The superblock configuration to fill in.
+  * @data: The data to parse
++ * @sep: callback for separating next option
+  *
+- * Parse a blob of data that's in key[=val][,key[=val]]* form.  This can be
+- * called from the ->monolithic_mount_data() fs_context operation.
++ * Parse a blob of data that's in key[=val][,key[=val]]* form with a custom
++ * option separator callback.
+  *
+  * Returns 0 on success or the error returned by the ->parse_option() fs_context
+  * operation on failure.
+  */
+-int generic_parse_monolithic(struct fs_context *fc, void *data)
++int vfs_parse_monolithic_sep(struct fs_context *fc, void *data,
++                           char *(*sep)(char **))
+ {
+       char *options = data, *key;
+       int ret = 0;
+@@ -214,7 +216,7 @@ int generic_parse_monolithic(struct fs_context *fc, void *data)
+       if (ret)
+               return ret;
+ 
+-      while ((key = strsep(&options, ",")) != NULL) {
++      while ((key = sep(&options)) != NULL) {
+               if (*key) {
+                       size_t v_len = 0;
+                       char *value = strchr(key, '=');
+@@ -233,6 +235,28 @@ int generic_parse_monolithic(struct fs_context *fc, void *data)
+ 
+       return ret;
+ }
++EXPORT_SYMBOL(vfs_parse_monolithic_sep);
++
++static char *vfs_parse_comma_sep(char **s)
++{
++      return strsep(s, ",");
++}
++
++/**
++ * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
++ * @fc: The superblock configuration to fill in.
++ * @data: The data to parse
++ *
++ * Parse a blob of data that's in key[=val][,key[=val]]* form.  This can be
++ * called from the ->monolithic_mount_data() fs_context operation.
++ *
++ * Returns 0 on success or the error returned by the ->parse_option() fs_context
++ * operation on failure.
++ */
++int generic_parse_monolithic(struct fs_context *fc, void *data)
++{
++      return vfs_parse_monolithic_sep(fc, data, vfs_parse_comma_sep);
++}
+ EXPORT_SYMBOL(generic_parse_monolithic);
+ 
+ /**
+diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
+index ff6341e09925b..ae556dc8e18fe 100644
+--- a/include/linux/fs_context.h
++++ b/include/linux/fs_context.h
+@@ -135,6 +135,8 @@ extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc);
+ extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param);
+ extern int vfs_parse_fs_string(struct fs_context *fc, const char *key,
+                              const char *value, size_t v_size);
++int vfs_parse_monolithic_sep(struct fs_context *fc, void *data,
++                           char *(*sep)(char **));
+ extern int generic_parse_monolithic(struct fs_context *fc, void *data);
+ extern int vfs_get_tree(struct fs_context *fc);
+ extern void put_fs_context(struct fs_context *fc);
+-- 
+2.40.1
+
diff --git a/queue-6.5/fs-fix-kernel-doc-warnings.patch b/queue-6.5/fs-fix-kernel-doc-warnings.patch

new file mode 100644 (file)

index 0000000..e8c10fd
--- /dev/null
+++ b/queue-6.5/fs-fix-kernel-doc-warnings.patch
@@ -0,0 +1,198 @@
+From 04c658698602b678d1548dd99a0164b84d06e48c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Aug 2023 21:08:24 +0100
+Subject: fs: Fix kernel-doc warnings
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+[ Upstream commit 35931eb3945b8d38c31f8e956aee3cf31c52121b ]
+
+These have a variety of causes and a corresponding variety of solutions.
+
+Signed-off-by: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Message-Id: <20230818200824.2720007-1-willy@infradead.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: c34706acf40b ("ovl: fix regression in parsing of mount options with escaped comma")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/file.c             |  3 ++-
+ fs/fs_context.c       | 12 +++++++++---
+ fs/ioctl.c            | 10 +++++++---
+ fs/kernel_read_file.c | 12 ++++++------
+ fs/namei.c            |  3 +++
+ fs/open.c             |  4 ++--
+ 6 files changed, 29 insertions(+), 15 deletions(-)
+
+diff --git a/fs/file.c b/fs/file.c
+index 3fd003a8604f8..568a98178007c 100644
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -668,7 +668,7 @@ EXPORT_SYMBOL(close_fd); /* for ksys_close() */
+ 
+ /**
+  * last_fd - return last valid index into fd table
+- * @cur_fds: files struct
++ * @fdt: File descriptor table.
+  *
+  * Context: Either rcu read lock or files_lock must be held.
+  *
+@@ -723,6 +723,7 @@ static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
+  *
+  * @fd:     starting file descriptor to close
+  * @max_fd: last file descriptor to close
++ * @flags:  CLOSE_RANGE flags.
+  *
+  * This closes a range of file descriptors. All file descriptors
+  * from @fd up to and including @max_fd are closed.
+diff --git a/fs/fs_context.c b/fs/fs_context.c
+index 375023e40161d..a48a69caddce1 100644
+--- a/fs/fs_context.c
++++ b/fs/fs_context.c
+@@ -162,6 +162,10 @@ EXPORT_SYMBOL(vfs_parse_fs_param);
+ 
+ /**
+  * vfs_parse_fs_string - Convenience function to just parse a string.
++ * @fc: Filesystem context.
++ * @key: Parameter name.
++ * @value: Default value.
++ * @v_size: Maximum number of bytes in the value.
+  */
+ int vfs_parse_fs_string(struct fs_context *fc, const char *key,
+                       const char *value, size_t v_size)
+@@ -189,7 +193,7 @@ EXPORT_SYMBOL(vfs_parse_fs_string);
+ 
+ /**
+  * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
+- * @ctx: The superblock configuration to fill in.
++ * @fc: The superblock configuration to fill in.
+  * @data: The data to parse
+  *
+  * Parse a blob of data that's in key[=val][,key[=val]]* form.  This can be
+@@ -354,7 +358,7 @@ void fc_drop_locked(struct fs_context *fc)
+ static void legacy_fs_context_free(struct fs_context *fc);
+ 
+ /**
+- * vfs_dup_fc_config: Duplicate a filesystem context.
++ * vfs_dup_fs_context - Duplicate a filesystem context.
+  * @src_fc: The context to copy.
+  */
+ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
+@@ -400,7 +404,9 @@ EXPORT_SYMBOL(vfs_dup_fs_context);
+ 
+ /**
+  * logfc - Log a message to a filesystem context
+- * @fc: The filesystem context to log to.
++ * @log: The filesystem context to log to, or NULL to use printk.
++ * @prefix: A string to prefix the output with, or NULL.
++ * @level: 'w' for a warning, 'e' for an error.  Anything else is a notice.
+  * @fmt: The format of the buffer.
+  */
+ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...)
+diff --git a/fs/ioctl.c b/fs/ioctl.c
+index 5b2481cd47501..d413e0b8f6c29 100644
+--- a/fs/ioctl.c
++++ b/fs/ioctl.c
+@@ -109,9 +109,6 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
+  * Returns 0 on success, -errno on error, 1 if this was the last
+  * extent that will fit in user array.
+  */
+-#define SET_UNKNOWN_FLAGS     (FIEMAP_EXTENT_DELALLOC)
+-#define SET_NO_UNMOUNTED_IO_FLAGS     (FIEMAP_EXTENT_DATA_ENCRYPTED)
+-#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
+ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
+                           u64 phys, u64 len, u32 flags)
+ {
+@@ -127,6 +124,10 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
+       if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
+               return 1;
+ 
++#define SET_UNKNOWN_FLAGS     (FIEMAP_EXTENT_DELALLOC)
++#define SET_NO_UNMOUNTED_IO_FLAGS     (FIEMAP_EXTENT_DATA_ENCRYPTED)
++#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
++
+       if (flags & SET_UNKNOWN_FLAGS)
+               flags |= FIEMAP_EXTENT_UNKNOWN;
+       if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
+@@ -877,6 +878,9 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
+ #ifdef CONFIG_COMPAT
+ /**
+  * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation
++ * @file: The file to operate on.
++ * @cmd: The ioctl command number.
++ * @arg: The argument to the ioctl.
+  *
+  * This is not normally called as a function, but instead set in struct
+  * file_operations as
+diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c
+index 5d826274570ca..c429c42a68679 100644
+--- a/fs/kernel_read_file.c
++++ b/fs/kernel_read_file.c
+@@ -8,16 +8,16 @@
+ /**
+  * kernel_read_file() - read file contents into a kernel buffer
+  *
+- * @file      file to read from
+- * @offset    where to start reading from (see below).
+- * @buf               pointer to a "void *" buffer for reading into (if
++ * @file:     file to read from
++ * @offset:   where to start reading from (see below).
++ * @buf:      pointer to a "void *" buffer for reading into (if
+  *            *@buf is NULL, a buffer will be allocated, and
+  *            @buf_size will be ignored)
+- * @buf_size  size of buf, if already allocated. If @buf not
++ * @buf_size: size of buf, if already allocated. If @buf not
+  *            allocated, this is the largest size to allocate.
+- * @file_size if non-NULL, the full size of @file will be
++ * @file_size:        if non-NULL, the full size of @file will be
+  *            written here.
+- * @id                the kernel_read_file_id identifying the type of
++ * @id:               the kernel_read_file_id identifying the type of
+  *            file contents being read (for LSMs to examine)
+  *
+  * @offset must be 0 unless both @buf and @file_size are non-NULL
+diff --git a/fs/namei.c b/fs/namei.c
+index 2bae29ea52ffa..567ee547492bc 100644
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -643,6 +643,8 @@ static bool nd_alloc_stack(struct nameidata *nd)
+ 
+ /**
+  * path_connected - Verify that a dentry is below mnt.mnt_root
++ * @mnt: The mountpoint to check.
++ * @dentry: The dentry to check.
+  *
+  * Rename can sometimes move a file or directory outside of a bind
+  * mount, path_connected allows those cases to be detected.
+@@ -1083,6 +1085,7 @@ fs_initcall(init_fs_namei_sysctls);
+ /**
+  * may_follow_link - Check symlink following for unsafe situations
+  * @nd: nameidata pathwalk data
++ * @inode: Used for idmapping.
+  *
+  * In the case of the sysctl_protected_symlinks sysctl being enabled,
+  * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
+diff --git a/fs/open.c b/fs/open.c
+index e6ead0f199649..7c9647a8f219d 100644
+--- a/fs/open.c
++++ b/fs/open.c
+@@ -1150,7 +1150,7 @@ EXPORT_SYMBOL_GPL(kernel_file_open);
+  * backing_file_open - open a backing file for kernel internal use
+  * @path:     path of the file to open
+  * @flags:    open flags
+- * @path:     path of the backing file
++ * @real_path:        path of the backing file
+  * @cred:     credentials for open
+  *
+  * Open a backing file for a stackable filesystem (e.g., overlayfs).
+@@ -1546,7 +1546,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
+ }
+ 
+ /**
+- * close_range() - Close all file descriptors in a given range.
++ * sys_close_range() - Close all file descriptors in a given range.
+  *
+  * @fd:     starting file descriptor to close
+  * @max_fd: last file descriptor to close
+-- 
+2.40.1
+
diff --git a/queue-6.5/ovl-fix-regression-in-parsing-of-mount-options-with-.patch b/queue-6.5/ovl-fix-regression-in-parsing-of-mount-options-with-.patch

new file mode 100644 (file)

index 0000000..f03e86b
--- /dev/null
+++ b/queue-6.5/ovl-fix-regression-in-parsing-of-mount-options-with-.patch
@@ -0,0 +1,80 @@
+From 2173505925ac9b7d66ba7765aca6bbdc9722bf92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Oct 2023 16:08:28 +0300
+Subject: ovl: fix regression in parsing of mount options with escaped comma
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit c34706acf40b43dd31f67c92c5a95d39666a1eb3 ]
+
+Ever since commit 91c77947133f ("ovl: allow filenames with comma"), the
+following example was legit overlayfs mount options:
+
+  mount -t overlay overlay -o 'lowerdir=/tmp/a\,b/lower' /mnt
+
+The conversion to new mount api moved to using the common helper
+generic_parse_monolithic() and discarded the specialized ovl_next_opt()
+option separator.
+
+Bring back ovl_next_opt() and use vfs_parse_monolithic_sep() to fix the
+regression.
+
+Reported-by: Ryan Hendrickson <ryan.hendrickson@alum.mit.edu>
+Closes: https://lore.kernel.org/r/8da307fb-9318-cf78-8a27-ba5c5a0aef6d@alum.mit.edu/
+Fixes: 1784fbc2ed9c ("ovl: port to new mount api")
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/overlayfs/params.c | 29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
+index c69d97aef2cf9..c0f70af422d6c 100644
+--- a/fs/overlayfs/params.c
++++ b/fs/overlayfs/params.c
+@@ -120,6 +120,34 @@ const struct fs_parameter_spec ovl_parameter_spec[] = {
+       {}
+ };
+ 
++static char *ovl_next_opt(char **s)
++{
++      char *sbegin = *s;
++      char *p;
++
++      if (sbegin == NULL)
++              return NULL;
++
++      for (p = sbegin; *p; p++) {
++              if (*p == '\\') {
++                      p++;
++                      if (!*p)
++                              break;
++              } else if (*p == ',') {
++                      *p = '\0';
++                      *s = p + 1;
++                      return sbegin;
++              }
++      }
++      *s = NULL;
++      return sbegin;
++}
++
++static int ovl_parse_monolithic(struct fs_context *fc, void *data)
++{
++      return vfs_parse_monolithic_sep(fc, data, ovl_next_opt);
++}
++
+ static ssize_t ovl_parse_param_split_lowerdirs(char *str)
+ {
+       ssize_t nr_layers = 1, nr_colons = 0;
+@@ -596,6 +624,7 @@ static int ovl_reconfigure(struct fs_context *fc)
+ }
+ 
+ static const struct fs_context_operations ovl_context_ops = {
++      .parse_monolithic = ovl_parse_monolithic,
+       .parse_param = ovl_parse_param,
+       .get_tree    = ovl_get_tree,
+       .reconfigure = ovl_reconfigure,
+-- 
+2.40.1
+
diff --git a/queue-6.5/ovl-fix-regression-in-showing-lowerdir-mount-option.patch b/queue-6.5/ovl-fix-regression-in-showing-lowerdir-mount-option.patch

new file mode 100644 (file)

index 0000000..217cedb
--- /dev/null
+++ b/queue-6.5/ovl-fix-regression-in-showing-lowerdir-mount-option.patch
@@ -0,0 +1,144 @@
+From 816477eab8a0efd3989b6e6de7119180ef42dcdc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Oct 2023 17:07:03 +0300
+Subject: ovl: fix regression in showing lowerdir mount option
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 32db510708507f6133f496ff385cbd841d8f9098 ]
+
+Before commit b36a5780cb44 ("ovl: modify layer parameter parsing"),
+spaces and commas in lowerdir mount option value used to be escaped using
+seq_show_option().
+
+In current upstream, when lowerdir value has a space, it is not escaped
+in /proc/mounts, e.g.:
+
+  none /mnt overlay rw,relatime,lowerdir=l l,upperdir=u,workdir=w 0 0
+
+which results in broken output of the mount utility:
+
+  none on /mnt type overlay (rw,relatime,lowerdir=l)
+
+Store the original lowerdir mount options before unescaping and show
+them using the same escaping used for seq_show_option() in addition to
+escaping the colon separator character.
+
+Fixes: b36a5780cb44 ("ovl: modify layer parameter parsing")
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/filesystems/overlayfs.rst | 12 ++++++++
+ fs/overlayfs/params.c                   | 38 +++++++++++++++----------
+ 2 files changed, 35 insertions(+), 15 deletions(-)
+
+diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
+index eb7d2c88ddece..8e1b27288afd4 100644
+--- a/Documentation/filesystems/overlayfs.rst
++++ b/Documentation/filesystems/overlayfs.rst
+@@ -339,6 +339,18 @@ The specified lower directories will be stacked beginning from the
+ rightmost one and going left.  In the above example lower1 will be the
+ top, lower2 the middle and lower3 the bottom layer.
+ 
++Note: directory names containing colons can be provided as lower layer by
++escaping the colons with a single backslash.  For example:
++
++  mount -t overlay overlay -olowerdir=/a\:lower\:\:dir /merged
++
++Since kernel version v6.5, directory names containing colons can also
++be provided as lower layer using the fsconfig syscall from new mount api:
++
++  fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir", "/a:lower::dir", 0);
++
++In the latter case, colons in lower layer directory names will be escaped
++as octal characters (\072) when displayed in /proc/self/mountinfo.
+ 
+ Metadata only copy up
+ ---------------------
+diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
+index e6edad7542e88..644badb13fe01 100644
+--- a/fs/overlayfs/params.c
++++ b/fs/overlayfs/params.c
+@@ -155,7 +155,8 @@ static ssize_t ovl_parse_param_split_lowerdirs(char *str)
+ 
+       for (s = d = str;; s++, d++) {
+               if (*s == '\\') {
+-                      s++;
++                      /* keep esc chars in split lowerdir */
++                      *d++ = *s++;
+               } else if (*s == ':') {
+                       bool next_colon = (*(s + 1) == ':');
+ 
+@@ -230,7 +231,7 @@ static void ovl_unescape(char *s)
+       }
+ }
+ 
+-static int ovl_mount_dir(const char *name, struct path *path)
++static int ovl_mount_dir(const char *name, struct path *path, bool upper)
+ {
+       int err = -ENOMEM;
+       char *tmp = kstrdup(name, GFP_KERNEL);
+@@ -239,7 +240,7 @@ static int ovl_mount_dir(const char *name, struct path *path)
+               ovl_unescape(tmp);
+               err = ovl_mount_dir_noesc(tmp, path);
+ 
+-              if (!err && path->dentry->d_flags & DCACHE_OP_REAL) {
++              if (!err && upper && path->dentry->d_flags & DCACHE_OP_REAL) {
+                       pr_err("filesystem on '%s' not supported as upperdir\n",
+                              tmp);
+                       path_put_init(path);
+@@ -260,7 +261,7 @@ static int ovl_parse_param_upperdir(const char *name, struct fs_context *fc,
+       struct path path;
+       char *dup;
+ 
+-      err = ovl_mount_dir(name, &path);
++      err = ovl_mount_dir(name, &path, true);
+       if (err)
+               return err;
+ 
+@@ -417,7 +418,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
+               l = &ctx->lower[nr];
+               memset(l, 0, sizeof(*l));
+ 
+-              err = ovl_mount_dir_noesc(dup_iter, &l->path);
++              err = ovl_mount_dir(dup_iter, &l->path, false);
+               if (err)
+                       goto out_put;
+ 
+@@ -858,16 +859,23 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
+       struct super_block *sb = dentry->d_sb;
+       struct ovl_fs *ofs = sb->s_fs_info;
+       size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer;
+-      char **lowerdatadirs = &ofs->config.lowerdirs[nr_merged_lower];
+-
+-      /* lowerdirs[] starts from offset 1 */
+-      seq_printf(m, ",lowerdir=%s", ofs->config.lowerdirs[1]);
+-      /* dump regular lower layers */
+-      for (nr = 2; nr < nr_merged_lower; nr++)
+-              seq_printf(m, ":%s", ofs->config.lowerdirs[nr]);
+-      /* dump data lower layers */
+-      for (nr = 0; nr < ofs->numdatalayer; nr++)
+-              seq_printf(m, "::%s", lowerdatadirs[nr]);
++
++      /*
++       * lowerdirs[] starts from offset 1, then
++       * >= 0 regular lower layers prefixed with : and
++       * >= 0 data-only lower layers prefixed with ::
++       *
++       * we need to escape comma and space like seq_show_option() does and
++       * we also need to escape the colon separator from lowerdir paths.
++       */
++      seq_puts(m, ",lowerdir=");
++      for (nr = 1; nr < ofs->numlayer; nr++) {
++              if (nr > 1)
++                      seq_putc(m, ':');
++              if (nr >= nr_merged_lower)
++                      seq_putc(m, ':');
++              seq_escape(m, ofs->config.lowerdirs[nr], ":, \t\n\\");
++      }
+       if (ofs->config.upperdir) {
+               seq_show_option(m, "upperdir", ofs->config.upperdir);
+               seq_show_option(m, "workdir", ofs->config.workdir);
+-- 
+2.40.1
+
diff --git a/queue-6.5/ovl-make-use-of-layers-safe-in-rcu-pathwalk.patch b/queue-6.5/ovl-make-use-of-layers-safe-in-rcu-pathwalk.patch

new file mode 100644 (file)

index 0000000..f1b9ba6
--- /dev/null
+++ b/queue-6.5/ovl-make-use-of-layers-safe-in-rcu-pathwalk.patch
@@ -0,0 +1,164 @@
+From 664e6fe0160c646d8380c4020a1951fcc2365eac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Oct 2023 14:21:49 +0300
+Subject: ovl: make use of ->layers safe in rcu pathwalk
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit a535116d80339dbfe50b9b81b2f808c69eefbbc3 ]
+
+ovl_permission() accesses ->layers[...].mnt; we can't have ->layers
+freed without an RCU delay on fs shutdown.
+
+Fortunately, kern_unmount_array() that is used to drop those mounts
+does include an RCU delay, so freeing is delayed; unfortunately, the
+array passed to kern_unmount_array() is formed by mangling ->layers
+contents and that happens without any delays.
+
+The ->layers[...].name string entries are used to store the strings to
+display in "lowerdir=..." by ovl_show_options().  Those entries are not
+accessed in RCU walk.
+
+Move the name strings into a separate array ofs->config.lowerdirs and
+reuse the ofs->config.lowerdirs array as the temporary mount array to
+pass to kern_unmount_array().
+
+Reported-by: Al Viro <viro@zeniv.linux.org.uk>
+Link: https://lore.kernel.org/r/20231002023711.GP3389589@ZenIV/
+Acked-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Stable-dep-of: 32db51070850 ("ovl: fix regression in showing lowerdir mount option")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/overlayfs/ovl_entry.h | 10 +---------
+ fs/overlayfs/params.c    | 17 +++++++++--------
+ fs/overlayfs/super.c     | 18 +++++++++++-------
+ 3 files changed, 21 insertions(+), 24 deletions(-)
+
+diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
+index 306e1ecdc96d3..2b703521871ea 100644
+--- a/fs/overlayfs/ovl_entry.h
++++ b/fs/overlayfs/ovl_entry.h
+@@ -8,6 +8,7 @@
+ struct ovl_config {
+       char *upperdir;
+       char *workdir;
++      char **lowerdirs;
+       bool default_permissions;
+       int redirect_mode;
+       bool index;
+@@ -38,17 +39,8 @@ struct ovl_layer {
+       int idx;
+       /* One fsid per unique underlying sb (upper fsid == 0) */
+       int fsid;
+-      char *name;
+ };
+ 
+-/*
+- * ovl_free_fs() relies on @mnt being the first member when unmounting
+- * the private mounts created for each layer. Let's check both the
+- * offset and type.
+- */
+-static_assert(offsetof(struct ovl_layer, mnt) == 0);
+-static_assert(__same_type(typeof_member(struct ovl_layer, mnt), struct vfsmount *));
+-
+ struct ovl_path {
+       const struct ovl_layer *layer;
+       struct dentry *dentry;
+diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
+index c0f70af422d6c..e6edad7542e88 100644
+--- a/fs/overlayfs/params.c
++++ b/fs/overlayfs/params.c
+@@ -695,12 +695,12 @@ void ovl_free_fs(struct ovl_fs *ofs)
+       if (ofs->upperdir_locked)
+               ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
+ 
+-      /* Hack!  Reuse ofs->layers as a vfsmount array before freeing it */
+-      mounts = (struct vfsmount **) ofs->layers;
++      /* Reuse ofs->config.lowerdirs as a vfsmount array before freeing it */
++      mounts = (struct vfsmount **) ofs->config.lowerdirs;
+       for (i = 0; i < ofs->numlayer; i++) {
+               iput(ofs->layers[i].trap);
++              kfree(ofs->config.lowerdirs[i]);
+               mounts[i] = ofs->layers[i].mnt;
+-              kfree(ofs->layers[i].name);
+       }
+       kern_unmount_array(mounts, ofs->numlayer);
+       kfree(ofs->layers);
+@@ -708,6 +708,7 @@ void ovl_free_fs(struct ovl_fs *ofs)
+               free_anon_bdev(ofs->fs[i].pseudo_dev);
+       kfree(ofs->fs);
+ 
++      kfree(ofs->config.lowerdirs);
+       kfree(ofs->config.upperdir);
+       kfree(ofs->config.workdir);
+       if (ofs->creator_cred)
+@@ -857,16 +858,16 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
+       struct super_block *sb = dentry->d_sb;
+       struct ovl_fs *ofs = sb->s_fs_info;
+       size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer;
+-      const struct ovl_layer *data_layers = &ofs->layers[nr_merged_lower];
++      char **lowerdatadirs = &ofs->config.lowerdirs[nr_merged_lower];
+ 
+-      /* ofs->layers[0] is the upper layer */
+-      seq_printf(m, ",lowerdir=%s", ofs->layers[1].name);
++      /* lowerdirs[] starts from offset 1 */
++      seq_printf(m, ",lowerdir=%s", ofs->config.lowerdirs[1]);
+       /* dump regular lower layers */
+       for (nr = 2; nr < nr_merged_lower; nr++)
+-              seq_printf(m, ":%s", ofs->layers[nr].name);
++              seq_printf(m, ":%s", ofs->config.lowerdirs[nr]);
+       /* dump data lower layers */
+       for (nr = 0; nr < ofs->numdatalayer; nr++)
+-              seq_printf(m, "::%s", data_layers[nr].name);
++              seq_printf(m, "::%s", lowerdatadirs[nr]);
+       if (ofs->config.upperdir) {
+               seq_show_option(m, "upperdir", ofs->config.upperdir);
+               seq_show_option(m, "workdir", ofs->config.workdir);
+diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
+index 1090c68e5b051..80a70eaa30d90 100644
+--- a/fs/overlayfs/super.c
++++ b/fs/overlayfs/super.c
+@@ -565,11 +565,6 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
+       upper_layer->idx = 0;
+       upper_layer->fsid = 0;
+ 
+-      err = -ENOMEM;
+-      upper_layer->name = kstrdup(ofs->config.upperdir, GFP_KERNEL);
+-      if (!upper_layer->name)
+-              goto out;
+-
+       /*
+        * Inherit SB_NOSEC flag from upperdir.
+        *
+@@ -1113,7 +1108,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
+               layers[ofs->numlayer].idx = ofs->numlayer;
+               layers[ofs->numlayer].fsid = fsid;
+               layers[ofs->numlayer].fs = &ofs->fs[fsid];
+-              layers[ofs->numlayer].name = l->name;
++              /* Store for printing lowerdir=... in ovl_show_options() */
++              ofs->config.lowerdirs[ofs->numlayer] = l->name;
+               l->name = NULL;
+               ofs->numlayer++;
+               ofs->fs[fsid].is_lower = true;
+@@ -1358,8 +1354,16 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
+       if (!layers)
+               goto out_err;
+ 
++      ofs->config.lowerdirs = kcalloc(ctx->nr + 1, sizeof(char *), GFP_KERNEL);
++      if (!ofs->config.lowerdirs) {
++              kfree(layers);
++              goto out_err;
++      }
+       ofs->layers = layers;
+-      /* Layer 0 is reserved for upper even if there's no upper */
++      /*
++       * Layer 0 is reserved for upper even if there's no upper.
++       * For consistency, config.lowerdirs[0] is NULL.
++       */
+       ofs->numlayer = 1;
+ 
+       sb->s_stack_depth = 0;
+-- 
+2.40.1
+
diff --git a/queue-6.5/powerpc-64e-fix-wrong-test-in-__ptep_test_and_clear_.patch b/queue-6.5/powerpc-64e-fix-wrong-test-in-__ptep_test_and_clear_.patch

new file mode 100644 (file)

index 0000000..cbfe771
--- /dev/null
+++ b/queue-6.5/powerpc-64e-fix-wrong-test-in-__ptep_test_and_clear_.patch
@@ -0,0 +1,52 @@
+From c7f9a9e8a17b585d270d498eb8f97c56c6ef9e02 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Sep 2023 20:31:16 +0200
+Subject: powerpc/64e: Fix wrong test in __ptep_test_and_clear_young()
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+[ Upstream commit 5ea0bbaa32e8f54e9a57cfee4a3b8769b80be0d2 ]
+
+Commit 45201c879469 ("powerpc/nohash: Remove hash related code from
+nohash headers.") replaced:
+
+  if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+       return 0;
+
+By:
+
+  if (pte_young(*ptep))
+       return 0;
+
+But it should be:
+
+  if (!pte_young(*ptep))
+       return 0;
+
+Fix it.
+
+Fixes: 45201c879469 ("powerpc/nohash: Remove hash related code from nohash headers.")
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/8bb7f06494e21adada724ede47a4c3d97e879d40.1695659959.git.christophe.leroy@csgroup.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/nohash/64/pgtable.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
+index 287e25864ffae..072048e723c9b 100644
+--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
++++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
+@@ -197,7 +197,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+ {
+       unsigned long old;
+ 
+-      if (pte_young(*ptep))
++      if (!pte_young(*ptep))
+               return 0;
+       old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+       return (old & _PAGE_ACCESSED) != 0;
+-- 
+2.40.1
+
diff --git a/queue-6.5/powerpc-8xx-fix-pte_access_permitted-for-page_none.patch b/queue-6.5/powerpc-8xx-fix-pte_access_permitted-for-page_none.patch

new file mode 100644 (file)

index 0000000..3ac43cd
--- /dev/null
+++ b/queue-6.5/powerpc-8xx-fix-pte_access_permitted-for-page_none.patch
@@ -0,0 +1,62 @@
+From 27ca6c2812d62d46ae1c8070d5d91e2f81ab7b09 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Sep 2023 20:31:15 +0200
+Subject: powerpc/8xx: Fix pte_access_permitted() for PAGE_NONE
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+[ Upstream commit 5d9cea8a552ee122e21fbd5a3c5d4eb85f648e06 ]
+
+On 8xx, PAGE_NONE is handled by setting _PAGE_NA instead of clearing
+_PAGE_USER.
+
+But then pte_user() returns 1 also for PAGE_NONE.
+
+As _PAGE_NA prevents reads, add a specific version of pte_read()
+that returns 0 when _PAGE_NA is set instead of always returning 1.
+
+Fixes: 351750331fc1 ("powerpc/mm: Introduce _PAGE_NA")
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/57bcfbe578e43123f9ed73e040229b80f1ad56ec.1695659959.git.christophe.leroy@csgroup.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/nohash/32/pte-8xx.h | 7 +++++++
+ arch/powerpc/include/asm/nohash/pgtable.h    | 2 ++
+ 2 files changed, 9 insertions(+)
+
+diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+index 1a89ebdc3acc9..0238e6bd0d6c1 100644
+--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
++++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+@@ -94,6 +94,13 @@ static inline pte_t pte_wrprotect(pte_t pte)
+ 
+ #define pte_wrprotect pte_wrprotect
+ 
++static inline int pte_read(pte_t pte)
++{
++      return (pte_val(pte) & _PAGE_RO) != _PAGE_NA;
++}
++
++#define pte_read pte_read
++
+ static inline int pte_write(pte_t pte)
+ {
+       return !(pte_val(pte) & _PAGE_RO);
+diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
+index a6caaaab6f922..3af11981fcd55 100644
+--- a/arch/powerpc/include/asm/nohash/pgtable.h
++++ b/arch/powerpc/include/asm/nohash/pgtable.h
+@@ -25,7 +25,9 @@ static inline int pte_write(pte_t pte)
+       return pte_val(pte) & _PAGE_RW;
+ }
+ #endif
++#ifndef pte_read
+ static inline int pte_read(pte_t pte)         { return 1; }
++#endif
+ static inline int pte_dirty(pte_t pte)                { return pte_val(pte) & _PAGE_DIRTY; }
+ static inline int pte_special(pte_t pte)      { return pte_val(pte) & _PAGE_SPECIAL; }
+ static inline int pte_none(pte_t pte)         { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
+-- 
+2.40.1
+
diff --git a/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-pos.patch b/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-pos.patch

new file mode 100644 (file)

index 0000000..b7f547b
--- /dev/null
+++ b/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-pos.patch
@@ -0,0 +1,190 @@
+From d187b9c8315c6221a4224c99f361c512d97c6114 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 15 Oct 2023 12:02:02 -0700
+Subject: Revert "x86/smp: Put CPUs into INIT on shutdown if possible"
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+[ Upstream commit fbe1bf1e5ff1e3b298420d7a8434983ef8d72bd1 ]
+
+This reverts commit 45e34c8af58f23db4474e2bfe79183efec09a18b, and the
+two subsequent fixes to it:
+
+  3f874c9b2aae ("x86/smp: Don't send INIT to non-present and non-booted CPUs")
+  b1472a60a584 ("x86/smp: Don't send INIT to boot CPU")
+
+because it seems to result in hung machines at shutdown.  Particularly
+some Dell machines, but Thomas says
+
+ "The rest seems to be Lenovo and Sony with Alderlake/Raptorlake CPUs -
+  at least that's what I could figure out from the various bug reports.
+
+  I don't know which CPUs the DELL machines have, so I can't say it's a
+  pattern.
+
+  I agree with the revert for now"
+
+Ashok Raj chimes in:
+
+ "There was a report (probably this same one), and it turns out it was a
+  bug in the BIOS SMI handler.
+
+  The client BIOS's were waiting for the lowest APICID to be the SMI
+  rendezvous master. If this is MeteorLake, the BSP wasn't the one with
+  the lowest APIC and it tripped here.
+
+  The BIOS change is also being pushed to others for assimilation :)
+
+  Server BIOS's had this correctly for a while now"
+
+and it does look likely to be some bad interaction between SMI and the
+non-BSP cores having been put into INIT (and thus unresponsive until reset).
+
+Link: https://bbs.archlinux.org/viewtopic.php?pid=2124429
+Link: https://www.reddit.com/r/openSUSE/comments/16qq99b/tumbleweed_shutdown_did_not_finish_completely/
+Link: https://forum.artixlinux.org/index.php/topic,5997.0.html
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2241279
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/smp.h |  1 -
+ arch/x86/kernel/smp.c      | 39 +++++++-------------------------------
+ arch/x86/kernel/smpboot.c  | 27 --------------------------
+ 3 files changed, 7 insertions(+), 60 deletions(-)
+
+diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
+index c1bcb1449ee89..7bbeac8bd49ea 100644
+--- a/arch/x86/include/asm/smp.h
++++ b/arch/x86/include/asm/smp.h
+@@ -134,7 +134,6 @@ void native_send_call_func_ipi(const struct cpumask *mask);
+ void native_send_call_func_single_ipi(int cpu);
+ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
+ 
+-bool smp_park_other_cpus_in_init(void);
+ void smp_store_cpu_info(int id);
+ 
+ asmlinkage __visible void smp_reboot_interrupt(void);
+diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
+index 7eb18ca7bd45b..cc8ef9bfcb52f 100644
+--- a/arch/x86/kernel/smp.c
++++ b/arch/x86/kernel/smp.c
+@@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
+ }
+ 
+ /*
+- * Disable virtualization, APIC etc. and park the CPU in a HLT loop
++ * this function calls the 'stop' function on all other CPUs in the system.
+  */
+ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
+ {
+@@ -172,17 +172,13 @@ static void native_stop_other_cpus(int wait)
+        * 2) Wait for all other CPUs to report that they reached the
+        *    HLT loop in stop_this_cpu()
+        *
+-       * 3) If the system uses INIT/STARTUP for CPU bringup, then
+-       *    send all present CPUs an INIT vector, which brings them
+-       *    completely out of the way.
++       * 3) If #2 timed out send an NMI to the CPUs which did not
++       *    yet report
+        *
+-       * 4) If #3 is not possible and #2 timed out send an NMI to the
+-       *    CPUs which did not yet report
+-       *
+-       * 5) Wait for all other CPUs to report that they reached the
++       * 4) Wait for all other CPUs to report that they reached the
+        *    HLT loop in stop_this_cpu()
+        *
+-       * #4 can obviously race against a CPU reaching the HLT loop late.
++       * #3 can obviously race against a CPU reaching the HLT loop late.
+        * That CPU will have reported already and the "have all CPUs
+        * reached HLT" condition will be true despite the fact that the
+        * other CPU is still handling the NMI. Again, there is no
+@@ -198,7 +194,7 @@ static void native_stop_other_cpus(int wait)
+               /*
+                * Don't wait longer than a second for IPI completion. The
+                * wait request is not checked here because that would
+-               * prevent an NMI/INIT shutdown in case that not all
++               * prevent an NMI shutdown attempt in case that not all
+                * CPUs reach shutdown state.
+                */
+               timeout = USEC_PER_SEC;
+@@ -206,27 +202,7 @@ static void native_stop_other_cpus(int wait)
+                       udelay(1);
+       }
+ 
+-      /*
+-       * Park all other CPUs in INIT including "offline" CPUs, if
+-       * possible. That's a safe place where they can't resume execution
+-       * of HLT and then execute the HLT loop from overwritten text or
+-       * page tables.
+-       *
+-       * The only downside is a broadcast MCE, but up to the point where
+-       * the kexec() kernel brought all APs online again an MCE will just
+-       * make HLT resume and handle the MCE. The machine crashes and burns
+-       * due to overwritten text, page tables and data. So there is a
+-       * choice between fire and frying pan. The result is pretty much
+-       * the same. Chose frying pan until x86 provides a sane mechanism
+-       * to park a CPU.
+-       */
+-      if (smp_park_other_cpus_in_init())
+-              goto done;
+-
+-      /*
+-       * If park with INIT was not possible and the REBOOT_VECTOR didn't
+-       * take all secondary CPUs offline, try with the NMI.
+-       */
++      /* if the REBOOT_VECTOR didn't work, try with the NMI */
+       if (!cpumask_empty(&cpus_stop_mask)) {
+               /*
+                * If NMI IPI is enabled, try to register the stop handler
+@@ -249,7 +225,6 @@ static void native_stop_other_cpus(int wait)
+                       udelay(1);
+       }
+ 
+-done:
+       local_irq_save(flags);
+       disable_local_APIC();
+       mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index d3179aae1384f..4c502de98746c 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -1346,33 +1346,6 @@ void arch_thaw_secondary_cpus_end(void)
+       cache_aps_init();
+ }
+ 
+-bool smp_park_other_cpus_in_init(void)
+-{
+-      unsigned int cpu, this_cpu = smp_processor_id();
+-      unsigned int apicid;
+-
+-      if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu)
+-              return false;
+-
+-      /*
+-       * If this is a crash stop which does not execute on the boot CPU,
+-       * then this cannot use the INIT mechanism because INIT to the boot
+-       * CPU will reset the machine.
+-       */
+-      if (this_cpu)
+-              return false;
+-
+-      for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) {
+-              if (cpu == this_cpu)
+-                      continue;
+-              apicid = apic->cpu_present_to_apicid(cpu);
+-              if (apicid == BAD_APICID)
+-                      continue;
+-              send_init_sequence(apicid);
+-      }
+-      return true;
+-}
+-
+ /*
+  * Early setup to make printk work.
+  */
+-- 
+2.40.1
+
diff --git a/queue-6.5/series b/queue-6.5/series

index 6cc0771e37eedabe8c49d09cc1bd87a869836632..4cfbdf0f530714f35eb9544510561e297b6728e0 100644 (file)
--- a/queue-6.5/series
+++ b/queue-6.5/series
@@ -178,3 +178,14 @@ usb-gadget-ncm-handle-decoding-of-multiple-ntb-s-in-unwrap-call.patch
  usb-cdnsp-fixes-issue-with-dequeuing-not-queued-requests.patch
  usb-typec-qcom-update-the-logic-of-regulator-enable-and-disable.patch
  usb-misc-onboard_hub-add-support-for-microchip-usb2412-usb-2.0-hub.patch
+dmaengine-idxd-use-spin_lock_irqsave-before-wait_eve.patch
+dmaengine-mediatek-fix-deadlock-caused-by-synchroniz.patch
+powerpc-8xx-fix-pte_access_permitted-for-page_none.patch
+powerpc-64e-fix-wrong-test-in-__ptep_test_and_clear_.patch
+fs-fix-kernel-doc-warnings.patch
+fs-factor-out-vfs_parse_monolithic_sep-helper.patch
+ovl-fix-regression-in-parsing-of-mount-options-with-.patch
+ovl-make-use-of-layers-safe-in-rcu-pathwalk.patch
+ovl-fix-regression-in-showing-lowerdir-mount-option.patch
+x86-smpboot-change-smp_store_boot_cpu_info-to-static.patch
+revert-x86-smp-put-cpus-into-init-on-shutdown-if-pos.patch
diff --git a/queue-6.5/x86-smpboot-change-smp_store_boot_cpu_info-to-static.patch b/queue-6.5/x86-smpboot-change-smp_store_boot_cpu_info-to-static.patch

new file mode 100644 (file)

index 0000000..0e76a03
--- /dev/null
+++ b/queue-6.5/x86-smpboot-change-smp_store_boot_cpu_info-to-static.patch
@@ -0,0 +1,50 @@
+From 3bfa185fb41b94708364663e230ae722dfff266c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 18:05:33 +0000
+Subject: x86/smpboot: Change smp_store_boot_cpu_info() to static
+
+From: Sohil Mehta <sohil.mehta@intel.com>
+
+[ Upstream commit d7114f83ee051dfeac82546d7ba03d74f8b92af3 ]
+
+The function is only used locally. Convert it to a static one.
+
+Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20230727180533.3119660-4-sohil.mehta@intel.com
+Stable-dep-of: fbe1bf1e5ff1 ("Revert "x86/smp: Put CPUs into INIT on shutdown if possible"")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/smp.h | 2 --
+ arch/x86/kernel/smpboot.c  | 2 +-
+ 2 files changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
+index 600cf25dbfc64..c1bcb1449ee89 100644
+--- a/arch/x86/include/asm/smp.h
++++ b/arch/x86/include/asm/smp.h
+@@ -135,8 +135,6 @@ void native_send_call_func_single_ipi(int cpu);
+ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
+ 
+ bool smp_park_other_cpus_in_init(void);
+-
+-void smp_store_boot_cpu_info(void);
+ void smp_store_cpu_info(int id);
+ 
+ asmlinkage __visible void smp_reboot_interrupt(void);
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 747b83a373a2d..d3179aae1384f 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -422,7 +422,7 @@ int topology_update_die_map(unsigned int die, unsigned int cpu)
+       return 0;
+ }
+ 
+-void __init smp_store_boot_cpu_info(void)
++static void __init smp_store_boot_cpu_info(void)
+ {
+       int id = 0; /* CPU 0 */
+       struct cpuinfo_x86 *c = &cpu_data(id);
+-- 
+2.40.1
+
author	Sasha Levin <sashal@kernel.org>
	Mon, 16 Oct 2023 02:50:04 +0000 (22:50 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Mon, 16 Oct 2023 02:50:04 +0000 (22:50 -0400)
queue-6.5/dmaengine-idxd-use-spin_lock_irqsave-before-wait_eve.patch	[new file with mode: 0644]	patch \| blob
queue-6.5/dmaengine-mediatek-fix-deadlock-caused-by-synchroniz.patch	[new file with mode: 0644]	patch \| blob
queue-6.5/fs-factor-out-vfs_parse_monolithic_sep-helper.patch	[new file with mode: 0644]	patch \| blob
queue-6.5/fs-fix-kernel-doc-warnings.patch	[new file with mode: 0644]	patch \| blob
queue-6.5/ovl-fix-regression-in-parsing-of-mount-options-with-.patch	[new file with mode: 0644]	patch \| blob
queue-6.5/ovl-fix-regression-in-showing-lowerdir-mount-option.patch	[new file with mode: 0644]	patch \| blob
queue-6.5/ovl-make-use-of-layers-safe-in-rcu-pathwalk.patch	[new file with mode: 0644]	patch \| blob
queue-6.5/powerpc-64e-fix-wrong-test-in-__ptep_test_and_clear_.patch	[new file with mode: 0644]	patch \| blob
queue-6.5/powerpc-8xx-fix-pte_access_permitted-for-page_none.patch	[new file with mode: 0644]	patch \| blob
queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-pos.patch	[new file with mode: 0644]	patch \| blob
queue-6.5/series		patch \| blob \| blame \| history
queue-6.5/x86-smpboot-change-smp_store_boot_cpu_info-to-static.patch	[new file with mode: 0644]	patch \| blob