From beab3bf3cdebbe4c80f3442d790d537ecd0f1798 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 15 Oct 2023 22:50:04 -0400 Subject: [PATCH] Fixes for 6.5 Signed-off-by: Sasha Levin --- ...se-spin_lock_irqsave-before-wait_eve.patch | 92 ++++++++ ...ek-fix-deadlock-caused-by-synchroniz.patch | 53 +++++ ...-out-vfs_parse_monolithic_sep-helper.patch | 103 +++++++++ queue-6.5/fs-fix-kernel-doc-warnings.patch | 198 ++++++++++++++++++ ...on-in-parsing-of-mount-options-with-.patch | 80 +++++++ ...ion-in-showing-lowerdir-mount-option.patch | 144 +++++++++++++ ...e-use-of-layers-safe-in-rcu-pathwalk.patch | 164 +++++++++++++++ ...wrong-test-in-__ptep_test_and_clear_.patch | 52 +++++ ...x-pte_access_permitted-for-page_none.patch | 62 ++++++ ...ut-cpus-into-init-on-shutdown-if-pos.patch | 190 +++++++++++++++++ queue-6.5/series | 11 + ...ge-smp_store_boot_cpu_info-to-static.patch | 50 +++++ 12 files changed, 1199 insertions(+) create mode 100644 queue-6.5/dmaengine-idxd-use-spin_lock_irqsave-before-wait_eve.patch create mode 100644 queue-6.5/dmaengine-mediatek-fix-deadlock-caused-by-synchroniz.patch create mode 100644 queue-6.5/fs-factor-out-vfs_parse_monolithic_sep-helper.patch create mode 100644 queue-6.5/fs-fix-kernel-doc-warnings.patch create mode 100644 queue-6.5/ovl-fix-regression-in-parsing-of-mount-options-with-.patch create mode 100644 queue-6.5/ovl-fix-regression-in-showing-lowerdir-mount-option.patch create mode 100644 queue-6.5/ovl-make-use-of-layers-safe-in-rcu-pathwalk.patch create mode 100644 queue-6.5/powerpc-64e-fix-wrong-test-in-__ptep_test_and_clear_.patch create mode 100644 queue-6.5/powerpc-8xx-fix-pte_access_permitted-for-page_none.patch create mode 100644 queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-pos.patch create mode 100644 queue-6.5/x86-smpboot-change-smp_store_boot_cpu_info-to-static.patch diff --git a/queue-6.5/dmaengine-idxd-use-spin_lock_irqsave-before-wait_eve.patch b/queue-6.5/dmaengine-idxd-use-spin_lock_irqsave-before-wait_eve.patch new file mode 100644 index 00000000000..d4f6099a932 --- /dev/null +++ b/queue-6.5/dmaengine-idxd-use-spin_lock_irqsave-before-wait_eve.patch @@ -0,0 +1,92 @@ +From bb77c7aa35e7d1ac1e93cc3c5fd4432f65b3e4cc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 Sep 2023 14:06:19 +0800 +Subject: dmaengine: idxd: use spin_lock_irqsave before wait_event_lock_irq + +From: Rex Zhang + +[ Upstream commit c0409dd3d151f661e7e57b901a81a02565df163c ] + +In idxd_cmd_exec(), wait_event_lock_irq() explicitly calls +spin_unlock_irq()/spin_lock_irq(). If the interrupt is on before entering +wait_event_lock_irq(), it will become off status after +wait_event_lock_irq() is called. Later, wait_for_completion() may go to +sleep but irq is disabled. The scenario is warned in might_sleep(). + +Fix it by using spin_lock_irqsave() instead of the primitive spin_lock() +to save the irq status before entering wait_event_lock_irq() and using +spin_unlock_irqrestore() instead of the primitive spin_unlock() to restore +the irq status before entering wait_for_completion(). + +Before the change: +idxd_cmd_exec() { +interrupt is on +spin_lock() // interrupt is on + wait_event_lock_irq() + spin_unlock_irq() // interrupt is enabled + ... + spin_lock_irq() // interrupt is disabled +spin_unlock() // interrupt is still disabled +wait_for_completion() // report "BUG: sleeping function + // called from invalid context... + // in_atomic() irqs_disabled()" +} + +After applying spin_lock_irqsave(): +idxd_cmd_exec() { +interrupt is on +spin_lock_irqsave() // save the on state + // interrupt is disabled + wait_event_lock_irq() + spin_unlock_irq() // interrupt is enabled + ... + spin_lock_irq() // interrupt is disabled +spin_unlock_irqrestore() // interrupt is restored to on +wait_for_completion() // No Call trace +} + +Fixes: f9f4082dbc56 ("dmaengine: idxd: remove interrupt disable for cmd_lock") +Signed-off-by: Rex Zhang +Signed-off-by: Lijun Pan +Reviewed-by: Dave Jiang +Reviewed-by: Fenghua Yu +Link: https://lore.kernel.org/r/20230916060619.3744220-1-rex.zhang@intel.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +--- + drivers/dma/idxd/device.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c +index 9a15f0d12c799..97b505f1115ab 100644 +--- a/drivers/dma/idxd/device.c ++++ b/drivers/dma/idxd/device.c +@@ -492,6 +492,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, + union idxd_command_reg cmd; + DECLARE_COMPLETION_ONSTACK(done); + u32 stat; ++ unsigned long flags; + + if (idxd_device_is_halted(idxd)) { + dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); +@@ -505,7 +506,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, + cmd.operand = operand; + cmd.int_req = 1; + +- spin_lock(&idxd->cmd_lock); ++ spin_lock_irqsave(&idxd->cmd_lock, flags); + wait_event_lock_irq(idxd->cmd_waitq, + !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags), + idxd->cmd_lock); +@@ -522,7 +523,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, + * After command submitted, release lock and go to sleep until + * the command completes via interrupt. + */ +- spin_unlock(&idxd->cmd_lock); ++ spin_unlock_irqrestore(&idxd->cmd_lock, flags); + wait_for_completion(&done); + stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET); + spin_lock(&idxd->cmd_lock); +-- +2.40.1 + diff --git a/queue-6.5/dmaengine-mediatek-fix-deadlock-caused-by-synchroniz.patch b/queue-6.5/dmaengine-mediatek-fix-deadlock-caused-by-synchroniz.patch new file mode 100644 index 00000000000..e66fcf29497 --- /dev/null +++ b/queue-6.5/dmaengine-mediatek-fix-deadlock-caused-by-synchroniz.patch @@ -0,0 +1,53 @@ +From 45b31de64194cf4a883bbb605cc9b2cdec33ebd6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 6 Aug 2023 11:25:11 +0800 +Subject: dmaengine: mediatek: Fix deadlock caused by synchronize_irq() + +From: Duoming Zhou + +[ Upstream commit 01f1ae2733e2bb4de92fefcea5fda847d92aede1 ] + +The synchronize_irq(c->irq) will not return until the IRQ handler +mtk_uart_apdma_irq_handler() is completed. If the synchronize_irq() +holds a spin_lock and waits the IRQ handler to complete, but the +IRQ handler also needs the same spin_lock. The deadlock will happen. +The process is shown below: + + cpu0 cpu1 +mtk_uart_apdma_device_pause() | mtk_uart_apdma_irq_handler() + spin_lock_irqsave() | + | spin_lock_irqsave() + //hold the lock to wait | + synchronize_irq() | + +This patch reorders the synchronize_irq(c->irq) outside the spin_lock +in order to mitigate the bug. + +Fixes: 9135408c3ace ("dmaengine: mediatek: Add MediaTek UART APDMA support") +Signed-off-by: Duoming Zhou +Reviewed-by: Eugen Hristev +Link: https://lore.kernel.org/r/20230806032511.45263-1-duoming@zju.edu.cn +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +--- + drivers/dma/mediatek/mtk-uart-apdma.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c +index a1517ef1f4a01..0acf6a92a4ad3 100644 +--- a/drivers/dma/mediatek/mtk-uart-apdma.c ++++ b/drivers/dma/mediatek/mtk-uart-apdma.c +@@ -451,9 +451,8 @@ static int mtk_uart_apdma_device_pause(struct dma_chan *chan) + mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); + mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); + +- synchronize_irq(c->irq); +- + spin_unlock_irqrestore(&c->vc.lock, flags); ++ synchronize_irq(c->irq); + + return 0; + } +-- +2.40.1 + diff --git a/queue-6.5/fs-factor-out-vfs_parse_monolithic_sep-helper.patch b/queue-6.5/fs-factor-out-vfs_parse_monolithic_sep-helper.patch new file mode 100644 index 00000000000..b8682ba666e --- /dev/null +++ b/queue-6.5/fs-factor-out-vfs_parse_monolithic_sep-helper.patch @@ -0,0 +1,103 @@ +From a7311f1016b7c251e3d01c338e2da0aa9a0fed85 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Oct 2023 15:24:17 +0300 +Subject: fs: factor out vfs_parse_monolithic_sep() helper + +From: Amir Goldstein + +[ Upstream commit e001d1447cd4585d7f23a44ff668ba2bc624badb ] + +Factor out vfs_parse_monolithic_sep() from generic_parse_monolithic(), +so filesystems could use it with a custom option separator callback. + +Acked-by: Christian Brauner +Signed-off-by: Amir Goldstein +Stable-dep-of: c34706acf40b ("ovl: fix regression in parsing of mount options with escaped comma") +Signed-off-by: Sasha Levin +--- + fs/fs_context.c | 34 +++++++++++++++++++++++++++++----- + include/linux/fs_context.h | 2 ++ + 2 files changed, 31 insertions(+), 5 deletions(-) + +diff --git a/fs/fs_context.c b/fs/fs_context.c +index a48a69caddce1..896e89acac5c2 100644 +--- a/fs/fs_context.c ++++ b/fs/fs_context.c +@@ -192,17 +192,19 @@ int vfs_parse_fs_string(struct fs_context *fc, const char *key, + EXPORT_SYMBOL(vfs_parse_fs_string); + + /** +- * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data ++ * vfs_parse_monolithic_sep - Parse key[=val][,key[=val]]* mount data + * @fc: The superblock configuration to fill in. + * @data: The data to parse ++ * @sep: callback for separating next option + * +- * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be +- * called from the ->monolithic_mount_data() fs_context operation. ++ * Parse a blob of data that's in key[=val][,key[=val]]* form with a custom ++ * option separator callback. + * + * Returns 0 on success or the error returned by the ->parse_option() fs_context + * operation on failure. + */ +-int generic_parse_monolithic(struct fs_context *fc, void *data) ++int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, ++ char *(*sep)(char **)) + { + char *options = data, *key; + int ret = 0; +@@ -214,7 +216,7 @@ int generic_parse_monolithic(struct fs_context *fc, void *data) + if (ret) + return ret; + +- while ((key = strsep(&options, ",")) != NULL) { ++ while ((key = sep(&options)) != NULL) { + if (*key) { + size_t v_len = 0; + char *value = strchr(key, '='); +@@ -233,6 +235,28 @@ int generic_parse_monolithic(struct fs_context *fc, void *data) + + return ret; + } ++EXPORT_SYMBOL(vfs_parse_monolithic_sep); ++ ++static char *vfs_parse_comma_sep(char **s) ++{ ++ return strsep(s, ","); ++} ++ ++/** ++ * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data ++ * @fc: The superblock configuration to fill in. ++ * @data: The data to parse ++ * ++ * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be ++ * called from the ->monolithic_mount_data() fs_context operation. ++ * ++ * Returns 0 on success or the error returned by the ->parse_option() fs_context ++ * operation on failure. ++ */ ++int generic_parse_monolithic(struct fs_context *fc, void *data) ++{ ++ return vfs_parse_monolithic_sep(fc, data, vfs_parse_comma_sep); ++} + EXPORT_SYMBOL(generic_parse_monolithic); + + /** +diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h +index ff6341e09925b..ae556dc8e18fe 100644 +--- a/include/linux/fs_context.h ++++ b/include/linux/fs_context.h +@@ -135,6 +135,8 @@ extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); + extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); + extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, + const char *value, size_t v_size); ++int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, ++ char *(*sep)(char **)); + extern int generic_parse_monolithic(struct fs_context *fc, void *data); + extern int vfs_get_tree(struct fs_context *fc); + extern void put_fs_context(struct fs_context *fc); +-- +2.40.1 + diff --git a/queue-6.5/fs-fix-kernel-doc-warnings.patch b/queue-6.5/fs-fix-kernel-doc-warnings.patch new file mode 100644 index 00000000000..e8c10fdb28d --- /dev/null +++ b/queue-6.5/fs-fix-kernel-doc-warnings.patch @@ -0,0 +1,198 @@ +From 04c658698602b678d1548dd99a0164b84d06e48c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Aug 2023 21:08:24 +0100 +Subject: fs: Fix kernel-doc warnings + +From: Matthew Wilcox (Oracle) + +[ Upstream commit 35931eb3945b8d38c31f8e956aee3cf31c52121b ] + +These have a variety of causes and a corresponding variety of solutions. + +Signed-off-by: "Matthew Wilcox (Oracle)" +Message-Id: <20230818200824.2720007-1-willy@infradead.org> +Signed-off-by: Christian Brauner +Stable-dep-of: c34706acf40b ("ovl: fix regression in parsing of mount options with escaped comma") +Signed-off-by: Sasha Levin +--- + fs/file.c | 3 ++- + fs/fs_context.c | 12 +++++++++--- + fs/ioctl.c | 10 +++++++--- + fs/kernel_read_file.c | 12 ++++++------ + fs/namei.c | 3 +++ + fs/open.c | 4 ++-- + 6 files changed, 29 insertions(+), 15 deletions(-) + +diff --git a/fs/file.c b/fs/file.c +index 3fd003a8604f8..568a98178007c 100644 +--- a/fs/file.c ++++ b/fs/file.c +@@ -668,7 +668,7 @@ EXPORT_SYMBOL(close_fd); /* for ksys_close() */ + + /** + * last_fd - return last valid index into fd table +- * @cur_fds: files struct ++ * @fdt: File descriptor table. + * + * Context: Either rcu read lock or files_lock must be held. + * +@@ -723,6 +723,7 @@ static inline void __range_close(struct files_struct *cur_fds, unsigned int fd, + * + * @fd: starting file descriptor to close + * @max_fd: last file descriptor to close ++ * @flags: CLOSE_RANGE flags. + * + * This closes a range of file descriptors. All file descriptors + * from @fd up to and including @max_fd are closed. +diff --git a/fs/fs_context.c b/fs/fs_context.c +index 375023e40161d..a48a69caddce1 100644 +--- a/fs/fs_context.c ++++ b/fs/fs_context.c +@@ -162,6 +162,10 @@ EXPORT_SYMBOL(vfs_parse_fs_param); + + /** + * vfs_parse_fs_string - Convenience function to just parse a string. ++ * @fc: Filesystem context. ++ * @key: Parameter name. ++ * @value: Default value. ++ * @v_size: Maximum number of bytes in the value. + */ + int vfs_parse_fs_string(struct fs_context *fc, const char *key, + const char *value, size_t v_size) +@@ -189,7 +193,7 @@ EXPORT_SYMBOL(vfs_parse_fs_string); + + /** + * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data +- * @ctx: The superblock configuration to fill in. ++ * @fc: The superblock configuration to fill in. + * @data: The data to parse + * + * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be +@@ -354,7 +358,7 @@ void fc_drop_locked(struct fs_context *fc) + static void legacy_fs_context_free(struct fs_context *fc); + + /** +- * vfs_dup_fc_config: Duplicate a filesystem context. ++ * vfs_dup_fs_context - Duplicate a filesystem context. + * @src_fc: The context to copy. + */ + struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc) +@@ -400,7 +404,9 @@ EXPORT_SYMBOL(vfs_dup_fs_context); + + /** + * logfc - Log a message to a filesystem context +- * @fc: The filesystem context to log to. ++ * @log: The filesystem context to log to, or NULL to use printk. ++ * @prefix: A string to prefix the output with, or NULL. ++ * @level: 'w' for a warning, 'e' for an error. Anything else is a notice. + * @fmt: The format of the buffer. + */ + void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...) +diff --git a/fs/ioctl.c b/fs/ioctl.c +index 5b2481cd47501..d413e0b8f6c29 100644 +--- a/fs/ioctl.c ++++ b/fs/ioctl.c +@@ -109,9 +109,6 @@ static int ioctl_fibmap(struct file *filp, int __user *p) + * Returns 0 on success, -errno on error, 1 if this was the last + * extent that will fit in user array. + */ +-#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) +-#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) +-#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) + int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, + u64 phys, u64 len, u32 flags) + { +@@ -127,6 +124,10 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, + if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) + return 1; + ++#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) ++#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) ++#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) ++ + if (flags & SET_UNKNOWN_FLAGS) + flags |= FIEMAP_EXTENT_UNKNOWN; + if (flags & SET_NO_UNMOUNTED_IO_FLAGS) +@@ -877,6 +878,9 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) + #ifdef CONFIG_COMPAT + /** + * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation ++ * @file: The file to operate on. ++ * @cmd: The ioctl command number. ++ * @arg: The argument to the ioctl. + * + * This is not normally called as a function, but instead set in struct + * file_operations as +diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c +index 5d826274570ca..c429c42a68679 100644 +--- a/fs/kernel_read_file.c ++++ b/fs/kernel_read_file.c +@@ -8,16 +8,16 @@ + /** + * kernel_read_file() - read file contents into a kernel buffer + * +- * @file file to read from +- * @offset where to start reading from (see below). +- * @buf pointer to a "void *" buffer for reading into (if ++ * @file: file to read from ++ * @offset: where to start reading from (see below). ++ * @buf: pointer to a "void *" buffer for reading into (if + * *@buf is NULL, a buffer will be allocated, and + * @buf_size will be ignored) +- * @buf_size size of buf, if already allocated. If @buf not ++ * @buf_size: size of buf, if already allocated. If @buf not + * allocated, this is the largest size to allocate. +- * @file_size if non-NULL, the full size of @file will be ++ * @file_size: if non-NULL, the full size of @file will be + * written here. +- * @id the kernel_read_file_id identifying the type of ++ * @id: the kernel_read_file_id identifying the type of + * file contents being read (for LSMs to examine) + * + * @offset must be 0 unless both @buf and @file_size are non-NULL +diff --git a/fs/namei.c b/fs/namei.c +index 2bae29ea52ffa..567ee547492bc 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -643,6 +643,8 @@ static bool nd_alloc_stack(struct nameidata *nd) + + /** + * path_connected - Verify that a dentry is below mnt.mnt_root ++ * @mnt: The mountpoint to check. ++ * @dentry: The dentry to check. + * + * Rename can sometimes move a file or directory outside of a bind + * mount, path_connected allows those cases to be detected. +@@ -1083,6 +1085,7 @@ fs_initcall(init_fs_namei_sysctls); + /** + * may_follow_link - Check symlink following for unsafe situations + * @nd: nameidata pathwalk data ++ * @inode: Used for idmapping. + * + * In the case of the sysctl_protected_symlinks sysctl being enabled, + * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is +diff --git a/fs/open.c b/fs/open.c +index e6ead0f199649..7c9647a8f219d 100644 +--- a/fs/open.c ++++ b/fs/open.c +@@ -1150,7 +1150,7 @@ EXPORT_SYMBOL_GPL(kernel_file_open); + * backing_file_open - open a backing file for kernel internal use + * @path: path of the file to open + * @flags: open flags +- * @path: path of the backing file ++ * @real_path: path of the backing file + * @cred: credentials for open + * + * Open a backing file for a stackable filesystem (e.g., overlayfs). +@@ -1546,7 +1546,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd) + } + + /** +- * close_range() - Close all file descriptors in a given range. ++ * sys_close_range() - Close all file descriptors in a given range. + * + * @fd: starting file descriptor to close + * @max_fd: last file descriptor to close +-- +2.40.1 + diff --git a/queue-6.5/ovl-fix-regression-in-parsing-of-mount-options-with-.patch b/queue-6.5/ovl-fix-regression-in-parsing-of-mount-options-with-.patch new file mode 100644 index 00000000000..f03e86b0692 --- /dev/null +++ b/queue-6.5/ovl-fix-regression-in-parsing-of-mount-options-with-.patch @@ -0,0 +1,80 @@ +From 2173505925ac9b7d66ba7765aca6bbdc9722bf92 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Oct 2023 16:08:28 +0300 +Subject: ovl: fix regression in parsing of mount options with escaped comma + +From: Amir Goldstein + +[ Upstream commit c34706acf40b43dd31f67c92c5a95d39666a1eb3 ] + +Ever since commit 91c77947133f ("ovl: allow filenames with comma"), the +following example was legit overlayfs mount options: + + mount -t overlay overlay -o 'lowerdir=/tmp/a\,b/lower' /mnt + +The conversion to new mount api moved to using the common helper +generic_parse_monolithic() and discarded the specialized ovl_next_opt() +option separator. + +Bring back ovl_next_opt() and use vfs_parse_monolithic_sep() to fix the +regression. + +Reported-by: Ryan Hendrickson +Closes: https://lore.kernel.org/r/8da307fb-9318-cf78-8a27-ba5c5a0aef6d@alum.mit.edu/ +Fixes: 1784fbc2ed9c ("ovl: port to new mount api") +Signed-off-by: Amir Goldstein +Signed-off-by: Sasha Levin +--- + fs/overlayfs/params.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c +index c69d97aef2cf9..c0f70af422d6c 100644 +--- a/fs/overlayfs/params.c ++++ b/fs/overlayfs/params.c +@@ -120,6 +120,34 @@ const struct fs_parameter_spec ovl_parameter_spec[] = { + {} + }; + ++static char *ovl_next_opt(char **s) ++{ ++ char *sbegin = *s; ++ char *p; ++ ++ if (sbegin == NULL) ++ return NULL; ++ ++ for (p = sbegin; *p; p++) { ++ if (*p == '\\') { ++ p++; ++ if (!*p) ++ break; ++ } else if (*p == ',') { ++ *p = '\0'; ++ *s = p + 1; ++ return sbegin; ++ } ++ } ++ *s = NULL; ++ return sbegin; ++} ++ ++static int ovl_parse_monolithic(struct fs_context *fc, void *data) ++{ ++ return vfs_parse_monolithic_sep(fc, data, ovl_next_opt); ++} ++ + static ssize_t ovl_parse_param_split_lowerdirs(char *str) + { + ssize_t nr_layers = 1, nr_colons = 0; +@@ -596,6 +624,7 @@ static int ovl_reconfigure(struct fs_context *fc) + } + + static const struct fs_context_operations ovl_context_ops = { ++ .parse_monolithic = ovl_parse_monolithic, + .parse_param = ovl_parse_param, + .get_tree = ovl_get_tree, + .reconfigure = ovl_reconfigure, +-- +2.40.1 + diff --git a/queue-6.5/ovl-fix-regression-in-showing-lowerdir-mount-option.patch b/queue-6.5/ovl-fix-regression-in-showing-lowerdir-mount-option.patch new file mode 100644 index 00000000000..217cedb1cf7 --- /dev/null +++ b/queue-6.5/ovl-fix-regression-in-showing-lowerdir-mount-option.patch @@ -0,0 +1,144 @@ +From 816477eab8a0efd3989b6e6de7119180ef42dcdc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Oct 2023 17:07:03 +0300 +Subject: ovl: fix regression in showing lowerdir mount option + +From: Amir Goldstein + +[ Upstream commit 32db510708507f6133f496ff385cbd841d8f9098 ] + +Before commit b36a5780cb44 ("ovl: modify layer parameter parsing"), +spaces and commas in lowerdir mount option value used to be escaped using +seq_show_option(). + +In current upstream, when lowerdir value has a space, it is not escaped +in /proc/mounts, e.g.: + + none /mnt overlay rw,relatime,lowerdir=l l,upperdir=u,workdir=w 0 0 + +which results in broken output of the mount utility: + + none on /mnt type overlay (rw,relatime,lowerdir=l) + +Store the original lowerdir mount options before unescaping and show +them using the same escaping used for seq_show_option() in addition to +escaping the colon separator character. + +Fixes: b36a5780cb44 ("ovl: modify layer parameter parsing") +Signed-off-by: Amir Goldstein +Signed-off-by: Sasha Levin +--- + Documentation/filesystems/overlayfs.rst | 12 ++++++++ + fs/overlayfs/params.c | 38 +++++++++++++++---------- + 2 files changed, 35 insertions(+), 15 deletions(-) + +diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst +index eb7d2c88ddece..8e1b27288afd4 100644 +--- a/Documentation/filesystems/overlayfs.rst ++++ b/Documentation/filesystems/overlayfs.rst +@@ -339,6 +339,18 @@ The specified lower directories will be stacked beginning from the + rightmost one and going left. In the above example lower1 will be the + top, lower2 the middle and lower3 the bottom layer. + ++Note: directory names containing colons can be provided as lower layer by ++escaping the colons with a single backslash. For example: ++ ++ mount -t overlay overlay -olowerdir=/a\:lower\:\:dir /merged ++ ++Since kernel version v6.5, directory names containing colons can also ++be provided as lower layer using the fsconfig syscall from new mount api: ++ ++ fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir", "/a:lower::dir", 0); ++ ++In the latter case, colons in lower layer directory names will be escaped ++as an octal characters (\072) when displayed in /proc/self/mountinfo. + + Metadata only copy up + --------------------- +diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c +index e6edad7542e88..644badb13fe01 100644 +--- a/fs/overlayfs/params.c ++++ b/fs/overlayfs/params.c +@@ -155,7 +155,8 @@ static ssize_t ovl_parse_param_split_lowerdirs(char *str) + + for (s = d = str;; s++, d++) { + if (*s == '\\') { +- s++; ++ /* keep esc chars in split lowerdir */ ++ *d++ = *s++; + } else if (*s == ':') { + bool next_colon = (*(s + 1) == ':'); + +@@ -230,7 +231,7 @@ static void ovl_unescape(char *s) + } + } + +-static int ovl_mount_dir(const char *name, struct path *path) ++static int ovl_mount_dir(const char *name, struct path *path, bool upper) + { + int err = -ENOMEM; + char *tmp = kstrdup(name, GFP_KERNEL); +@@ -239,7 +240,7 @@ static int ovl_mount_dir(const char *name, struct path *path) + ovl_unescape(tmp); + err = ovl_mount_dir_noesc(tmp, path); + +- if (!err && path->dentry->d_flags & DCACHE_OP_REAL) { ++ if (!err && upper && path->dentry->d_flags & DCACHE_OP_REAL) { + pr_err("filesystem on '%s' not supported as upperdir\n", + tmp); + path_put_init(path); +@@ -260,7 +261,7 @@ static int ovl_parse_param_upperdir(const char *name, struct fs_context *fc, + struct path path; + char *dup; + +- err = ovl_mount_dir(name, &path); ++ err = ovl_mount_dir(name, &path, true); + if (err) + return err; + +@@ -417,7 +418,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) + l = &ctx->lower[nr]; + memset(l, 0, sizeof(*l)); + +- err = ovl_mount_dir_noesc(dup_iter, &l->path); ++ err = ovl_mount_dir(dup_iter, &l->path, false); + if (err) + goto out_put; + +@@ -858,16 +859,23 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) + struct super_block *sb = dentry->d_sb; + struct ovl_fs *ofs = sb->s_fs_info; + size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer; +- char **lowerdatadirs = &ofs->config.lowerdirs[nr_merged_lower]; +- +- /* lowerdirs[] starts from offset 1 */ +- seq_printf(m, ",lowerdir=%s", ofs->config.lowerdirs[1]); +- /* dump regular lower layers */ +- for (nr = 2; nr < nr_merged_lower; nr++) +- seq_printf(m, ":%s", ofs->config.lowerdirs[nr]); +- /* dump data lower layers */ +- for (nr = 0; nr < ofs->numdatalayer; nr++) +- seq_printf(m, "::%s", lowerdatadirs[nr]); ++ ++ /* ++ * lowerdirs[] starts from offset 1, then ++ * >= 0 regular lower layers prefixed with : and ++ * >= 0 data-only lower layers prefixed with :: ++ * ++ * we need to escase comma and space like seq_show_option() does and ++ * we also need to escape the colon separator from lowerdir paths. ++ */ ++ seq_puts(m, ",lowerdir="); ++ for (nr = 1; nr < ofs->numlayer; nr++) { ++ if (nr > 1) ++ seq_putc(m, ':'); ++ if (nr >= nr_merged_lower) ++ seq_putc(m, ':'); ++ seq_escape(m, ofs->config.lowerdirs[nr], ":, \t\n\\"); ++ } + if (ofs->config.upperdir) { + seq_show_option(m, "upperdir", ofs->config.upperdir); + seq_show_option(m, "workdir", ofs->config.workdir); +-- +2.40.1 + diff --git a/queue-6.5/ovl-make-use-of-layers-safe-in-rcu-pathwalk.patch b/queue-6.5/ovl-make-use-of-layers-safe-in-rcu-pathwalk.patch new file mode 100644 index 00000000000..f1b9ba66386 --- /dev/null +++ b/queue-6.5/ovl-make-use-of-layers-safe-in-rcu-pathwalk.patch @@ -0,0 +1,164 @@ +From 664e6fe0160c646d8380c4020a1951fcc2365eac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Oct 2023 14:21:49 +0300 +Subject: ovl: make use of ->layers safe in rcu pathwalk + +From: Amir Goldstein + +[ Upstream commit a535116d80339dbfe50b9b81b2f808c69eefbbc3 ] + +ovl_permission() accesses ->layers[...].mnt; we can't have ->layers +freed without an RCU delay on fs shutdown. + +Fortunately, kern_unmount_array() that is used to drop those mounts +does include an RCU delay, so freeing is delayed; unfortunately, the +array passed to kern_unmount_array() is formed by mangling ->layers +contents and that happens without any delays. + +The ->layers[...].name string entries are used to store the strings to +display in "lowerdir=..." by ovl_show_options(). Those entries are not +accessed in RCU walk. + +Move the name strings into a separate array ofs->config.lowerdirs and +reuse the ofs->config.lowerdirs array as the temporary mount array to +pass to kern_unmount_array(). + +Reported-by: Al Viro +Link: https://lore.kernel.org/r/20231002023711.GP3389589@ZenIV/ +Acked-by: Miklos Szeredi +Signed-off-by: Amir Goldstein +Stable-dep-of: 32db51070850 ("ovl: fix regression in showing lowerdir mount option") +Signed-off-by: Sasha Levin +--- + fs/overlayfs/ovl_entry.h | 10 +--------- + fs/overlayfs/params.c | 17 +++++++++-------- + fs/overlayfs/super.c | 18 +++++++++++------- + 3 files changed, 21 insertions(+), 24 deletions(-) + +diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h +index 306e1ecdc96d3..2b703521871ea 100644 +--- a/fs/overlayfs/ovl_entry.h ++++ b/fs/overlayfs/ovl_entry.h +@@ -8,6 +8,7 @@ + struct ovl_config { + char *upperdir; + char *workdir; ++ char **lowerdirs; + bool default_permissions; + int redirect_mode; + bool index; +@@ -38,17 +39,8 @@ struct ovl_layer { + int idx; + /* One fsid per unique underlying sb (upper fsid == 0) */ + int fsid; +- char *name; + }; + +-/* +- * ovl_free_fs() relies on @mnt being the first member when unmounting +- * the private mounts created for each layer. Let's check both the +- * offset and type. +- */ +-static_assert(offsetof(struct ovl_layer, mnt) == 0); +-static_assert(__same_type(typeof_member(struct ovl_layer, mnt), struct vfsmount *)); +- + struct ovl_path { + const struct ovl_layer *layer; + struct dentry *dentry; +diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c +index c0f70af422d6c..e6edad7542e88 100644 +--- a/fs/overlayfs/params.c ++++ b/fs/overlayfs/params.c +@@ -695,12 +695,12 @@ void ovl_free_fs(struct ovl_fs *ofs) + if (ofs->upperdir_locked) + ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root); + +- /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */ +- mounts = (struct vfsmount **) ofs->layers; ++ /* Reuse ofs->config.lowerdirs as a vfsmount array before freeing it */ ++ mounts = (struct vfsmount **) ofs->config.lowerdirs; + for (i = 0; i < ofs->numlayer; i++) { + iput(ofs->layers[i].trap); ++ kfree(ofs->config.lowerdirs[i]); + mounts[i] = ofs->layers[i].mnt; +- kfree(ofs->layers[i].name); + } + kern_unmount_array(mounts, ofs->numlayer); + kfree(ofs->layers); +@@ -708,6 +708,7 @@ void ovl_free_fs(struct ovl_fs *ofs) + free_anon_bdev(ofs->fs[i].pseudo_dev); + kfree(ofs->fs); + ++ kfree(ofs->config.lowerdirs); + kfree(ofs->config.upperdir); + kfree(ofs->config.workdir); + if (ofs->creator_cred) +@@ -857,16 +858,16 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) + struct super_block *sb = dentry->d_sb; + struct ovl_fs *ofs = sb->s_fs_info; + size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer; +- const struct ovl_layer *data_layers = &ofs->layers[nr_merged_lower]; ++ char **lowerdatadirs = &ofs->config.lowerdirs[nr_merged_lower]; + +- /* ofs->layers[0] is the upper layer */ +- seq_printf(m, ",lowerdir=%s", ofs->layers[1].name); ++ /* lowerdirs[] starts from offset 1 */ ++ seq_printf(m, ",lowerdir=%s", ofs->config.lowerdirs[1]); + /* dump regular lower layers */ + for (nr = 2; nr < nr_merged_lower; nr++) +- seq_printf(m, ":%s", ofs->layers[nr].name); ++ seq_printf(m, ":%s", ofs->config.lowerdirs[nr]); + /* dump data lower layers */ + for (nr = 0; nr < ofs->numdatalayer; nr++) +- seq_printf(m, "::%s", data_layers[nr].name); ++ seq_printf(m, "::%s", lowerdatadirs[nr]); + if (ofs->config.upperdir) { + seq_show_option(m, "upperdir", ofs->config.upperdir); + seq_show_option(m, "workdir", ofs->config.workdir); +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 1090c68e5b051..80a70eaa30d90 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -565,11 +565,6 @@ static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, + upper_layer->idx = 0; + upper_layer->fsid = 0; + +- err = -ENOMEM; +- upper_layer->name = kstrdup(ofs->config.upperdir, GFP_KERNEL); +- if (!upper_layer->name) +- goto out; +- + /* + * Inherit SB_NOSEC flag from upperdir. + * +@@ -1113,7 +1108,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, + layers[ofs->numlayer].idx = ofs->numlayer; + layers[ofs->numlayer].fsid = fsid; + layers[ofs->numlayer].fs = &ofs->fs[fsid]; +- layers[ofs->numlayer].name = l->name; ++ /* Store for printing lowerdir=... in ovl_show_options() */ ++ ofs->config.lowerdirs[ofs->numlayer] = l->name; + l->name = NULL; + ofs->numlayer++; + ofs->fs[fsid].is_lower = true; +@@ -1358,8 +1354,16 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) + if (!layers) + goto out_err; + ++ ofs->config.lowerdirs = kcalloc(ctx->nr + 1, sizeof(char *), GFP_KERNEL); ++ if (!ofs->config.lowerdirs) { ++ kfree(layers); ++ goto out_err; ++ } + ofs->layers = layers; +- /* Layer 0 is reserved for upper even if there's no upper */ ++ /* ++ * Layer 0 is reserved for upper even if there's no upper. ++ * For consistency, config.lowerdirs[0] is NULL. ++ */ + ofs->numlayer = 1; + + sb->s_stack_depth = 0; +-- +2.40.1 + diff --git a/queue-6.5/powerpc-64e-fix-wrong-test-in-__ptep_test_and_clear_.patch b/queue-6.5/powerpc-64e-fix-wrong-test-in-__ptep_test_and_clear_.patch new file mode 100644 index 00000000000..cbfe771d485 --- /dev/null +++ b/queue-6.5/powerpc-64e-fix-wrong-test-in-__ptep_test_and_clear_.patch @@ -0,0 +1,52 @@ +From c7f9a9e8a17b585d270d498eb8f97c56c6ef9e02 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Sep 2023 20:31:16 +0200 +Subject: powerpc/64e: Fix wrong test in __ptep_test_and_clear_young() + +From: Christophe Leroy + +[ Upstream commit 5ea0bbaa32e8f54e9a57cfee4a3b8769b80be0d2 ] + +Commit 45201c879469 ("powerpc/nohash: Remove hash related code from +nohash headers.") replaced: + + if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) + return 0; + +By: + + if (pte_young(*ptep)) + return 0; + +But it should be: + + if (!pte_young(*ptep)) + return 0; + +Fix it. + +Fixes: 45201c879469 ("powerpc/nohash: Remove hash related code from nohash headers.") +Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://msgid.link/8bb7f06494e21adada724ede47a4c3d97e879d40.1695659959.git.christophe.leroy@csgroup.eu +Signed-off-by: Sasha Levin +--- + arch/powerpc/include/asm/nohash/64/pgtable.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h +index 287e25864ffae..072048e723c9b 100644 +--- a/arch/powerpc/include/asm/nohash/64/pgtable.h ++++ b/arch/powerpc/include/asm/nohash/64/pgtable.h +@@ -197,7 +197,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + { + unsigned long old; + +- if (pte_young(*ptep)) ++ if (!pte_young(*ptep)) + return 0; + old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); + return (old & _PAGE_ACCESSED) != 0; +-- +2.40.1 + diff --git a/queue-6.5/powerpc-8xx-fix-pte_access_permitted-for-page_none.patch b/queue-6.5/powerpc-8xx-fix-pte_access_permitted-for-page_none.patch new file mode 100644 index 00000000000..3ac43cd0527 --- /dev/null +++ b/queue-6.5/powerpc-8xx-fix-pte_access_permitted-for-page_none.patch @@ -0,0 +1,62 @@ +From 27ca6c2812d62d46ae1c8070d5d91e2f81ab7b09 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Sep 2023 20:31:15 +0200 +Subject: powerpc/8xx: Fix pte_access_permitted() for PAGE_NONE + +From: Christophe Leroy + +[ Upstream commit 5d9cea8a552ee122e21fbd5a3c5d4eb85f648e06 ] + +On 8xx, PAGE_NONE is handled by setting _PAGE_NA instead of clearing +_PAGE_USER. + +But then pte_user() returns 1 also for PAGE_NONE. + +As _PAGE_NA prevent reads, add a specific version of pte_read() +that returns 0 when _PAGE_NA is set instead of always returning 1. + +Fixes: 351750331fc1 ("powerpc/mm: Introduce _PAGE_NA") +Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://msgid.link/57bcfbe578e43123f9ed73e040229b80f1ad56ec.1695659959.git.christophe.leroy@csgroup.eu +Signed-off-by: Sasha Levin +--- + arch/powerpc/include/asm/nohash/32/pte-8xx.h | 7 +++++++ + arch/powerpc/include/asm/nohash/pgtable.h | 2 ++ + 2 files changed, 9 insertions(+) + +diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h +index 1a89ebdc3acc9..0238e6bd0d6c1 100644 +--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h ++++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h +@@ -94,6 +94,13 @@ static inline pte_t pte_wrprotect(pte_t pte) + + #define pte_wrprotect pte_wrprotect + ++static inline int pte_read(pte_t pte) ++{ ++ return (pte_val(pte) & _PAGE_RO) != _PAGE_NA; ++} ++ ++#define pte_read pte_read ++ + static inline int pte_write(pte_t pte) + { + return !(pte_val(pte) & _PAGE_RO); +diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h +index a6caaaab6f922..3af11981fcd55 100644 +--- a/arch/powerpc/include/asm/nohash/pgtable.h ++++ b/arch/powerpc/include/asm/nohash/pgtable.h +@@ -25,7 +25,9 @@ static inline int pte_write(pte_t pte) + return pte_val(pte) & _PAGE_RW; + } + #endif ++#ifndef pte_read + static inline int pte_read(pte_t pte) { return 1; } ++#endif + static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } + static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } + static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } +-- +2.40.1 + diff --git a/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-pos.patch b/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-pos.patch new file mode 100644 index 00000000000..b7f547b9006 --- /dev/null +++ b/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-pos.patch @@ -0,0 +1,190 @@ +From d187b9c8315c6221a4224c99f361c512d97c6114 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 15 Oct 2023 12:02:02 -0700 +Subject: Revert "x86/smp: Put CPUs into INIT on shutdown if possible" + +From: Linus Torvalds + +[ Upstream commit fbe1bf1e5ff1e3b298420d7a8434983ef8d72bd1 ] + +This reverts commit 45e34c8af58f23db4474e2bfe79183efec09a18b, and the +two subsequent fixes to it: + + 3f874c9b2aae ("x86/smp: Don't send INIT to non-present and non-booted CPUs") + b1472a60a584 ("x86/smp: Don't send INIT to boot CPU") + +because it seems to result in hung machines at shutdown. Particularly +some Dell machines, but Thomas says + + "The rest seems to be Lenovo and Sony with Alderlake/Raptorlake CPUs - + at least that's what I could figure out from the various bug reports. + + I don't know which CPUs the DELL machines have, so I can't say it's a + pattern. + + I agree with the revert for now" + +Ashok Raj chimes in: + + "There was a report (probably this same one), and it turns out it was a + bug in the BIOS SMI handler. + + The client BIOS's were waiting for the lowest APICID to be the SMI + rendevous master. If this is MeteorLake, the BSP wasn't the one with + the lowest APIC and it triped here. + + The BIOS change is also being pushed to others for assimilation :) + + Server BIOS's had this correctly for a while now" + +and it does look likely to be some bad interaction between SMI and the +non-BSP cores having put into INIT (and thus unresponsive until reset). + +Link: https://bbs.archlinux.org/viewtopic.php?pid=2124429 +Link: https://www.reddit.com/r/openSUSE/comments/16qq99b/tumbleweed_shutdown_did_not_finish_completely/ +Link: https://forum.artixlinux.org/index.php/topic,5997.0.html +Link: https://bugzilla.redhat.com/show_bug.cgi?id=2241279 +Acked-by: Thomas Gleixner +Cc: Ashok Raj +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/smp.h | 1 - + arch/x86/kernel/smp.c | 39 +++++++------------------------------- + arch/x86/kernel/smpboot.c | 27 -------------------------- + 3 files changed, 7 insertions(+), 60 deletions(-) + +diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h +index c1bcb1449ee89..7bbeac8bd49ea 100644 +--- a/arch/x86/include/asm/smp.h ++++ b/arch/x86/include/asm/smp.h +@@ -134,7 +134,6 @@ void native_send_call_func_ipi(const struct cpumask *mask); + void native_send_call_func_single_ipi(int cpu); + void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); + +-bool smp_park_other_cpus_in_init(void); + void smp_store_cpu_info(int id); + + asmlinkage __visible void smp_reboot_interrupt(void); +diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c +index 7eb18ca7bd45b..cc8ef9bfcb52f 100644 +--- a/arch/x86/kernel/smp.c ++++ b/arch/x86/kernel/smp.c +@@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) + } + + /* +- * Disable virtualization, APIC etc. and park the CPU in a HLT loop ++ * this function calls the 'stop' function on all other CPUs in the system. + */ + DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) + { +@@ -172,17 +172,13 @@ static void native_stop_other_cpus(int wait) + * 2) Wait for all other CPUs to report that they reached the + * HLT loop in stop_this_cpu() + * +- * 3) If the system uses INIT/STARTUP for CPU bringup, then +- * send all present CPUs an INIT vector, which brings them +- * completely out of the way. ++ * 3) If #2 timed out send an NMI to the CPUs which did not ++ * yet report + * +- * 4) If #3 is not possible and #2 timed out send an NMI to the +- * CPUs which did not yet report +- * +- * 5) Wait for all other CPUs to report that they reached the ++ * 4) Wait for all other CPUs to report that they reached the + * HLT loop in stop_this_cpu() + * +- * #4 can obviously race against a CPU reaching the HLT loop late. ++ * #3 can obviously race against a CPU reaching the HLT loop late. + * That CPU will have reported already and the "have all CPUs + * reached HLT" condition will be true despite the fact that the + * other CPU is still handling the NMI. Again, there is no +@@ -198,7 +194,7 @@ static void native_stop_other_cpus(int wait) + /* + * Don't wait longer than a second for IPI completion. The + * wait request is not checked here because that would +- * prevent an NMI/INIT shutdown in case that not all ++ * prevent an NMI shutdown attempt in case that not all + * CPUs reach shutdown state. + */ + timeout = USEC_PER_SEC; +@@ -206,27 +202,7 @@ static void native_stop_other_cpus(int wait) + udelay(1); + } + +- /* +- * Park all other CPUs in INIT including "offline" CPUs, if +- * possible. That's a safe place where they can't resume execution +- * of HLT and then execute the HLT loop from overwritten text or +- * page tables. +- * +- * The only downside is a broadcast MCE, but up to the point where +- * the kexec() kernel brought all APs online again an MCE will just +- * make HLT resume and handle the MCE. The machine crashes and burns +- * due to overwritten text, page tables and data. So there is a +- * choice between fire and frying pan. The result is pretty much +- * the same. Chose frying pan until x86 provides a sane mechanism +- * to park a CPU. +- */ +- if (smp_park_other_cpus_in_init()) +- goto done; +- +- /* +- * If park with INIT was not possible and the REBOOT_VECTOR didn't +- * take all secondary CPUs offline, try with the NMI. +- */ ++ /* if the REBOOT_VECTOR didn't work, try with the NMI */ + if (!cpumask_empty(&cpus_stop_mask)) { + /* + * If NMI IPI is enabled, try to register the stop handler +@@ -249,7 +225,6 @@ static void native_stop_other_cpus(int wait) + udelay(1); + } + +-done: + local_irq_save(flags); + disable_local_APIC(); + mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index d3179aae1384f..4c502de98746c 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -1346,33 +1346,6 @@ void arch_thaw_secondary_cpus_end(void) + cache_aps_init(); + } + +-bool smp_park_other_cpus_in_init(void) +-{ +- unsigned int cpu, this_cpu = smp_processor_id(); +- unsigned int apicid; +- +- if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu) +- return false; +- +- /* +- * If this is a crash stop which does not execute on the boot CPU, +- * then this cannot use the INIT mechanism because INIT to the boot +- * CPU will reset the machine. +- */ +- if (this_cpu) +- return false; +- +- for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) { +- if (cpu == this_cpu) +- continue; +- apicid = apic->cpu_present_to_apicid(cpu); +- if (apicid == BAD_APICID) +- continue; +- send_init_sequence(apicid); +- } +- return true; +-} +- + /* + * Early setup to make printk work. + */ +-- +2.40.1 + diff --git a/queue-6.5/series b/queue-6.5/series index 6cc0771e37e..4cfbdf0f530 100644 --- a/queue-6.5/series +++ b/queue-6.5/series @@ -178,3 +178,14 @@ usb-gadget-ncm-handle-decoding-of-multiple-ntb-s-in-unwrap-call.patch usb-cdnsp-fixes-issue-with-dequeuing-not-queued-requests.patch usb-typec-qcom-update-the-logic-of-regulator-enable-and-disable.patch usb-misc-onboard_hub-add-support-for-microchip-usb2412-usb-2.0-hub.patch +dmaengine-idxd-use-spin_lock_irqsave-before-wait_eve.patch +dmaengine-mediatek-fix-deadlock-caused-by-synchroniz.patch +powerpc-8xx-fix-pte_access_permitted-for-page_none.patch +powerpc-64e-fix-wrong-test-in-__ptep_test_and_clear_.patch +fs-fix-kernel-doc-warnings.patch +fs-factor-out-vfs_parse_monolithic_sep-helper.patch +ovl-fix-regression-in-parsing-of-mount-options-with-.patch +ovl-make-use-of-layers-safe-in-rcu-pathwalk.patch +ovl-fix-regression-in-showing-lowerdir-mount-option.patch +x86-smpboot-change-smp_store_boot_cpu_info-to-static.patch +revert-x86-smp-put-cpus-into-init-on-shutdown-if-pos.patch diff --git a/queue-6.5/x86-smpboot-change-smp_store_boot_cpu_info-to-static.patch b/queue-6.5/x86-smpboot-change-smp_store_boot_cpu_info-to-static.patch new file mode 100644 index 00000000000..0e76a03e8b9 --- /dev/null +++ b/queue-6.5/x86-smpboot-change-smp_store_boot_cpu_info-to-static.patch @@ -0,0 +1,50 @@ +From 3bfa185fb41b94708364663e230ae722dfff266c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Jul 2023 18:05:33 +0000 +Subject: x86/smpboot: Change smp_store_boot_cpu_info() to static + +From: Sohil Mehta + +[ Upstream commit d7114f83ee051dfeac82546d7ba03d74f8b92af3 ] + +The function is only used locally. Convert it to a static one. + +Signed-off-by: Sohil Mehta +Signed-off-by: Thomas Gleixner +Link: https://lore.kernel.org/r/20230727180533.3119660-4-sohil.mehta@intel.com +Stable-dep-of: fbe1bf1e5ff1 ("Revert "x86/smp: Put CPUs into INIT on shutdown if possible"") +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/smp.h | 2 -- + arch/x86/kernel/smpboot.c | 2 +- + 2 files changed, 1 insertion(+), 3 deletions(-) + +diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h +index 600cf25dbfc64..c1bcb1449ee89 100644 +--- a/arch/x86/include/asm/smp.h ++++ b/arch/x86/include/asm/smp.h +@@ -135,8 +135,6 @@ void native_send_call_func_single_ipi(int cpu); + void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); + + bool smp_park_other_cpus_in_init(void); +- +-void smp_store_boot_cpu_info(void); + void smp_store_cpu_info(int id); + + asmlinkage __visible void smp_reboot_interrupt(void); +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index 747b83a373a2d..d3179aae1384f 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -422,7 +422,7 @@ int topology_update_die_map(unsigned int die, unsigned int cpu) + return 0; + } + +-void __init smp_store_boot_cpu_info(void) ++static void __init smp_store_boot_cpu_info(void) + { + int id = 0; /* CPU 0 */ + struct cpuinfo_x86 *c = &cpu_data(id); +-- +2.40.1 + -- 2.47.3