From a02b57a31e925a012247dfc91dc8ff07d01e66a1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Tue, 18 Apr 2023 12:28:55 +0200
Subject: [PATCH] 5.10-stable patches

added patches:
      kexec-move-locking-into-do_kexec_load.patch
      kexec-turn-all-kexec_mutex-acquisitions-into-trylocks.patch
      panic-kexec-make-__crash_kexec-nmi-safe.patch
---
 ...exec-move-locking-into-do_kexec_load.patch | 146 +++++++++++
 ...xec_mutex-acquisitions-into-trylocks.patch | 111 +++++++++
 ...ic-kexec-make-__crash_kexec-nmi-safe.patch | 228 ++++++++++++++++++
 queue-5.10/series                             |   3 +
 4 files changed, 488 insertions(+)
 create mode 100644 queue-5.10/kexec-move-locking-into-do_kexec_load.patch
 create mode 100644 queue-5.10/kexec-turn-all-kexec_mutex-acquisitions-into-trylocks.patch
 create mode 100644 queue-5.10/panic-kexec-make-__crash_kexec-nmi-safe.patch

diff --git a/queue-5.10/kexec-move-locking-into-do_kexec_load.patch b/queue-5.10/kexec-move-locking-into-do_kexec_load.patch
new file mode 100644
index 00000000000..fce568c7292
--- /dev/null
+++ b/queue-5.10/kexec-move-locking-into-do_kexec_load.patch
@@ -0,0 +1,146 @@
+From 4b692e861619353ce069e547a67c8d0e32d9ef3d Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann
+Date: Wed, 8 Sep 2021 15:18:10 -0700
+Subject: kexec: move locking into do_kexec_load
+
+From: Arnd Bergmann
+
+commit 4b692e861619353ce069e547a67c8d0e32d9ef3d upstream.
+
+Patch series "compat: remove compat_alloc_user_space", v5.
+
+Going through compat_alloc_user_space() to convert indirect system call
+arguments tends to add complexity compared to handling the native and
+compat logic in the same code.
+
+This patch (of 6):
+
+The locking is the same between the native and compat version of
+sys_kexec_load(), so it can be done in the common implementation to reduce
+duplication.
+
+Link: https://lkml.kernel.org/r/20210727144859.4150043-1-arnd@kernel.org
+Link: https://lkml.kernel.org/r/20210727144859.4150043-2-arnd@kernel.org
+Signed-off-by: Arnd Bergmann
+Co-developed-by: Eric Biederman
+Co-developed-by: Christoph Hellwig
+Acked-by: "Eric W. Biederman"
+Cc: Catalin Marinas
+Cc: Will Deacon
+Cc: Thomas Bogendoerfer
+Cc: "James E.J. Bottomley"
+Cc: Helge Deller
+Cc: Michael Ellerman
+Cc: Benjamin Herrenschmidt
+Cc: Paul Mackerras
+Cc: Heiko Carstens
+Cc: Vasily Gorbik
+Cc: Christian Borntraeger
+Cc: "David S. Miller"
+Cc: Thomas Gleixner
+Cc: Ingo Molnar
+Cc: Borislav Petkov
+Cc: "H. Peter Anvin"
+Cc: Al Viro
+Cc: Feng Tang
+Cc: Christoph Hellwig
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Wen Yang
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/kexec.c |   44 ++++++++++++++++----------------------------
+ 1 file changed, 16 insertions(+), 28 deletions(-)
+
+--- a/kernel/kexec.c
++++ b/kernel/kexec.c
+@@ -110,6 +110,17 @@ static int do_kexec_load(unsigned long e
+ 	unsigned long i;
+ 	int ret;
+ 
++	/*
++	 * Because we write directly to the reserved memory region when loading
++	 * crash kernels we need a mutex here to prevent multiple crash kernels
++	 * from attempting to load simultaneously, and to prevent a crash kernel
++	 * from loading over the top of a in use crash kernel.
++	 *
++	 * KISS: always take the mutex.
++	 */
++	if (!mutex_trylock(&kexec_mutex))
++		return -EBUSY;
++
+ 	if (flags & KEXEC_ON_CRASH) {
+ 		dest_image = &kexec_crash_image;
+ 		if (kexec_crash_image)
+@@ -121,7 +132,8 @@ static int do_kexec_load(unsigned long e
+ 	if (nr_segments == 0) {
+ 		/* Uninstall image */
+ 		kimage_free(xchg(dest_image, NULL));
+-		return 0;
++		ret = 0;
++		goto out_unlock;
+ 	}
+ 	if (flags & KEXEC_ON_CRASH) {
+ 		/*
+@@ -134,7 +146,7 @@ static int do_kexec_load(unsigned long e
+ 
+ 	ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
+ 	if (ret)
+-		return ret;
++		goto out_unlock;
+ 
+ 	if (flags & KEXEC_PRESERVE_CONTEXT)
+ 		image->preserve_context = 1;
+@@ -171,6 +183,8 @@ out:
+ 		arch_kexec_protect_crashkres();
+ 
+ 	kimage_free(image);
++out_unlock:
++	mutex_unlock(&kexec_mutex);
+ 	return ret;
+ }
+ 
+@@ -247,21 +261,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned lon
+ 	    ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
+ 		return -EINVAL;
+ 
+-	/* Because we write directly to the reserved memory
+-	 * region when loading crash kernels we need a mutex here to
+-	 * prevent multiple crash kernels from attempting to load
+-	 * simultaneously, and to prevent a crash kernel from loading
+-	 * over the top of a in use crash kernel.
+-	 *
+-	 * KISS: always take the mutex.
+-	 */
+-	if (!mutex_trylock(&kexec_mutex))
+-		return -EBUSY;
+-
+ 	result = do_kexec_load(entry, nr_segments, segments, flags);
+ 
+-	mutex_unlock(&kexec_mutex);
+-
+ 	return result;
+ }
+ 
+@@ -301,21 +302,8 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compa
+ 			return -EFAULT;
+ 	}
+ 
+-	/* Because we write directly to the reserved memory
+-	 * region when loading crash kernels we need a mutex here to
+-	 * prevent multiple crash kernels from attempting to load
+-	 * simultaneously, and to prevent a crash kernel from loading
+-	 * over the top of a in use crash kernel.
+-	 *
+-	 * KISS: always take the mutex.
+-	 */
+-	if (!mutex_trylock(&kexec_mutex))
+-		return -EBUSY;
+-
+ 	result = do_kexec_load(entry, nr_segments, ksegments, flags);
+ 
+-	mutex_unlock(&kexec_mutex);
+-
+ 	return result;
+ }
+ #endif
diff --git a/queue-5.10/kexec-turn-all-kexec_mutex-acquisitions-into-trylocks.patch b/queue-5.10/kexec-turn-all-kexec_mutex-acquisitions-into-trylocks.patch
new file mode 100644
index 00000000000..3b958403c69
--- /dev/null
+++ b/queue-5.10/kexec-turn-all-kexec_mutex-acquisitions-into-trylocks.patch
@@ -0,0 +1,111 @@
+From 7bb5da0d490b2d836c5218f5186ee588d2145310 Mon Sep 17 00:00:00 2001
+From: Valentin Schneider
+Date: Thu, 30 Jun 2022 23:32:57 +0100
+Subject: kexec: turn all kexec_mutex acquisitions into trylocks
+
+From: Valentin Schneider
+
+commit 7bb5da0d490b2d836c5218f5186ee588d2145310 upstream.
+
+Patch series "kexec, panic: Making crash_kexec() NMI safe", v4.
+
+
+This patch (of 2):
+
+Most acquisitions of kexec_mutex are done via mutex_trylock() - those were
+a direct "translation" from:
+
+  8c5a1cf0ad3a ("kexec: use a mutex for locking rather than xchg()")
+
+there have however been two additions since then that use mutex_lock():
+crash_get_memory_size() and crash_shrink_memory().
+
+A later commit will replace said mutex with an atomic variable, and
+locking operations will become atomic_cmpxchg(). Rather than having those
+mutex_lock() become while (atomic_cmpxchg(&lock, 0, 1)), turn them into
+trylocks that can return -EBUSY on acquisition failure.
+
+This does halve the printable size of the crash kernel, but that's still
+neighbouring 2G for 32bit kernels which should be ample enough.
+
+Link: https://lkml.kernel.org/r/20220630223258.4144112-1-vschneid@redhat.com
+Link: https://lkml.kernel.org/r/20220630223258.4144112-2-vschneid@redhat.com
+Signed-off-by: Valentin Schneider
+Cc: Arnd Bergmann
+Cc: "Eric W . Biederman"
+Cc: Juri Lelli
+Cc: Luis Claudio R. Goncalves
+Cc: Miaohe Lin
+Cc: Petr Mladek
+Cc: Sebastian Andrzej Siewior
+Cc: Thomas Gleixner
+Cc: Baoquan He
+Signed-off-by: Andrew Morton
+Signed-off-by: Wen Yang
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/kexec.h |    2 +-
+ kernel/kexec_core.c   |   12 ++++++++----
+ kernel/ksysfs.c       |    7 ++++++-
+ 3 files changed, 15 insertions(+), 6 deletions(-)
+
+--- a/include/linux/kexec.h
++++ b/include/linux/kexec.h
+@@ -380,8 +380,8 @@ extern note_buf_t __percpu *crash_notes;
+ extern bool kexec_in_progress;
+ 
+ int crash_shrink_memory(unsigned long new_size);
+-size_t crash_get_memory_size(void);
+ void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
++ssize_t crash_get_memory_size(void);
+ 
+ void arch_kexec_protect_crashkres(void);
+ void arch_kexec_unprotect_crashkres(void);
+--- a/kernel/kexec_core.c
++++ b/kernel/kexec_core.c
+@@ -989,13 +989,16 @@ void crash_kexec(struct pt_regs *regs)
+ 	}
+ }
+ 
+-size_t crash_get_memory_size(void)
++ssize_t crash_get_memory_size(void)
+ {
+-	size_t size = 0;
++	ssize_t size = 0;
++
++	if (!mutex_trylock(&kexec_mutex))
++		return -EBUSY;
+ 
+-	mutex_lock(&kexec_mutex);
+ 	if (crashk_res.end != crashk_res.start)
+ 		size = resource_size(&crashk_res);
++
+ 	mutex_unlock(&kexec_mutex);
+ 	return size;
+ }
+@@ -1016,7 +1019,8 @@ int crash_shrink_memory(unsigned long ne
+ 	unsigned long old_size;
+ 	struct resource *ram_res;
+ 
+-	mutex_lock(&kexec_mutex);
++	if (!mutex_trylock(&kexec_mutex))
++		return -EBUSY;
+ 
+ 	if (kexec_crash_image) {
+ 		ret = -ENOENT;
+--- a/kernel/ksysfs.c
++++ b/kernel/ksysfs.c
+@@ -106,7 +106,12 @@ KERNEL_ATTR_RO(kexec_crash_loaded);
+ static ssize_t kexec_crash_size_show(struct kobject *kobj,
+ 				       struct kobj_attribute *attr, char *buf)
+ {
+-	return sprintf(buf, "%zu\n", crash_get_memory_size());
++	ssize_t size = crash_get_memory_size();
++
++	if (size < 0)
++		return size;
++
++	return sprintf(buf, "%zd\n", size);
+ }
+ static ssize_t kexec_crash_size_store(struct kobject *kobj,
+ 				      struct kobj_attribute *attr,
diff --git a/queue-5.10/panic-kexec-make-__crash_kexec-nmi-safe.patch b/queue-5.10/panic-kexec-make-__crash_kexec-nmi-safe.patch
new file mode 100644
index 00000000000..8b1ec9d8012
--- /dev/null
+++ b/queue-5.10/panic-kexec-make-__crash_kexec-nmi-safe.patch
@@ -0,0 +1,228 @@
+From 05c6257433b7212f07a7e53479a8ab038fc1666a Mon Sep 17 00:00:00 2001
+From: Valentin Schneider
+Date: Thu, 30 Jun 2022 23:32:58 +0100
+Subject: panic, kexec: make __crash_kexec() NMI safe
+
+From: Valentin Schneider
+
+commit 05c6257433b7212f07a7e53479a8ab038fc1666a upstream.
+
+Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI
+panic() doesn't work. The cause of that lies in the PREEMPT_RT definition
+of mutex_trylock():
+
+	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
+		return 0;
+
+This prevents an nmi_panic() from executing the main body of
+__crash_kexec() which does the actual kexec into the kdump kernel. The
+warning and return are explained by:
+
+  6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context")
+  [...]
+    The reasons for this are:
+
+     1) There is a potential deadlock in the slowpath
+
+     2) Another cpu which blocks on the rtmutex will boost the task
+	which allegedly locked the rtmutex, but that cannot work
+	because the hard/softirq context borrows the task context.
+
+Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex
+and replace it with an atomic variable. This is somewhat overzealous as
+*some* callsites could keep using a mutex (e.g. the sysfs-facing ones
+like crash_shrink_memory()), but this has the benefit of involving a
+single unified lock and preventing any future NMI-related surprises.
+
+Tested by triggering NMI panics via:
+
+  $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi
+  $ echo 1 > /proc/sys/kernel/unknown_nmi_panic
+  $ echo 1 > /proc/sys/kernel/panic
+
+  $ ipmitool power diag
+
+Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com
+Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context")
+Signed-off-by: Valentin Schneider
+Cc: Arnd Bergmann
+Cc: Baoquan He
+Cc: "Eric W . Biederman"
+Cc: Juri Lelli
+Cc: Luis Claudio R. Goncalves
+Cc: Miaohe Lin
+Cc: Petr Mladek
+Cc: Sebastian Andrzej Siewior
+Cc: Thomas Gleixner
+Signed-off-by: Andrew Morton
+Signed-off-by: Wen Yang
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/kexec.c          |   11 ++++-------
+ kernel/kexec_core.c     |   20 ++++++++++----------
+ kernel/kexec_file.c     |    4 ++--
+ kernel/kexec_internal.h |   15 ++++++++++++++-
+ 4 files changed, 30 insertions(+), 20 deletions(-)
+
+--- a/kernel/kexec.c
++++ b/kernel/kexec.c
+@@ -112,13 +112,10 @@ static int do_kexec_load(unsigned long e
+ 
+ 	/*
+ 	 * Because we write directly to the reserved memory region when loading
+-	 * crash kernels we need a mutex here to prevent multiple crash kernels
+-	 * from attempting to load simultaneously, and to prevent a crash kernel
+-	 * from loading over the top of a in use crash kernel.
+-	 *
+-	 * KISS: always take the mutex.
++	 * crash kernels we need a serialization here to prevent multiple crash
++	 * kernels from attempting to load simultaneously.
+ 	 */
+-	if (!mutex_trylock(&kexec_mutex))
++	if (!kexec_trylock())
+ 		return -EBUSY;
+ 
+ 	if (flags & KEXEC_ON_CRASH) {
+@@ -184,7 +181,7 @@ out:
+ 
+ 	kimage_free(image);
+ out_unlock:
+-	mutex_unlock(&kexec_mutex);
++	kexec_unlock();
+ 	return ret;
+ }
+ 
+--- a/kernel/kexec_core.c
++++ b/kernel/kexec_core.c
+@@ -45,7 +45,7 @@
+ #include <crypto/hash.h>
+ #include "kexec_internal.h"
+ 
+-DEFINE_MUTEX(kexec_mutex);
++atomic_t __kexec_lock = ATOMIC_INIT(0);
+ 
+ /* Per cpu memory for storing cpu states in case of system crash. */
+ note_buf_t __percpu *crash_notes;
+@@ -943,7 +943,7 @@ int kexec_load_disabled;
+  */
+ void __noclone __crash_kexec(struct pt_regs *regs)
+ {
+-	/* Take the kexec_mutex here to prevent sys_kexec_load
++	/* Take the kexec_lock here to prevent sys_kexec_load
+ 	 * running on one cpu from replacing the crash kernel
+ 	 * we are using after a panic on a different cpu.
+ 	 *
+@@ -951,7 +951,7 @@ void __noclone __crash_kexec(struct pt_r
+ 	 * of memory the xchg(&kexec_crash_image) would be
+ 	 * sufficient. But since I reuse the memory...
+ 	 */
+-	if (mutex_trylock(&kexec_mutex)) {
++	if (kexec_trylock()) {
+ 		if (kexec_crash_image) {
+ 			struct pt_regs fixed_regs;
+ 
+@@ -960,7 +960,7 @@ void __noclone __crash_kexec(struct pt_r
+ 			machine_crash_shutdown(&fixed_regs);
+ 			machine_kexec(kexec_crash_image);
+ 		}
+-		mutex_unlock(&kexec_mutex);
++		kexec_unlock();
+ 	}
+ }
+ STACK_FRAME_NON_STANDARD(__crash_kexec);
+@@ -993,13 +993,13 @@ ssize_t crash_get_memory_size(void)
+ {
+ 	ssize_t size = 0;
+ 
+-	if (!mutex_trylock(&kexec_mutex))
++	if (!kexec_trylock())
+ 		return -EBUSY;
+ 
+ 	if (crashk_res.end != crashk_res.start)
+ 		size = resource_size(&crashk_res);
+ 
+-	mutex_unlock(&kexec_mutex);
++	kexec_unlock();
+ 	return size;
+ }
+ 
+@@ -1019,7 +1019,7 @@ int crash_shrink_memory(unsigned long ne
+ 	unsigned long old_size;
+ 	struct resource *ram_res;
+ 
+-	if (!mutex_trylock(&kexec_mutex))
++	if (!kexec_trylock())
+ 		return -EBUSY;
+ 
+ 	if (kexec_crash_image) {
+@@ -1058,7 +1058,7 @@ int crash_shrink_memory(unsigned long ne
+ 	insert_resource(&iomem_resource, ram_res);
+ 
+ unlock:
+-	mutex_unlock(&kexec_mutex);
++	kexec_unlock();
+ 	return ret;
+ }
+ 
+@@ -1130,7 +1130,7 @@ int kernel_kexec(void)
+ {
+ 	int error = 0;
+ 
+-	if (!mutex_trylock(&kexec_mutex))
++	if (!kexec_trylock())
+ 		return -EBUSY;
+ 	if (!kexec_image) {
+ 		error = -EINVAL;
+@@ -1205,7 +1205,7 @@ int kernel_kexec(void)
+ #endif
+ 
+  Unlock:
+-	mutex_unlock(&kexec_mutex);
++	kexec_unlock();
+ 	return error;
+ }
+ 
+--- a/kernel/kexec_file.c
++++ b/kernel/kexec_file.c
+@@ -343,7 +343,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, ke
+ 
+ 	image = NULL;
+ 
+-	if (!mutex_trylock(&kexec_mutex))
++	if (!kexec_trylock())
+ 		return -EBUSY;
+ 
+ 	dest_image = &kexec_image;
+@@ -415,7 +415,7 @@ out:
+ 	if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
+ 		arch_kexec_protect_crashkres();
+ 
+-	mutex_unlock(&kexec_mutex);
++	kexec_unlock();
+ 	kimage_free(image);
+ 	return ret;
+ }
+ 
+--- a/kernel/kexec_internal.h
++++ b/kernel/kexec_internal.h
+@@ -15,7 +15,20 @@ int kimage_is_destination_range(struct k
+ 
+ int machine_kexec_post_load(struct kimage *image);
+ 
+-extern struct mutex kexec_mutex;
++/*
++ * Whatever is used to serialize accesses to the kexec_crash_image needs to be
++ * NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
++ * "simple" atomic variable that is acquired with a cmpxchg().
++ */
++extern atomic_t __kexec_lock;
++static inline bool kexec_trylock(void)
++{
++	return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
++}
++static inline void kexec_unlock(void)
++{
++	atomic_set_release(&__kexec_lock, 0);
++}
+ 
+ #ifdef CONFIG_KEXEC_FILE
+ #include <linux/purgatory.h>
diff --git a/queue-5.10/series b/queue-5.10/series
index 4d014179d2d..24702d5c946 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -118,3 +118,6 @@ kbuild-check-the-minimum-assembler-version-in-kconfig.patch
 kbuild-switch-to-f-variants-of-integrated-assembler-flag.patch
 kbuild-check-config_as_is_llvm-instead-of-llvm_ias.patch
 riscv-handle-zicsr-zifencei-issues-between-clang-and-binutils.patch
+kexec-move-locking-into-do_kexec_load.patch
+kexec-turn-all-kexec_mutex-acquisitions-into-trylocks.patch
+panic-kexec-make-__crash_kexec-nmi-safe.patch
-- 
2.47.3
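
The locking pattern these three backports converge on replaces kexec_mutex
with a single atomic flag taken via one cmpxchg: no sleeping, and no rtmutex
owner to priority-boost, which is what makes it legal in NMI context. Below
is a minimal userspace sketch of that pattern, for illustration only and not
part of the queued patches: C11 stdatomic.h stands in for the kernel's
atomic_cmpxchg_acquire()/atomic_set_release(), and the demo_* names are
hypothetical.

  /* Illustrative sketch, not kernel code: emulate kexec_trylock() /
   * kexec_unlock() with C11 atomics. */
  #include <errno.h>
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  static atomic_int demo_kexec_lock = 0;	/* mirrors __kexec_lock = ATOMIC_INIT(0) */

  /* Single compare-exchange with acquire ordering on success: usable in
   * contexts where a mutex (especially a PREEMPT_RT rtmutex) must not be
   * taken. */
  static bool demo_trylock(void)
  {
  	int expected = 0;

  	return atomic_compare_exchange_strong_explicit(&demo_kexec_lock,
  						       &expected, 1,
  						       memory_order_acquire,
  						       memory_order_relaxed);
  }

  static void demo_unlock(void)
  {
  	/* Release store pairs with the acquire cmpxchg above. */
  	atomic_store_explicit(&demo_kexec_lock, 0, memory_order_release);
  }

  /* The conversion pattern from the second patch: callers that used to
   * block in mutex_lock() now fail fast with -EBUSY instead. */
  static int demo_locked_work(void)
  {
  	if (!demo_trylock())
  		return -EBUSY;

  	puts("lock held; critical section runs here");
  	demo_unlock();
  	return 0;
  }

  int main(void)
  {
  	return demo_locked_work() ? 1 : 0;
  }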