git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.8-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 Oct 2020 13:03:58 +0000 (14:03 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 Oct 2020 13:03:58 +0000 (14:03 +0100)
added patches:
arm64-link-with-z-norelro-regardless-of-config_relocatable.patch
arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch
arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch
efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch
fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch
x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch
x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch
x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch

queue-5.8/arm64-link-with-z-norelro-regardless-of-config_relocatable.patch [new file with mode: 0644]
queue-5.8/arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch [new file with mode: 0644]
queue-5.8/arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch [new file with mode: 0644]
queue-5.8/efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch [new file with mode: 0644]
queue-5.8/fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch [new file with mode: 0644]
queue-5.8/series
queue-5.8/x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch [new file with mode: 0644]
queue-5.8/x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch [new file with mode: 0644]
queue-5.8/x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch [new file with mode: 0644]

diff --git a/queue-5.8/arm64-link-with-z-norelro-regardless-of-config_relocatable.patch b/queue-5.8/arm64-link-with-z-norelro-regardless-of-config_relocatable.patch
new file mode 100644 (file)
index 0000000..a1759f5
--- /dev/null
@@ -0,0 +1,48 @@
+From 3b92fa7485eba16b05166fddf38ab42f2ff6ab95 Mon Sep 17 00:00:00 2001
+From: Nick Desaulniers <ndesaulniers@google.com>
+Date: Fri, 16 Oct 2020 10:53:39 -0700
+Subject: arm64: link with -z norelro regardless of CONFIG_RELOCATABLE
+
+From: Nick Desaulniers <ndesaulniers@google.com>
+
+commit 3b92fa7485eba16b05166fddf38ab42f2ff6ab95 upstream.
+
+With CONFIG_EXPERT=y, CONFIG_KASAN=y, CONFIG_RANDOMIZE_BASE=n,
+CONFIG_RELOCATABLE=n, we observe the following failure when trying to
+link the kernel image with LD=ld.lld:
+
+error: section: .exit.data is not contiguous with other relro sections
+
+ld.lld defaults to -z relro while ld.bfd defaults to -z norelro. This
+was previously fixed, but only for CONFIG_RELOCATABLE=y.
+
+Fixes: 3bbd3db86470 ("arm64: relocatable: fix inconsistencies in linker script and options")
+Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20201016175339.2429280-1-ndesaulniers@google.com
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/Makefile |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/Makefile
++++ b/arch/arm64/Makefile
+@@ -10,14 +10,14 @@
+ #
+ # Copyright (C) 1995-2001 by Russell King
+-LDFLAGS_vmlinux       :=--no-undefined -X
++LDFLAGS_vmlinux       :=--no-undefined -X -z norelro
+ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
+ ifeq ($(CONFIG_RELOCATABLE), y)
+ # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
+ # for relative relocs, since this leads to better Image compression
+ # with the relocation offsets always being zero.
+-LDFLAGS_vmlinux               += -shared -Bsymbolic -z notext -z norelro \
++LDFLAGS_vmlinux               += -shared -Bsymbolic -z notext \
+                       $(call ld-option, --no-apply-dynamic-relocs)
+ endif
diff --git a/queue-5.8/arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch b/queue-5.8/arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch
new file mode 100644 (file)
index 0000000..730381a
--- /dev/null
@@ -0,0 +1,57 @@
+From 18fce56134c987e5b4eceddafdbe4b00c07e2ae1 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Thu, 16 Jul 2020 17:11:09 +0100
+Subject: arm64: Run ARCH_WORKAROUND_1 enabling code on all CPUs
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 18fce56134c987e5b4eceddafdbe4b00c07e2ae1 upstream.
+
+Commit 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack
+thereof") changed the way we deal with ARCH_WORKAROUND_1, by moving most
+of the enabling code to the .matches() callback.
+
+This has the unfortunate effect that the workaround gets only enabled on
+the first affected CPU, and no other.
+
+In order to address this, forcefully call the .matches() callback from a
+.cpu_enable() callback, which brings us back to the original behaviour.
+
+Fixes: 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack thereof")
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/cpu_errata.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/arch/arm64/kernel/cpu_errata.c
++++ b/arch/arm64/kernel/cpu_errata.c
+@@ -599,6 +599,12 @@ check_branch_predictor(const struct arm6
+       return (need_wa > 0);
+ }
++static void
++cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap)
++{
++      cap->matches(cap, SCOPE_LOCAL_CPU);
++}
++
+ static const __maybe_unused struct midr_range tx2_family_cpus[] = {
+       MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+       MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
+@@ -890,9 +896,11 @@ const struct arm64_cpu_capabilities arm6
+       },
+ #endif
+       {
++              .desc = "Branch predictor hardening",
+               .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+               .matches = check_branch_predictor,
++              .cpu_enable = cpu_enable_branch_predictor_hardening,
+       },
+ #ifdef CONFIG_HARDEN_EL2_VECTORS
+       {
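
For readers following the queue outside a kernel tree, here is a standalone C sketch of the capability flow the commit message describes. The struct field and callback names mirror the hunk above; the framework loop, the CPU count and the "every CPU is affected" outcome are assumptions made only for illustration.

/*
 * Simplified userspace model of the arm64 capability flow -- not the
 * kernel's real implementation, only an illustration of why forcing
 * .matches() from .cpu_enable() re-runs the mitigation on every CPU.
 */
#include <stdbool.h>
#include <stdio.h>

#define SCOPE_LOCAL_CPU 1
#define NR_CPUS 4

struct cpu_capability {
	const char *desc;
	bool (*matches)(const struct cpu_capability *cap, int scope);
	void (*cpu_enable)(const struct cpu_capability *cap);
};

/* Detects the erratum on the current CPU and, as a side effect,
 * installs the per-CPU mitigation (as check_branch_predictor() does). */
static bool check_branch_predictor(const struct cpu_capability *cap, int scope)
{
	printf("CPU: probe + install mitigation for '%s'\n", cap->desc);
	return true;	/* pretend every CPU is affected */
}

/* The fix: re-run the detection/enable logic from .cpu_enable(),
 * which the framework calls on *every* CPU. */
static void cpu_enable_branch_predictor_hardening(const struct cpu_capability *cap)
{
	cap->matches(cap, SCOPE_LOCAL_CPU);
}

int main(void)
{
	const struct cpu_capability cap = {
		.desc       = "Branch predictor hardening",
		.matches    = check_branch_predictor,
		.cpu_enable = cpu_enable_branch_predictor_hardening,
	};
	bool system_has_cap = false;

	/* Detection phase: the framework stops calling .matches() once
	 * one CPU is found to be affected, so only CPU 0 gets mitigated here. */
	for (int cpu = 0; cpu < NR_CPUS && !system_has_cap; cpu++)
		system_has_cap = cap.matches(&cap, SCOPE_LOCAL_CPU);

	/* Enable phase: .cpu_enable() runs on every CPU, so the forced
	 * call back into .matches() mitigates all of them. */
	if (system_has_cap)
		for (int cpu = 0; cpu < NR_CPUS; cpu++)
			cap.cpu_enable(&cap);

	return 0;
}
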
diff --git a/queue-5.8/arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch b/queue-5.8/arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch
new file mode 100644 (file)
index 0000000..e22b265
--- /dev/null
@@ -0,0 +1,55 @@
+From 39533e12063be7f55e3d6ae21ffe067799d542a4 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Thu, 16 Jul 2020 17:11:10 +0100
+Subject: arm64: Run ARCH_WORKAROUND_2 enabling code on all CPUs
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 39533e12063be7f55e3d6ae21ffe067799d542a4 upstream.
+
+Commit 606f8e7b27bf ("arm64: capabilities: Use linear array for
+detection and verification") changed the way we deal with per-CPU errata
+by only calling the .matches() callback until one CPU is found to be
+affected. At this point, .matches() stop being called, and .cpu_enable()
+will be called on all CPUs.
+
+This breaks the ARCH_WORKAROUND_2 handling, as only a single CPU will be
+mitigated.
+
+In order to address this, forcefully call the .matches() callback from a
+.cpu_enable() callback, which brings us back to the original behaviour.
+
+Fixes: 606f8e7b27bf ("arm64: capabilities: Use linear array for detection and verification")
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/cpu_errata.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/arm64/kernel/cpu_errata.c
++++ b/arch/arm64/kernel/cpu_errata.c
+@@ -457,6 +457,12 @@ out_printmsg:
+       return required;
+ }
++static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap)
++{
++      if (ssbd_state != ARM64_SSBD_FORCE_DISABLE)
++              cap->matches(cap, SCOPE_LOCAL_CPU);
++}
++
+ /* known invulnerable cores */
+ static const struct midr_range arm64_ssb_cpus[] = {
+       MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+@@ -914,6 +920,7 @@ const struct arm64_cpu_capabilities arm6
+               .capability = ARM64_SSBD,
+               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+               .matches = has_ssbd_mitigation,
++              .cpu_enable = cpu_enable_ssbd_mitigation,
+               .midr_range_list = arm64_ssb_cpus,
+       },
+ #ifdef CONFIG_ARM64_ERRATUM_1418040
diff --git a/queue-5.8/efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch b/queue-5.8/efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch
new file mode 100644 (file)
index 0000000..d18329a
--- /dev/null
@@ -0,0 +1,66 @@
+From d32de9130f6c79533508e2c7879f18997bfbe2a0 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Sat, 26 Sep 2020 10:52:42 +0200
+Subject: efi/arm64: libstub: Deal gracefully with EFI_RNG_PROTOCOL failure
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit d32de9130f6c79533508e2c7879f18997bfbe2a0 upstream.
+
+Currently, on arm64, we abort on any failure from efi_get_random_bytes()
+other than EFI_NOT_FOUND when it comes to setting the physical seed for
+KASLR, but ignore such failures when obtaining the seed for virtual
+KASLR or for early seeding of the kernel's entropy pool via the config
+table. This is inconsistent, and may lead to unexpected boot failures.
+
+So let's permit any failure for the physical seed, and simply report
+the error code if it does not equal EFI_NOT_FOUND.
+
+Cc: <stable@vger.kernel.org> # v5.8+
+Reported-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/firmware/efi/libstub/arm64-stub.c |    8 +++++---
+ drivers/firmware/efi/libstub/fdt.c        |    4 +---
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/firmware/efi/libstub/arm64-stub.c
++++ b/drivers/firmware/efi/libstub/arm64-stub.c
+@@ -62,10 +62,12 @@ efi_status_t handle_kernel_image(unsigne
+                       status = efi_get_random_bytes(sizeof(phys_seed),
+                                                     (u8 *)&phys_seed);
+                       if (status == EFI_NOT_FOUND) {
+-                              efi_info("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
++                              efi_info("EFI_RNG_PROTOCOL unavailable, KASLR will be disabled\n");
++                              efi_nokaslr = true;
+                       } else if (status != EFI_SUCCESS) {
+-                              efi_err("efi_get_random_bytes() failed\n");
+-                              return status;
++                              efi_err("efi_get_random_bytes() failed (0x%lx), KASLR will be disabled\n",
++                                      status);
++                              efi_nokaslr = true;
+                       }
+               } else {
+                       efi_info("KASLR disabled on kernel command line\n");
+--- a/drivers/firmware/efi/libstub/fdt.c
++++ b/drivers/firmware/efi/libstub/fdt.c
+@@ -136,7 +136,7 @@ static efi_status_t update_fdt(void *ori
+       if (status)
+               goto fdt_set_fail;
+-      if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
++      if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) {
+               efi_status_t efi_status;
+               efi_status = efi_get_random_bytes(sizeof(fdt_val64),
+@@ -145,8 +145,6 @@ static efi_status_t update_fdt(void *ori
+                       status = fdt_setprop_var(fdt, node, "kaslr-seed", fdt_val64);
+                       if (status)
+                               goto fdt_set_fail;
+-              } else if (efi_status != EFI_NOT_FOUND) {
+-                      return efi_status;
+               }
+       }
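
As a reading aid, the sketch below models the seed-failure policy after this change in a standalone program. The log strings and the efi_nokaslr flag come from the hunks above; the status constants and the failure injected in main() are illustrative assumptions.

/*
 * Standalone model of the physical-seed failure policy after this
 * patch: any failure degrades to "KASLR disabled" instead of aborting
 * the boot, and only non-EFI_NOT_FOUND failures are logged with code.
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long efi_status_t;
#define EFI_SUCCESS       0UL
#define EFI_NOT_FOUND     14UL	/* illustrative value */
#define EFI_DEVICE_ERROR   7UL	/* illustrative value */

static bool efi_nokaslr;

static void handle_phys_seed(efi_status_t status)
{
	if (status == EFI_NOT_FOUND) {
		/* No EFI_RNG_PROTOCOL at all: quietly fall back. */
		printf("EFI_RNG_PROTOCOL unavailable, KASLR will be disabled\n");
		efi_nokaslr = true;
	} else if (status != EFI_SUCCESS) {
		/* Protocol present but failing: report the code, keep booting. */
		printf("efi_get_random_bytes() failed (0x%lx), KASLR will be disabled\n",
		       status);
		efi_nokaslr = true;
	}
	/* EFI_SUCCESS: efi_nokaslr stays false and the seed is used. */
}

int main(void)
{
	handle_phys_seed(EFI_DEVICE_ERROR);
	/* Mirrors the fdt.c hunk: the kaslr-seed property is only set
	 * when efi_nokaslr stayed false. */
	printf("kaslr-seed exported to the device tree: %s\n",
	       efi_nokaslr ? "no" : "yes");
	return 0;
}
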
diff --git a/queue-5.8/fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch b/queue-5.8/fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch
new file mode 100644 (file)
index 0000000..990f7a2
--- /dev/null
@@ -0,0 +1,47 @@
+From 06e67b849ab910a49a629445f43edb074153d0eb Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Fri, 2 Oct 2020 10:38:14 -0700
+Subject: fs/kernel_read_file: Remove FIRMWARE_EFI_EMBEDDED enum
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 06e67b849ab910a49a629445f43edb074153d0eb upstream.
+
+The "FIRMWARE_EFI_EMBEDDED" enum is a "where", not a "what". It
+should not be distinguished separately from just "FIRMWARE", as this
+confuses the LSMs about what is being loaded. Additionally, there was
+no actual validation of the firmware contents happening.
+
+Fixes: e4c2c0ff00ec ("firmware: Add new platform fallback mechanism and firmware_request_platform()")
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
+Acked-by: Scott Branden <scott.branden@broadcom.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20201002173828.2099543-3-keescook@chromium.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/base/firmware_loader/fallback_platform.c |    2 +-
+ include/linux/fs.h                               |    1 -
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/base/firmware_loader/fallback_platform.c
++++ b/drivers/base/firmware_loader/fallback_platform.c
+@@ -17,7 +17,7 @@ int firmware_fallback_platform(struct fw
+       if (!(opt_flags & FW_OPT_FALLBACK_PLATFORM))
+               return -ENOENT;
+-      rc = security_kernel_load_data(LOADING_FIRMWARE_EFI_EMBEDDED);
++      rc = security_kernel_load_data(LOADING_FIRMWARE);
+       if (rc)
+               return rc;
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -3011,7 +3011,6 @@ extern int do_pipe_flags(int *, int);
+       id(UNKNOWN, unknown)            \
+       id(FIRMWARE, firmware)          \
+       id(FIRMWARE_PREALLOC_BUFFER, firmware)  \
+-      id(FIRMWARE_EFI_EMBEDDED, firmware)     \
+       id(MODULE, kernel-module)               \
+       id(KEXEC_IMAGE, kexec-image)            \
+       id(KEXEC_INITRAMFS, kexec-initramfs)    \
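
The include/linux/fs.h hunk edits an x-macro list: each id(ENUM, name) line is expanded more than once, into enum constants (the LOADING_* values used in the fallback_platform.c hunk) and into matching name strings, so deleting the FIRMWARE_EFI_EMBEDDED line removes the entry from every expansion at once. Below is a minimal model of that pattern for readers unfamiliar with it; the macro and table names are invented for the example and are not the kernel's.

/*
 * Simplified model of the id() x-macro pattern edited above -- macro
 * and table names here are invented, not the kernel's definitions.
 */
#include <stdio.h>

#define LOAD_DATA_IDS(id)          \
	id(UNKNOWN, unknown)       \
	id(FIRMWARE, firmware)     \
	id(MODULE, kernel-module)  \
	id(KEXEC_IMAGE, kexec-image)

#define ID_ENUMIFY(ENUM, name)   LOADING_##ENUM,
#define ID_STRINGIFY(ENUM, name) #name,

enum load_data_id {
	LOAD_DATA_IDS(ID_ENUMIFY)	/* LOADING_UNKNOWN, LOADING_FIRMWARE, ... */
};

static const char *const load_data_str[] = {
	LOAD_DATA_IDS(ID_STRINGIFY)	/* "unknown", "firmware", ... */
};

int main(void)
{
	/* One source line per entry keeps the enum and its name in sync;
	 * dropping an id(...) line removes it from both expansions. */
	printf("%d -> %s\n", LOADING_FIRMWARE, load_data_str[LOADING_FIRMWARE]);
	return 0;
}
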
index 6ca858b6e1c40f19e0b61be023566c37e71f6fcc..b405edfdf902877c3a06c99fea0eb739dfbe8dce 100644 (file)
@@ -15,3 +15,11 @@ io_uring-fix-use-of-xarray-in-__io_uring_files_cancel.patch
 io_uring-fix-xarray-usage-in-io_uring_add_task_file.patch
 io_uring-convert-advanced-xarray-uses-to-the-normal-api.patch
 scripts-setlocalversion-make-git-describe-output-more-reliable.patch
+efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch
+fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch
+arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch
+arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch
+arm64-link-with-z-norelro-regardless-of-config_relocatable.patch
+x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch
+x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch
+x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch
diff --git a/queue-5.8/x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch b/queue-5.8/x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch
new file mode 100644 (file)
index 0000000..44918bc
--- /dev/null
@@ -0,0 +1,182 @@
+From 5da8e4a658109e3b7e1f45ae672b7c06ac3e7158 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 5 Oct 2020 20:40:25 -0700
+Subject: x86/copy_mc: Introduce copy_mc_enhanced_fast_string()
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 5da8e4a658109e3b7e1f45ae672b7c06ac3e7158 upstream.
+
+The motivations to go rework memcpy_mcsafe() are that the benefit of
+doing slow and careful copies is obviated on newer CPUs, and that the
+current opt-in list of CPUs to instrument recovery is broken relative to
+those CPUs.  There is no need to keep an opt-in list up to date on an
+ongoing basis if pmem/dax operations are instrumented for recovery by
+default. With recovery enabled by default the old "mcsafe_key" opt-in to
+careful copying can be made a "fragile" opt-out. Where the "fragile"
+list takes steps to not consume poison across cachelines.
+
+The discussion with Linus made clear that the current "_mcsafe" suffix
+was imprecise to a fault. The operations that are needed by pmem/dax are
+to copy from a source address that might throw #MC to a destination that
+may write-fault, if it is a user page.
+
+So copy_to_user_mcsafe() becomes copy_mc_to_user() to indicate
+the separate precautions taken on source and destination.
+copy_mc_to_kernel() is introduced as a non-SMAP version that does not
+expect write-faults on the destination, but is still prepared to abort
+with an error code upon taking #MC.
+
+The original copy_mc_fragile() implementation had negative performance
+implications since it did not use the fast-string instruction sequence
+to perform copies. For this reason copy_mc_to_kernel() fell back to
+plain memcpy() to preserve performance on platforms that did not indicate
+the capability to recover from machine check exceptions. However, that
+capability detection was not architectural and now that some platforms
+can recover from fast-string consumption of memory errors the memcpy()
+fallback now causes these more capable platforms to fail.
+
+Introduce copy_mc_enhanced_fast_string() as the fast default
+implementation of copy_mc_to_kernel() and finalize the transition of
+copy_mc_fragile() to be a platform quirk to indicate 'copy-carefully'.
+With this in place, copy_mc_to_kernel() is fast and recovery-ready by
+default regardless of hardware capability.
+
+Thanks to Vivek for identifying that copy_user_generic() is not suitable
+as the copy_mc_to_user() backend since the #MC handler explicitly checks
+ex_has_fault_handler(). Thanks to the 0day robot for catching a
+performance bug in the x86/copy_mc_to_user implementation.
+
+ [ bp: Add the "why" for this change from the 0/2th message, massage. ]
+
+Fixes: 92b0729c34ca ("x86/mm, x86/mce: Add memcpy_mcsafe()")
+Reported-by: Erwin Tsaur <erwin.tsaur@intel.com>
+Reported-by: 0day robot <lkp@intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Tested-by: Erwin Tsaur <erwin.tsaur@intel.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/160195562556.2163339.18063423034951948973.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/lib/copy_mc.c    |   32 +++++++++++++++++++++++---------
+ arch/x86/lib/copy_mc_64.S |   36 ++++++++++++++++++++++++++++++++++++
+ tools/objtool/check.c     |    1 +
+ 3 files changed, 60 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/lib/copy_mc.c
++++ b/arch/x86/lib/copy_mc.c
+@@ -45,6 +45,8 @@ void enable_copy_mc_fragile(void)
+ #define copy_mc_fragile_enabled (0)
+ #endif
++unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len);
++
+ /**
+  * copy_mc_to_kernel - memory copy that handles source exceptions
+  *
+@@ -52,9 +54,11 @@ void enable_copy_mc_fragile(void)
+  * @src:      source address
+  * @len:      number of bytes to copy
+  *
+- * Call into the 'fragile' version on systems that have trouble
+- * actually do machine check recovery. Everyone else can just
+- * use memcpy().
++ * Call into the 'fragile' version on systems that benefit from avoiding
++ * corner case poison consumption scenarios, For example, accessing
++ * poison across 2 cachelines with a single instruction. Almost all
++ * other uses case can use copy_mc_enhanced_fast_string() for a fast
++ * recoverable copy, or fallback to plain memcpy.
+  *
+  * Return 0 for success, or number of bytes not copied if there was an
+  * exception.
+@@ -63,6 +67,8 @@ unsigned long __must_check copy_mc_to_ke
+ {
+       if (copy_mc_fragile_enabled)
+               return copy_mc_fragile(dst, src, len);
++      if (static_cpu_has(X86_FEATURE_ERMS))
++              return copy_mc_enhanced_fast_string(dst, src, len);
+       memcpy(dst, src, len);
+       return 0;
+ }
+@@ -72,11 +78,19 @@ unsigned long __must_check copy_mc_to_us
+ {
+       unsigned long ret;
+-      if (!copy_mc_fragile_enabled)
+-              return copy_user_generic(dst, src, len);
++      if (copy_mc_fragile_enabled) {
++              __uaccess_begin();
++              ret = copy_mc_fragile(dst, src, len);
++              __uaccess_end();
++              return ret;
++      }
++
++      if (static_cpu_has(X86_FEATURE_ERMS)) {
++              __uaccess_begin();
++              ret = copy_mc_enhanced_fast_string(dst, src, len);
++              __uaccess_end();
++              return ret;
++      }
+-      __uaccess_begin();
+-      ret = copy_mc_fragile(dst, src, len);
+-      __uaccess_end();
+-      return ret;
++      return copy_user_generic(dst, src, len);
+ }
+--- a/arch/x86/lib/copy_mc_64.S
++++ b/arch/x86/lib/copy_mc_64.S
+@@ -124,4 +124,40 @@ EXPORT_SYMBOL_GPL(copy_mc_fragile)
+       _ASM_EXTABLE(.L_write_words, .E_write_words)
+       _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+ #endif /* CONFIG_X86_MCE */
++
++/*
++ * copy_mc_enhanced_fast_string - memory copy with exception handling
++ *
++ * Fast string copy + fault / exception handling. If the CPU does
++ * support machine check exception recovery, but does not support
++ * recovering from fast-string exceptions then this CPU needs to be
++ * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any
++ * machine check recovery support this version should be no slower than
++ * standard memcpy.
++ */
++SYM_FUNC_START(copy_mc_enhanced_fast_string)
++      movq %rdi, %rax
++      movq %rdx, %rcx
++.L_copy:
++      rep movsb
++      /* Copy successful. Return zero */
++      xorl %eax, %eax
++      ret
++SYM_FUNC_END(copy_mc_enhanced_fast_string)
++
++      .section .fixup, "ax"
++.E_copy:
++      /*
++       * On fault %rcx is updated such that the copy instruction could
++       * optionally be restarted at the fault position, i.e. it
++       * contains 'bytes remaining'. A non-zero return indicates error
++       * to copy_mc_generic() users, or indicate short transfers to
++       * user-copy routines.
++       */
++      movq %rcx, %rax
++      ret
++
++      .previous
++
++      _ASM_EXTABLE_FAULT(.L_copy, .E_copy)
+ #endif /* !CONFIG_UML */
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -550,6 +550,7 @@ static const char *uaccess_safe_builtin[
+       "csum_partial_copy_generic",
+       "copy_mc_fragile",
+       "copy_mc_fragile_handle_tail",
++      "copy_mc_enhanced_fast_string",
+       "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
+       NULL
+ };
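
Taken together, the copy_mc.c hunks give copy_mc_to_kernel() and copy_mc_to_user() a three-way dispatch (fragile quirk, ERMS fast string, plain memcpy) and a "bytes not copied" return contract. The standalone sketch below models that behaviour in userspace; the feature flags and the simulated fault are assumptions for illustration, and only the selection order and the return convention mirror the patch.

/*
 * Userspace model of the dispatch and return contract added above.
 * The feature flags and the simulated machine check are assumptions;
 * each backend returns the number of bytes NOT copied, 0 on success.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool copy_mc_fragile_enabled;	/* platform quirk: copy carefully */
static bool cpu_has_erms;		/* fast-string copy is recoverable */

static unsigned long copy_mc_fragile(void *dst, const void *src, unsigned len)
{
	memcpy(dst, src, len);		/* pretend no poison was hit */
	return 0;
}

static unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src,
						  unsigned len)
{
	unsigned remaining = len / 2;	/* simulate a #MC halfway through */
	memcpy(dst, src, len - remaining);
	return remaining;
}

static unsigned long copy_mc_to_kernel(void *dst, const void *src, unsigned len)
{
	if (copy_mc_fragile_enabled)
		return copy_mc_fragile(dst, src, len);
	if (cpu_has_erms)
		return copy_mc_enhanced_fast_string(dst, src, len);
	memcpy(dst, src, len);		/* no recovery support: plain copy */
	return 0;
}

int main(void)
{
	char src[64] = "pmem block", dst[64] = { 0 };
	unsigned long rem;

	cpu_has_erms = true;
	rem = copy_mc_to_kernel(dst, src, sizeof(src));
	if (rem)	/* caller treats a short copy as a media error */
		printf("short copy: %lu bytes not copied -> return -EIO\n", rem);

	copy_mc_fragile_enabled = true;	/* model a quirked platform */
	rem = copy_mc_to_kernel(dst, src, sizeof(src));
	printf("fragile path: %lu bytes not copied\n", rem);
	return 0;
}
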
diff --git a/queue-5.8/x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch b/queue-5.8/x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch
new file mode 100644 (file)
index 0000000..48efc32
--- /dev/null
@@ -0,0 +1,48 @@
+From 035fff1f7aab43e420e0098f0854470a5286fb83 Mon Sep 17 00:00:00 2001
+From: Randy Dunlap <rdunlap@infradead.org>
+Date: Fri, 21 Aug 2020 17:10:27 -0700
+Subject: x86/PCI: Fix intel_mid_pci.c build error when ACPI is not enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+commit 035fff1f7aab43e420e0098f0854470a5286fb83 upstream.
+
+Fix build error when CONFIG_ACPI is not set/enabled by adding the header
+file <asm/acpi.h> which contains a stub for the function in the build
+error.
+
+    ../arch/x86/pci/intel_mid_pci.c: In function ‘intel_mid_pci_init’:
+    ../arch/x86/pci/intel_mid_pci.c:303:2: error: implicit declaration of function ‘acpi_noirq_set’; did you mean ‘acpi_irq_get’? [-Werror=implicit-function-declaration]
+      acpi_noirq_set();
+
+Fixes: a912a7584ec3 ("x86/platform/intel-mid: Move PCI initialization to arch_init()")
+Link: https://lore.kernel.org/r/ea903917-e51b-4cc9-2680-bc1e36efa026@infradead.org
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Reviewed-by: Jesse Barnes <jsbarnes@google.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org     # v4.16+
+Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Cc: Len Brown <lenb@kernel.org>
+Cc: Jesse Barnes <jsbarnes@google.com>
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/pci/intel_mid_pci.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/pci/intel_mid_pci.c
++++ b/arch/x86/pci/intel_mid_pci.c
+@@ -33,6 +33,7 @@
+ #include <asm/hw_irq.h>
+ #include <asm/io_apic.h>
+ #include <asm/intel-mid.h>
++#include <asm/acpi.h>
+ #define PCIE_CAP_OFFSET       0x100
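
The fix relies on <asm/acpi.h> supplying a stub for acpi_noirq_set() when ACPI is not enabled, as the commit message notes; with the include missing, no declaration was visible at all. The toy program below illustrates that header-stub pattern under stated assumptions and is not the kernel's actual header.

/*
 * Standalone illustration of the "header-provided stub" pattern the
 * commit message describes -- a toy, not the kernel's <asm/acpi.h>.
 */
#include <stdio.h>

/* #define CONFIG_ACPI 1 */	/* define to model an ACPI-enabled build */

#ifdef CONFIG_ACPI
static inline void acpi_noirq_set(void) { puts("ACPI IRQ routing disabled"); }
#else
/* ACPI compiled out: the empty stub keeps callers building unchanged. */
static inline void acpi_noirq_set(void) { }
#endif

int main(void)
{
	acpi_noirq_set();	/* compiles in both configurations */
	return 0;
}
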
diff --git a/queue-5.8/x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch b/queue-5.8/x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch
new file mode 100644 (file)
index 0000000..d55f497
--- /dev/null
@@ -0,0 +1,2346 @@
+From ec6347bb43395cb92126788a1a5b25302543f815 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Mon, 5 Oct 2020 20:40:16 -0700
+Subject: x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit ec6347bb43395cb92126788a1a5b25302543f815 upstream.
+
+In reaction to a proposal to introduce a memcpy_mcsafe_fast()
+implementation Linus points out that memcpy_mcsafe() is poorly named
+relative to communicating the scope of the interface. Specifically what
+addresses are valid to pass as source, destination, and what faults /
+exceptions are handled.
+
+Of particular concern is that even though x86 might be able to handle
+the semantics of copy_mc_to_user() with its common copy_user_generic()
+implementation other archs likely need / want an explicit path for this
+case:
+
+  On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
+  >
+  > On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
+  > >
+  > > However now I see that copy_user_generic() works for the wrong reason.
+  > > It works because the exception on the source address due to poison
+  > > looks no different than a write fault on the user address to the
+  > > caller, it's still just a short copy. So it makes copy_to_user() work
+  > > for the wrong reason relative to the name.
+  >
+  > Right.
+  >
+  > And it won't work that way on other architectures. On x86, we have a
+  > generic function that can take faults on either side, and we use it
+  > for both cases (and for the "in_user" case too), but that's an
+  > artifact of the architecture oddity.
+  >
+  > In fact, it's probably wrong even on x86 - because it can hide bugs -
+  > but writing those things is painful enough that everybody prefers
+  > having just one function.
+
+Replace a single top-level memcpy_mcsafe() with either
+copy_mc_to_user(), or copy_mc_to_kernel().
+
+Introduce an x86 copy_mc_fragile() name as the rename for the
+low-level x86 implementation formerly named memcpy_mcsafe(). It is used
+as the slow / careful backend that is supplanted by a fast
+copy_mc_generic() in a follow-on patch.
+
+One side-effect of this reorganization is that separating copy_mc_64.S
+to its own file means that perf no longer needs to track dependencies
+for its memcpy_64.S benchmarks.
+
+ [ bp: Massage a bit. ]
+
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Michael Ellerman <mpe@ellerman.id.au>
+Cc: <stable@vger.kernel.org>
+Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com
+Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/Kconfig                                   |    2 
+ arch/powerpc/include/asm/string.h                      |    2 
+ arch/powerpc/include/asm/uaccess.h                     |   40 +-
+ arch/powerpc/lib/Makefile                              |    2 
+ arch/powerpc/lib/copy_mc_64.S                          |  242 +++++++++++++++++
+ arch/powerpc/lib/memcpy_mcsafe_64.S                    |  242 -----------------
+ arch/x86/Kconfig                                       |    2 
+ arch/x86/Kconfig.debug                                 |    2 
+ arch/x86/include/asm/copy_mc_test.h                    |   75 +++++
+ arch/x86/include/asm/mce.h                             |    9 
+ arch/x86/include/asm/mcsafe_test.h                     |   75 -----
+ arch/x86/include/asm/string_64.h                       |   32 --
+ arch/x86/include/asm/uaccess.h                         |    9 
+ arch/x86/include/asm/uaccess_64.h                      |   20 -
+ arch/x86/kernel/cpu/mce/core.c                         |    8 
+ arch/x86/kernel/quirks.c                               |   10 
+ arch/x86/lib/Makefile                                  |    1 
+ arch/x86/lib/copy_mc.c                                 |   82 +++++
+ arch/x86/lib/copy_mc_64.S                              |  127 ++++++++
+ arch/x86/lib/memcpy_64.S                               |  115 --------
+ arch/x86/lib/usercopy_64.c                             |   21 -
+ drivers/md/dm-writecache.c                             |   15 -
+ drivers/nvdimm/claim.c                                 |    2 
+ drivers/nvdimm/pmem.c                                  |    6 
+ include/linux/string.h                                 |    9 
+ include/linux/uaccess.h                                |   13 
+ include/linux/uio.h                                    |   10 
+ lib/Kconfig                                            |    7 
+ lib/iov_iter.c                                         |   48 +--
+ tools/arch/x86/include/asm/mcsafe_test.h               |   13 
+ tools/arch/x86/lib/memcpy_64.S                         |  115 --------
+ tools/objtool/check.c                                  |    4 
+ tools/perf/bench/Build                                 |    1 
+ tools/perf/bench/mem-memcpy-x86-64-lib.c               |   24 -
+ tools/testing/nvdimm/test/nfit.c                       |   49 +--
+ tools/testing/selftests/powerpc/copyloops/.gitignore   |    2 
+ tools/testing/selftests/powerpc/copyloops/Makefile     |    6 
+ tools/testing/selftests/powerpc/copyloops/copy_mc_64.S |  242 +++++++++++++++++
+ 38 files changed, 914 insertions(+), 770 deletions(-)
+
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -135,7 +135,7 @@ config PPC
+       select ARCH_HAS_STRICT_KERNEL_RWX       if (PPC32 && !HIBERNATION)
+       select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
+       select ARCH_HAS_UACCESS_FLUSHCACHE
+-      select ARCH_HAS_UACCESS_MCSAFE          if PPC64
++      select ARCH_HAS_COPY_MC                 if PPC64
+       select ARCH_HAS_UBSAN_SANITIZE_ALL
+       select ARCH_HAVE_NMI_SAFE_CMPXCHG
+       select ARCH_KEEP_MEMBLOCK
+--- a/arch/powerpc/include/asm/string.h
++++ b/arch/powerpc/include/asm/string.h
+@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *fr
+ #ifndef CONFIG_KASAN
+ #define __HAVE_ARCH_MEMSET32
+ #define __HAVE_ARCH_MEMSET64
+-#define __HAVE_ARCH_MEMCPY_MCSAFE
+-extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
+ extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+ extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+ extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
+--- a/arch/powerpc/include/asm/uaccess.h
++++ b/arch/powerpc/include/asm/uaccess.h
+@@ -436,6 +436,32 @@ do {                                                              \
+ extern unsigned long __copy_tofrom_user(void __user *to,
+               const void __user *from, unsigned long size);
++#ifdef CONFIG_ARCH_HAS_COPY_MC
++unsigned long __must_check
++copy_mc_generic(void *to, const void *from, unsigned long size);
++
++static inline unsigned long __must_check
++copy_mc_to_kernel(void *to, const void *from, unsigned long size)
++{
++      return copy_mc_generic(to, from, size);
++}
++#define copy_mc_to_kernel copy_mc_to_kernel
++
++static inline unsigned long __must_check
++copy_mc_to_user(void __user *to, const void *from, unsigned long n)
++{
++      if (likely(check_copy_size(from, n, true))) {
++              if (access_ok(to, n)) {
++                      allow_write_to_user(to, n);
++                      n = copy_mc_generic((void *)to, from, n);
++                      prevent_write_to_user(to, n);
++              }
++      }
++
++      return n;
++}
++#endif
++
+ #ifdef __powerpc64__
+ static inline unsigned long
+ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
+@@ -524,20 +550,6 @@ raw_copy_to_user(void __user *to, const
+       return ret;
+ }
+-static __always_inline unsigned long __must_check
+-copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
+-{
+-      if (likely(check_copy_size(from, n, true))) {
+-              if (access_ok(to, n)) {
+-                      allow_write_to_user(to, n);
+-                      n = memcpy_mcsafe((void *)to, from, n);
+-                      prevent_write_to_user(to, n);
+-              }
+-      }
+-
+-      return n;
+-}
+-
+ unsigned long __arch_clear_user(void __user *addr, unsigned long size);
+ static inline unsigned long clear_user(void __user *addr, unsigned long size)
+--- a/arch/powerpc/lib/Makefile
++++ b/arch/powerpc/lib/Makefile
+@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_
+                              memcpy_power7.o
+ obj64-y       += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
+-         memcpy_64.o memcpy_mcsafe_64.o
++         memcpy_64.o copy_mc_64.o
+ obj64-$(CONFIG_SMP)   += locks.o
+ obj64-$(CONFIG_ALTIVEC)       += vmx-helper.o
+--- /dev/null
++++ b/arch/powerpc/lib/copy_mc_64.S
+@@ -0,0 +1,242 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) IBM Corporation, 2011
++ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
++ * Author - Balbir Singh <bsingharora@gmail.com>
++ */
++#include <asm/ppc_asm.h>
++#include <asm/errno.h>
++#include <asm/export.h>
++
++      .macro err1
++100:
++      EX_TABLE(100b,.Ldo_err1)
++      .endm
++
++      .macro err2
++200:
++      EX_TABLE(200b,.Ldo_err2)
++      .endm
++
++      .macro err3
++300:  EX_TABLE(300b,.Ldone)
++      .endm
++
++.Ldo_err2:
++      ld      r22,STK_REG(R22)(r1)
++      ld      r21,STK_REG(R21)(r1)
++      ld      r20,STK_REG(R20)(r1)
++      ld      r19,STK_REG(R19)(r1)
++      ld      r18,STK_REG(R18)(r1)
++      ld      r17,STK_REG(R17)(r1)
++      ld      r16,STK_REG(R16)(r1)
++      ld      r15,STK_REG(R15)(r1)
++      ld      r14,STK_REG(R14)(r1)
++      addi    r1,r1,STACKFRAMESIZE
++.Ldo_err1:
++      /* Do a byte by byte copy to get the exact remaining size */
++      mtctr   r7
++46:
++err3; lbz     r0,0(r4)
++      addi    r4,r4,1
++err3; stb     r0,0(r3)
++      addi    r3,r3,1
++      bdnz    46b
++      li      r3,0
++      blr
++
++.Ldone:
++      mfctr   r3
++      blr
++
++
++_GLOBAL(copy_mc_generic)
++      mr      r7,r5
++      cmpldi  r5,16
++      blt     .Lshort_copy
++
++.Lcopy:
++      /* Get the source 8B aligned */
++      neg     r6,r4
++      mtocrf  0x01,r6
++      clrldi  r6,r6,(64-3)
++
++      bf      cr7*4+3,1f
++err1; lbz     r0,0(r4)
++      addi    r4,r4,1
++err1; stb     r0,0(r3)
++      addi    r3,r3,1
++      subi    r7,r7,1
++
++1:    bf      cr7*4+2,2f
++err1; lhz     r0,0(r4)
++      addi    r4,r4,2
++err1; sth     r0,0(r3)
++      addi    r3,r3,2
++      subi    r7,r7,2
++
++2:    bf      cr7*4+1,3f
++err1; lwz     r0,0(r4)
++      addi    r4,r4,4
++err1; stw     r0,0(r3)
++      addi    r3,r3,4
++      subi    r7,r7,4
++
++3:    sub     r5,r5,r6
++      cmpldi  r5,128
++
++      mflr    r0
++      stdu    r1,-STACKFRAMESIZE(r1)
++      std     r14,STK_REG(R14)(r1)
++      std     r15,STK_REG(R15)(r1)
++      std     r16,STK_REG(R16)(r1)
++      std     r17,STK_REG(R17)(r1)
++      std     r18,STK_REG(R18)(r1)
++      std     r19,STK_REG(R19)(r1)
++      std     r20,STK_REG(R20)(r1)
++      std     r21,STK_REG(R21)(r1)
++      std     r22,STK_REG(R22)(r1)
++      std     r0,STACKFRAMESIZE+16(r1)
++
++      blt     5f
++      srdi    r6,r5,7
++      mtctr   r6
++
++      /* Now do cacheline (128B) sized loads and stores. */
++      .align  5
++4:
++err2; ld      r0,0(r4)
++err2; ld      r6,8(r4)
++err2; ld      r8,16(r4)
++err2; ld      r9,24(r4)
++err2; ld      r10,32(r4)
++err2; ld      r11,40(r4)
++err2; ld      r12,48(r4)
++err2; ld      r14,56(r4)
++err2; ld      r15,64(r4)
++err2; ld      r16,72(r4)
++err2; ld      r17,80(r4)
++err2; ld      r18,88(r4)
++err2; ld      r19,96(r4)
++err2; ld      r20,104(r4)
++err2; ld      r21,112(r4)
++err2; ld      r22,120(r4)
++      addi    r4,r4,128
++err2; std     r0,0(r3)
++err2; std     r6,8(r3)
++err2; std     r8,16(r3)
++err2; std     r9,24(r3)
++err2; std     r10,32(r3)
++err2; std     r11,40(r3)
++err2; std     r12,48(r3)
++err2; std     r14,56(r3)
++err2; std     r15,64(r3)
++err2; std     r16,72(r3)
++err2; std     r17,80(r3)
++err2; std     r18,88(r3)
++err2; std     r19,96(r3)
++err2; std     r20,104(r3)
++err2; std     r21,112(r3)
++err2; std     r22,120(r3)
++      addi    r3,r3,128
++      subi    r7,r7,128
++      bdnz    4b
++
++      clrldi  r5,r5,(64-7)
++
++      /* Up to 127B to go */
++5:    srdi    r6,r5,4
++      mtocrf  0x01,r6
++
++6:    bf      cr7*4+1,7f
++err2; ld      r0,0(r4)
++err2; ld      r6,8(r4)
++err2; ld      r8,16(r4)
++err2; ld      r9,24(r4)
++err2; ld      r10,32(r4)
++err2; ld      r11,40(r4)
++err2; ld      r12,48(r4)
++err2; ld      r14,56(r4)
++      addi    r4,r4,64
++err2; std     r0,0(r3)
++err2; std     r6,8(r3)
++err2; std     r8,16(r3)
++err2; std     r9,24(r3)
++err2; std     r10,32(r3)
++err2; std     r11,40(r3)
++err2; std     r12,48(r3)
++err2; std     r14,56(r3)
++      addi    r3,r3,64
++      subi    r7,r7,64
++
++7:    ld      r14,STK_REG(R14)(r1)
++      ld      r15,STK_REG(R15)(r1)
++      ld      r16,STK_REG(R16)(r1)
++      ld      r17,STK_REG(R17)(r1)
++      ld      r18,STK_REG(R18)(r1)
++      ld      r19,STK_REG(R19)(r1)
++      ld      r20,STK_REG(R20)(r1)
++      ld      r21,STK_REG(R21)(r1)
++      ld      r22,STK_REG(R22)(r1)
++      addi    r1,r1,STACKFRAMESIZE
++
++      /* Up to 63B to go */
++      bf      cr7*4+2,8f
++err1; ld      r0,0(r4)
++err1; ld      r6,8(r4)
++err1; ld      r8,16(r4)
++err1; ld      r9,24(r4)
++      addi    r4,r4,32
++err1; std     r0,0(r3)
++err1; std     r6,8(r3)
++err1; std     r8,16(r3)
++err1; std     r9,24(r3)
++      addi    r3,r3,32
++      subi    r7,r7,32
++
++      /* Up to 31B to go */
++8:    bf      cr7*4+3,9f
++err1; ld      r0,0(r4)
++err1; ld      r6,8(r4)
++      addi    r4,r4,16
++err1; std     r0,0(r3)
++err1; std     r6,8(r3)
++      addi    r3,r3,16
++      subi    r7,r7,16
++
++9:    clrldi  r5,r5,(64-4)
++
++      /* Up to 15B to go */
++.Lshort_copy:
++      mtocrf  0x01,r5
++      bf      cr7*4+0,12f
++err1; lwz     r0,0(r4)        /* Less chance of a reject with word ops */
++err1; lwz     r6,4(r4)
++      addi    r4,r4,8
++err1; stw     r0,0(r3)
++err1; stw     r6,4(r3)
++      addi    r3,r3,8
++      subi    r7,r7,8
++
++12:   bf      cr7*4+1,13f
++err1; lwz     r0,0(r4)
++      addi    r4,r4,4
++err1; stw     r0,0(r3)
++      addi    r3,r3,4
++      subi    r7,r7,4
++
++13:   bf      cr7*4+2,14f
++err1; lhz     r0,0(r4)
++      addi    r4,r4,2
++err1; sth     r0,0(r3)
++      addi    r3,r3,2
++      subi    r7,r7,2
++
++14:   bf      cr7*4+3,15f
++err1; lbz     r0,0(r4)
++err1; stb     r0,0(r3)
++
++15:   li      r3,0
++      blr
++
++EXPORT_SYMBOL_GPL(copy_mc_generic);
+--- a/arch/powerpc/lib/memcpy_mcsafe_64.S
++++ /dev/null
+@@ -1,242 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * Copyright (C) IBM Corporation, 2011
+- * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
+- * Author - Balbir Singh <bsingharora@gmail.com>
+- */
+-#include <asm/ppc_asm.h>
+-#include <asm/errno.h>
+-#include <asm/export.h>
+-
+-      .macro err1
+-100:
+-      EX_TABLE(100b,.Ldo_err1)
+-      .endm
+-
+-      .macro err2
+-200:
+-      EX_TABLE(200b,.Ldo_err2)
+-      .endm
+-
+-      .macro err3
+-300:  EX_TABLE(300b,.Ldone)
+-      .endm
+-
+-.Ldo_err2:
+-      ld      r22,STK_REG(R22)(r1)
+-      ld      r21,STK_REG(R21)(r1)
+-      ld      r20,STK_REG(R20)(r1)
+-      ld      r19,STK_REG(R19)(r1)
+-      ld      r18,STK_REG(R18)(r1)
+-      ld      r17,STK_REG(R17)(r1)
+-      ld      r16,STK_REG(R16)(r1)
+-      ld      r15,STK_REG(R15)(r1)
+-      ld      r14,STK_REG(R14)(r1)
+-      addi    r1,r1,STACKFRAMESIZE
+-.Ldo_err1:
+-      /* Do a byte by byte copy to get the exact remaining size */
+-      mtctr   r7
+-46:
+-err3; lbz     r0,0(r4)
+-      addi    r4,r4,1
+-err3; stb     r0,0(r3)
+-      addi    r3,r3,1
+-      bdnz    46b
+-      li      r3,0
+-      blr
+-
+-.Ldone:
+-      mfctr   r3
+-      blr
+-
+-
+-_GLOBAL(memcpy_mcsafe)
+-      mr      r7,r5
+-      cmpldi  r5,16
+-      blt     .Lshort_copy
+-
+-.Lcopy:
+-      /* Get the source 8B aligned */
+-      neg     r6,r4
+-      mtocrf  0x01,r6
+-      clrldi  r6,r6,(64-3)
+-
+-      bf      cr7*4+3,1f
+-err1; lbz     r0,0(r4)
+-      addi    r4,r4,1
+-err1; stb     r0,0(r3)
+-      addi    r3,r3,1
+-      subi    r7,r7,1
+-
+-1:    bf      cr7*4+2,2f
+-err1; lhz     r0,0(r4)
+-      addi    r4,r4,2
+-err1; sth     r0,0(r3)
+-      addi    r3,r3,2
+-      subi    r7,r7,2
+-
+-2:    bf      cr7*4+1,3f
+-err1; lwz     r0,0(r4)
+-      addi    r4,r4,4
+-err1; stw     r0,0(r3)
+-      addi    r3,r3,4
+-      subi    r7,r7,4
+-
+-3:    sub     r5,r5,r6
+-      cmpldi  r5,128
+-
+-      mflr    r0
+-      stdu    r1,-STACKFRAMESIZE(r1)
+-      std     r14,STK_REG(R14)(r1)
+-      std     r15,STK_REG(R15)(r1)
+-      std     r16,STK_REG(R16)(r1)
+-      std     r17,STK_REG(R17)(r1)
+-      std     r18,STK_REG(R18)(r1)
+-      std     r19,STK_REG(R19)(r1)
+-      std     r20,STK_REG(R20)(r1)
+-      std     r21,STK_REG(R21)(r1)
+-      std     r22,STK_REG(R22)(r1)
+-      std     r0,STACKFRAMESIZE+16(r1)
+-
+-      blt     5f
+-      srdi    r6,r5,7
+-      mtctr   r6
+-
+-      /* Now do cacheline (128B) sized loads and stores. */
+-      .align  5
+-4:
+-err2; ld      r0,0(r4)
+-err2; ld      r6,8(r4)
+-err2; ld      r8,16(r4)
+-err2; ld      r9,24(r4)
+-err2; ld      r10,32(r4)
+-err2; ld      r11,40(r4)
+-err2; ld      r12,48(r4)
+-err2; ld      r14,56(r4)
+-err2; ld      r15,64(r4)
+-err2; ld      r16,72(r4)
+-err2; ld      r17,80(r4)
+-err2; ld      r18,88(r4)
+-err2; ld      r19,96(r4)
+-err2; ld      r20,104(r4)
+-err2; ld      r21,112(r4)
+-err2; ld      r22,120(r4)
+-      addi    r4,r4,128
+-err2; std     r0,0(r3)
+-err2; std     r6,8(r3)
+-err2; std     r8,16(r3)
+-err2; std     r9,24(r3)
+-err2; std     r10,32(r3)
+-err2; std     r11,40(r3)
+-err2; std     r12,48(r3)
+-err2; std     r14,56(r3)
+-err2; std     r15,64(r3)
+-err2; std     r16,72(r3)
+-err2; std     r17,80(r3)
+-err2; std     r18,88(r3)
+-err2; std     r19,96(r3)
+-err2; std     r20,104(r3)
+-err2; std     r21,112(r3)
+-err2; std     r22,120(r3)
+-      addi    r3,r3,128
+-      subi    r7,r7,128
+-      bdnz    4b
+-
+-      clrldi  r5,r5,(64-7)
+-
+-      /* Up to 127B to go */
+-5:    srdi    r6,r5,4
+-      mtocrf  0x01,r6
+-
+-6:    bf      cr7*4+1,7f
+-err2; ld      r0,0(r4)
+-err2; ld      r6,8(r4)
+-err2; ld      r8,16(r4)
+-err2; ld      r9,24(r4)
+-err2; ld      r10,32(r4)
+-err2; ld      r11,40(r4)
+-err2; ld      r12,48(r4)
+-err2; ld      r14,56(r4)
+-      addi    r4,r4,64
+-err2; std     r0,0(r3)
+-err2; std     r6,8(r3)
+-err2; std     r8,16(r3)
+-err2; std     r9,24(r3)
+-err2; std     r10,32(r3)
+-err2; std     r11,40(r3)
+-err2; std     r12,48(r3)
+-err2; std     r14,56(r3)
+-      addi    r3,r3,64
+-      subi    r7,r7,64
+-
+-7:    ld      r14,STK_REG(R14)(r1)
+-      ld      r15,STK_REG(R15)(r1)
+-      ld      r16,STK_REG(R16)(r1)
+-      ld      r17,STK_REG(R17)(r1)
+-      ld      r18,STK_REG(R18)(r1)
+-      ld      r19,STK_REG(R19)(r1)
+-      ld      r20,STK_REG(R20)(r1)
+-      ld      r21,STK_REG(R21)(r1)
+-      ld      r22,STK_REG(R22)(r1)
+-      addi    r1,r1,STACKFRAMESIZE
+-
+-      /* Up to 63B to go */
+-      bf      cr7*4+2,8f
+-err1; ld      r0,0(r4)
+-err1; ld      r6,8(r4)
+-err1; ld      r8,16(r4)
+-err1; ld      r9,24(r4)
+-      addi    r4,r4,32
+-err1; std     r0,0(r3)
+-err1; std     r6,8(r3)
+-err1; std     r8,16(r3)
+-err1; std     r9,24(r3)
+-      addi    r3,r3,32
+-      subi    r7,r7,32
+-
+-      /* Up to 31B to go */
+-8:    bf      cr7*4+3,9f
+-err1; ld      r0,0(r4)
+-err1; ld      r6,8(r4)
+-      addi    r4,r4,16
+-err1; std     r0,0(r3)
+-err1; std     r6,8(r3)
+-      addi    r3,r3,16
+-      subi    r7,r7,16
+-
+-9:    clrldi  r5,r5,(64-4)
+-
+-      /* Up to 15B to go */
+-.Lshort_copy:
+-      mtocrf  0x01,r5
+-      bf      cr7*4+0,12f
+-err1; lwz     r0,0(r4)        /* Less chance of a reject with word ops */
+-err1; lwz     r6,4(r4)
+-      addi    r4,r4,8
+-err1; stw     r0,0(r3)
+-err1; stw     r6,4(r3)
+-      addi    r3,r3,8
+-      subi    r7,r7,8
+-
+-12:   bf      cr7*4+1,13f
+-err1; lwz     r0,0(r4)
+-      addi    r4,r4,4
+-err1; stw     r0,0(r3)
+-      addi    r3,r3,4
+-      subi    r7,r7,4
+-
+-13:   bf      cr7*4+2,14f
+-err1; lhz     r0,0(r4)
+-      addi    r4,r4,2
+-err1; sth     r0,0(r3)
+-      addi    r3,r3,2
+-      subi    r7,r7,2
+-
+-14:   bf      cr7*4+3,15f
+-err1; lbz     r0,0(r4)
+-err1; stb     r0,0(r3)
+-
+-15:   li      r3,0
+-      blr
+-
+-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -75,7 +75,7 @@ config X86
+       select ARCH_HAS_PTE_DEVMAP              if X86_64
+       select ARCH_HAS_PTE_SPECIAL
+       select ARCH_HAS_UACCESS_FLUSHCACHE      if X86_64
+-      select ARCH_HAS_UACCESS_MCSAFE          if X86_64 && X86_MCE
++      select ARCH_HAS_COPY_MC                 if X86_64
+       select ARCH_HAS_SET_MEMORY
+       select ARCH_HAS_SET_DIRECT_MAP
+       select ARCH_HAS_STRICT_KERNEL_RWX
+--- a/arch/x86/Kconfig.debug
++++ b/arch/x86/Kconfig.debug
+@@ -59,7 +59,7 @@ config EARLY_PRINTK_USB_XDBC
+         You should normally say N here, unless you want to debug early
+         crashes or need a very simple printk logging facility.
+-config MCSAFE_TEST
++config COPY_MC_TEST
+       def_bool n
+ config EFI_PGT_DUMP
+--- /dev/null
++++ b/arch/x86/include/asm/copy_mc_test.h
+@@ -0,0 +1,75 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _COPY_MC_TEST_H_
++#define _COPY_MC_TEST_H_
++
++#ifndef __ASSEMBLY__
++#ifdef CONFIG_COPY_MC_TEST
++extern unsigned long copy_mc_test_src;
++extern unsigned long copy_mc_test_dst;
++
++static inline void copy_mc_inject_src(void *addr)
++{
++      if (addr)
++              copy_mc_test_src = (unsigned long) addr;
++      else
++              copy_mc_test_src = ~0UL;
++}
++
++static inline void copy_mc_inject_dst(void *addr)
++{
++      if (addr)
++              copy_mc_test_dst = (unsigned long) addr;
++      else
++              copy_mc_test_dst = ~0UL;
++}
++#else /* CONFIG_COPY_MC_TEST */
++static inline void copy_mc_inject_src(void *addr)
++{
++}
++
++static inline void copy_mc_inject_dst(void *addr)
++{
++}
++#endif /* CONFIG_COPY_MC_TEST */
++
++#else /* __ASSEMBLY__ */
++#include <asm/export.h>
++
++#ifdef CONFIG_COPY_MC_TEST
++.macro COPY_MC_TEST_CTL
++      .pushsection .data
++      .align 8
++      .globl copy_mc_test_src
++      copy_mc_test_src:
++              .quad 0
++      EXPORT_SYMBOL_GPL(copy_mc_test_src)
++      .globl copy_mc_test_dst
++      copy_mc_test_dst:
++              .quad 0
++      EXPORT_SYMBOL_GPL(copy_mc_test_dst)
++      .popsection
++.endm
++
++.macro COPY_MC_TEST_SRC reg count target
++      leaq \count(\reg), %r9
++      cmp copy_mc_test_src, %r9
++      ja \target
++.endm
++
++.macro COPY_MC_TEST_DST reg count target
++      leaq \count(\reg), %r9
++      cmp copy_mc_test_dst, %r9
++      ja \target
++.endm
++#else
++.macro COPY_MC_TEST_CTL
++.endm
++
++.macro COPY_MC_TEST_SRC reg count target
++.endm
++
++.macro COPY_MC_TEST_DST reg count target
++.endm
++#endif /* CONFIG_COPY_MC_TEST */
++#endif /* __ASSEMBLY__ */
++#endif /* _COPY_MC_TEST_H_ */
+--- a/arch/x86/include/asm/mce.h
++++ b/arch/x86/include/asm/mce.h
+@@ -174,6 +174,15 @@ extern void mce_unregister_decode_chain(
+ extern int mce_p5_enabled;
++#ifdef CONFIG_ARCH_HAS_COPY_MC
++extern void enable_copy_mc_fragile(void);
++unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
++#else
++static inline void enable_copy_mc_fragile(void)
++{
++}
++#endif
++
+ #ifdef CONFIG_X86_MCE
+ int mcheck_init(void);
+ void mcheck_cpu_init(struct cpuinfo_x86 *c);
+--- a/arch/x86/include/asm/mcsafe_test.h
++++ /dev/null
+@@ -1,75 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef _MCSAFE_TEST_H_
+-#define _MCSAFE_TEST_H_
+-
+-#ifndef __ASSEMBLY__
+-#ifdef CONFIG_MCSAFE_TEST
+-extern unsigned long mcsafe_test_src;
+-extern unsigned long mcsafe_test_dst;
+-
+-static inline void mcsafe_inject_src(void *addr)
+-{
+-      if (addr)
+-              mcsafe_test_src = (unsigned long) addr;
+-      else
+-              mcsafe_test_src = ~0UL;
+-}
+-
+-static inline void mcsafe_inject_dst(void *addr)
+-{
+-      if (addr)
+-              mcsafe_test_dst = (unsigned long) addr;
+-      else
+-              mcsafe_test_dst = ~0UL;
+-}
+-#else /* CONFIG_MCSAFE_TEST */
+-static inline void mcsafe_inject_src(void *addr)
+-{
+-}
+-
+-static inline void mcsafe_inject_dst(void *addr)
+-{
+-}
+-#endif /* CONFIG_MCSAFE_TEST */
+-
+-#else /* __ASSEMBLY__ */
+-#include <asm/export.h>
+-
+-#ifdef CONFIG_MCSAFE_TEST
+-.macro MCSAFE_TEST_CTL
+-      .pushsection .data
+-      .align 8
+-      .globl mcsafe_test_src
+-      mcsafe_test_src:
+-              .quad 0
+-      EXPORT_SYMBOL_GPL(mcsafe_test_src)
+-      .globl mcsafe_test_dst
+-      mcsafe_test_dst:
+-              .quad 0
+-      EXPORT_SYMBOL_GPL(mcsafe_test_dst)
+-      .popsection
+-.endm
+-
+-.macro MCSAFE_TEST_SRC reg count target
+-      leaq \count(\reg), %r9
+-      cmp mcsafe_test_src, %r9
+-      ja \target
+-.endm
+-
+-.macro MCSAFE_TEST_DST reg count target
+-      leaq \count(\reg), %r9
+-      cmp mcsafe_test_dst, %r9
+-      ja \target
+-.endm
+-#else
+-.macro MCSAFE_TEST_CTL
+-.endm
+-
+-.macro MCSAFE_TEST_SRC reg count target
+-.endm
+-
+-.macro MCSAFE_TEST_DST reg count target
+-.endm
+-#endif /* CONFIG_MCSAFE_TEST */
+-#endif /* __ASSEMBLY__ */
+-#endif /* _MCSAFE_TEST_H_ */
+--- a/arch/x86/include/asm/string_64.h
++++ b/arch/x86/include/asm/string_64.h
+@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *c
+ #endif
+-#define __HAVE_ARCH_MEMCPY_MCSAFE 1
+-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
+-              size_t cnt);
+-DECLARE_STATIC_KEY_FALSE(mcsafe_key);
+-
+-/**
+- * memcpy_mcsafe - copy memory with indication if a machine check happened
+- *
+- * @dst:      destination address
+- * @src:      source address
+- * @cnt:      number of bytes to copy
+- *
+- * Low level memory copy function that catches machine checks
+- * We only call into the "safe" function on systems that can
+- * actually do machine check recovery. Everyone else can just
+- * use memcpy().
+- *
+- * Return 0 for success, or number of bytes not copied if there was an
+- * exception.
+- */
+-static __always_inline __must_check unsigned long
+-memcpy_mcsafe(void *dst, const void *src, size_t cnt)
+-{
+-#ifdef CONFIG_X86_MCE
+-      if (static_branch_unlikely(&mcsafe_key))
+-              return __memcpy_mcsafe(dst, src, cnt);
+-      else
+-#endif
+-              memcpy(dst, src, cnt);
+-      return 0;
+-}
+-
+ #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+ #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
+ void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
+--- a/arch/x86/include/asm/uaccess.h
++++ b/arch/x86/include/asm/uaccess.h
+@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(co
+ unsigned long __must_check clear_user(void __user *mem, unsigned long len);
+ unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
++#ifdef CONFIG_ARCH_HAS_COPY_MC
++unsigned long __must_check
++copy_mc_to_kernel(void *to, const void *from, unsigned len);
++#define copy_mc_to_kernel copy_mc_to_kernel
++
++unsigned long __must_check
++copy_mc_to_user(void *to, const void *from, unsigned len);
++#endif
++
+ /*
+  * movsl can be slow when source and dest are not both 8-byte aligned
+  */
+--- a/arch/x86/include/asm/uaccess_64.h
++++ b/arch/x86/include/asm/uaccess_64.h
+@@ -47,22 +47,6 @@ copy_user_generic(void *to, const void *
+ }
+ static __always_inline __must_check unsigned long
+-copy_to_user_mcsafe(void *to, const void *from, unsigned len)
+-{
+-      unsigned long ret;
+-
+-      __uaccess_begin();
+-      /*
+-       * Note, __memcpy_mcsafe() is explicitly used since it can
+-       * handle exceptions / faults.  memcpy_mcsafe() may fall back to
+-       * memcpy() which lacks this handling.
+-       */
+-      ret = __memcpy_mcsafe(to, from, len);
+-      __uaccess_end();
+-      return ret;
+-}
+-
+-static __always_inline __must_check unsigned long
+ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
+ {
+       return copy_user_generic(dst, (__force void *)src, size);
+@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, c
+       kasan_check_write(dst, size);
+       return __copy_user_flushcache(dst, src, size);
+ }
+-
+-unsigned long
+-mcsafe_handle_tail(char *to, char *from, unsigned len);
+-
+ #endif /* _ASM_X86_UACCESS_64_H */
+--- a/arch/x86/kernel/cpu/mce/core.c
++++ b/arch/x86/kernel/cpu/mce/core.c
+@@ -40,7 +40,6 @@
+ #include <linux/debugfs.h>
+ #include <linux/irq_work.h>
+ #include <linux/export.h>
+-#include <linux/jump_label.h>
+ #include <linux/set_memory.h>
+ #include <linux/task_work.h>
+ #include <linux/hardirq.h>
+@@ -2122,7 +2121,7 @@ void mce_disable_bank(int bank)
+       and older.
+  * mce=nobootlog Don't log MCEs from before booting.
+  * mce=bios_cmci_threshold Don't program the CMCI threshold
+- * mce=recovery force enable memcpy_mcsafe()
++ * mce=recovery force enable copy_mc_fragile()
+  */
+ static int __init mcheck_enable(char *str)
+ {
+@@ -2730,13 +2729,10 @@ static void __init mcheck_debugfs_init(v
+ static void __init mcheck_debugfs_init(void) { }
+ #endif
+-DEFINE_STATIC_KEY_FALSE(mcsafe_key);
+-EXPORT_SYMBOL_GPL(mcsafe_key);
+-
+ static int __init mcheck_late_init(void)
+ {
+       if (mca_cfg.recovery)
+-              static_branch_inc(&mcsafe_key);
++              enable_copy_mc_fragile();
+       mcheck_debugfs_init();
+--- a/arch/x86/kernel/quirks.c
++++ b/arch/x86/kernel/quirks.c
+@@ -8,6 +8,7 @@
+ #include <asm/hpet.h>
+ #include <asm/setup.h>
++#include <asm/mce.h>
+ #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
+@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
+                       amd_disable_seq_and_redirect_scrub);
+-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+-#include <linux/jump_label.h>
+-#include <asm/string_64.h>
+-
+ /* Ivy Bridge, Haswell, Broadwell */
+ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
+ {
+@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_r
+       pci_read_config_dword(pdev, 0x84, &capid0);
+       if (capid0 & 0x10)
+-              static_branch_inc(&mcsafe_key);
++              enable_copy_mc_fragile();
+ }
+ /* Skylake */
+@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_
+        * enabled, so memory machine check recovery is also enabled.
+        */
+       if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
+-              static_branch_inc(&mcsafe_key);
++              enable_copy_mc_fragile();
+ }
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
+@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IN
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
+ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
+ #endif
+-#endif
+ bool x86_apple_machine;
+ EXPORT_SYMBOL(x86_apple_machine);
+--- a/arch/x86/lib/Makefile
++++ b/arch/x86/lib/Makefile
+@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp
+ lib-y := delay.o misc.o cmdline.o cpu.o
+ lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
+ lib-y += memcpy_$(BITS).o
++lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
+ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
+ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+ lib-$(CONFIG_FUNCTION_ERROR_INJECTION)        += error-inject.o
+--- /dev/null
++++ b/arch/x86/lib/copy_mc.c
+@@ -0,0 +1,82 @@
++// SPDX-License-Identifier: GPL-2.0
++/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
++
++#include <linux/jump_label.h>
++#include <linux/uaccess.h>
++#include <linux/export.h>
++#include <linux/string.h>
++#include <linux/types.h>
++
++#include <asm/mce.h>
++
++#ifdef CONFIG_X86_MCE
++/*
++ * See COPY_MC_TEST for self-test of the copy_mc_fragile()
++ * implementation.
++ */
++static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
++
++void enable_copy_mc_fragile(void)
++{
++      static_branch_inc(&copy_mc_fragile_key);
++}
++#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
++
++/*
++ * Similar to copy_user_handle_tail, probe for the write fault point, or
++ * source exception point.
++ */
++__visible notrace unsigned long
++copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
++{
++      for (; len; --len, to++, from++)
++              if (copy_mc_fragile(to, from, 1))
++                      break;
++      return len;
++}
++#else
++/*
++ * No point in doing careful copying, or consulting a static key when
++ * there is no #MC handler in the CONFIG_X86_MCE=n case.
++ */
++void enable_copy_mc_fragile(void)
++{
++}
++#define copy_mc_fragile_enabled (0)
++#endif
++
++/**
++ * copy_mc_to_kernel - memory copy that handles source exceptions
++ *
++ * @dst:      destination address
++ * @src:      source address
++ * @len:      number of bytes to copy
++ *
++ * Call into the 'fragile' version on systems that have trouble
++ * actually doing machine check recovery. Everyone else can just
++ * use memcpy().
++ *
++ * Return 0 for success, or number of bytes not copied if there was an
++ * exception.
++ */
++unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
++{
++      if (copy_mc_fragile_enabled)
++              return copy_mc_fragile(dst, src, len);
++      memcpy(dst, src, len);
++      return 0;
++}
++EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
++
++unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
++{
++      unsigned long ret;
++
++      if (!copy_mc_fragile_enabled)
++              return copy_user_generic(dst, src, len);
++
++      __uaccess_begin();
++      ret = copy_mc_fragile(dst, src, len);
++      __uaccess_end();
++      return ret;
++}
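
A minimal sketch of consuming the return convention documented above (0 on success, otherwise the number of bytes not copied when a source exception fired). The helper name read_record() and the -EIO policy are illustrative assumptions modeled on the nvdimm and dm-writecache conversions later in this patch, not code taken from it.

    #include <linux/errno.h>
    #include <linux/uaccess.h>

    /* Illustrative only: turn a non-zero remainder from copy_mc_to_kernel()
     * into -EIO, the way the pmem and dm-writecache read paths below do. */
    static int read_record(void *dst, const void *media_src, size_t len)
    {
            unsigned long rem;

            rem = copy_mc_to_kernel(dst, media_src, len);
            if (rem)
                    return -EIO;    /* poison consumed while reading the source */
            return 0;
    }
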
+--- /dev/null
++++ b/arch/x86/lib/copy_mc_64.S
+@@ -0,0 +1,127 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
++
++#include <linux/linkage.h>
++#include <asm/copy_mc_test.h>
++#include <asm/export.h>
++#include <asm/asm.h>
++
++#ifndef CONFIG_UML
++
++#ifdef CONFIG_X86_MCE
++COPY_MC_TEST_CTL
++
++/*
++ * copy_mc_fragile - copy memory with indication if an exception / fault happened
++ *
++ * The 'fragile' version is opted into by platform quirks and takes
++ * pains to avoid unrecoverable corner cases like 'fast-string'
++ * instruction sequences, and consuming poison across a cacheline
++ * boundary. The non-fragile version is equivalent to memcpy()
++ * regardless of CPU machine-check-recovery capability.
++ */
++SYM_FUNC_START(copy_mc_fragile)
++      cmpl $8, %edx
++      /* Less than 8 bytes? Go to byte copy loop */
++      jb .L_no_whole_words
++
++      /* Check for bad alignment of source */
++      testl $7, %esi
++      /* Already aligned */
++      jz .L_8byte_aligned
++
++      /* Copy one byte at a time until source is 8-byte aligned */
++      movl %esi, %ecx
++      andl $7, %ecx
++      subl $8, %ecx
++      negl %ecx
++      subl %ecx, %edx
++.L_read_leading_bytes:
++      movb (%rsi), %al
++      COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
++      COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
++.L_write_leading_bytes:
++      movb %al, (%rdi)
++      incq %rsi
++      incq %rdi
++      decl %ecx
++      jnz .L_read_leading_bytes
++
++.L_8byte_aligned:
++      movl %edx, %ecx
++      andl $7, %edx
++      shrl $3, %ecx
++      jz .L_no_whole_words
++
++.L_read_words:
++      movq (%rsi), %r8
++      COPY_MC_TEST_SRC %rsi 8 .E_read_words
++      COPY_MC_TEST_DST %rdi 8 .E_write_words
++.L_write_words:
++      movq %r8, (%rdi)
++      addq $8, %rsi
++      addq $8, %rdi
++      decl %ecx
++      jnz .L_read_words
++
++      /* Any trailing bytes? */
++.L_no_whole_words:
++      andl %edx, %edx
++      jz .L_done_memcpy_trap
++
++      /* Copy trailing bytes */
++      movl %edx, %ecx
++.L_read_trailing_bytes:
++      movb (%rsi), %al
++      COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
++      COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
++.L_write_trailing_bytes:
++      movb %al, (%rdi)
++      incq %rsi
++      incq %rdi
++      decl %ecx
++      jnz .L_read_trailing_bytes
++
++      /* Copy successful. Return zero */
++.L_done_memcpy_trap:
++      xorl %eax, %eax
++.L_done:
++      ret
++SYM_FUNC_END(copy_mc_fragile)
++EXPORT_SYMBOL_GPL(copy_mc_fragile)
++
++      .section .fixup, "ax"
++      /*
++       * Return number of bytes not copied for any failure. Note that
++       * there is no "tail" handling since the source buffer is 8-byte
++       * aligned and poison is cacheline aligned.
++       */
++.E_read_words:
++      shll    $3, %ecx
++.E_leading_bytes:
++      addl    %edx, %ecx
++.E_trailing_bytes:
++      mov     %ecx, %eax
++      jmp     .L_done
++
++      /*
++       * For write fault handling, given the destination is unaligned,
++       * we handle faults on multi-byte writes with a byte-by-byte
++       * copy up to the write-protected page.
++       */
++.E_write_words:
++      shll    $3, %ecx
++      addl    %edx, %ecx
++      movl    %ecx, %edx
++      jmp copy_mc_fragile_handle_tail
++
++      .previous
++
++      _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
++      _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
++      _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
++      _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
++      _ASM_EXTABLE(.L_write_words, .E_write_words)
++      _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
++#endif /* CONFIG_X86_MCE */
++#endif /* !CONFIG_UML */
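
The .E_* read fixups above compute the function's "bytes not copied" return value. Read as C (an illustrative rendering, not code from the patch): a fault in the leading or trailing byte loop returns the bytes still pending in that loop plus whatever work follows it, while a fault in the 8-byte word loop scales the remaining word count by eight before adding the trailing bytes.

    /* Illustrative rendering of .E_read_words / .E_leading_bytes /
     * .E_trailing_bytes: units_left is the loop counter (%ecx) at the
     * fault, bytes_after is the byte count still owed after that loop
     * (%edx), zero for the trailing loop. */
    static unsigned long fragile_fixup_remainder(int in_word_loop,
                                                 unsigned long units_left,
                                                 unsigned long bytes_after)
    {
            if (in_word_loop)       /* shll $3, %ecx then addl %edx, %ecx */
                    return units_left * 8 + bytes_after;
            return units_left + bytes_after;
    }
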
+--- a/arch/x86/lib/memcpy_64.S
++++ b/arch/x86/lib/memcpy_64.S
+@@ -4,7 +4,6 @@
+ #include <linux/linkage.h>
+ #include <asm/errno.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/mcsafe_test.h>
+ #include <asm/alternative-asm.h>
+ #include <asm/export.h>
+@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
+ SYM_FUNC_END(memcpy_orig)
+ .popsection
+-
+-#ifndef CONFIG_UML
+-
+-MCSAFE_TEST_CTL
+-
+-/*
+- * __memcpy_mcsafe - memory copy with machine check exception handling
+- * Note that we only catch machine checks when reading the source addresses.
+- * Writes to target are posted and don't generate machine checks.
+- */
+-SYM_FUNC_START(__memcpy_mcsafe)
+-      cmpl $8, %edx
+-      /* Less than 8 bytes? Go to byte copy loop */
+-      jb .L_no_whole_words
+-
+-      /* Check for bad alignment of source */
+-      testl $7, %esi
+-      /* Already aligned */
+-      jz .L_8byte_aligned
+-
+-      /* Copy one byte at a time until source is 8-byte aligned */
+-      movl %esi, %ecx
+-      andl $7, %ecx
+-      subl $8, %ecx
+-      negl %ecx
+-      subl %ecx, %edx
+-.L_read_leading_bytes:
+-      movb (%rsi), %al
+-      MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
+-      MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
+-.L_write_leading_bytes:
+-      movb %al, (%rdi)
+-      incq %rsi
+-      incq %rdi
+-      decl %ecx
+-      jnz .L_read_leading_bytes
+-
+-.L_8byte_aligned:
+-      movl %edx, %ecx
+-      andl $7, %edx
+-      shrl $3, %ecx
+-      jz .L_no_whole_words
+-
+-.L_read_words:
+-      movq (%rsi), %r8
+-      MCSAFE_TEST_SRC %rsi 8 .E_read_words
+-      MCSAFE_TEST_DST %rdi 8 .E_write_words
+-.L_write_words:
+-      movq %r8, (%rdi)
+-      addq $8, %rsi
+-      addq $8, %rdi
+-      decl %ecx
+-      jnz .L_read_words
+-
+-      /* Any trailing bytes? */
+-.L_no_whole_words:
+-      andl %edx, %edx
+-      jz .L_done_memcpy_trap
+-
+-      /* Copy trailing bytes */
+-      movl %edx, %ecx
+-.L_read_trailing_bytes:
+-      movb (%rsi), %al
+-      MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
+-      MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
+-.L_write_trailing_bytes:
+-      movb %al, (%rdi)
+-      incq %rsi
+-      incq %rdi
+-      decl %ecx
+-      jnz .L_read_trailing_bytes
+-
+-      /* Copy successful. Return zero */
+-.L_done_memcpy_trap:
+-      xorl %eax, %eax
+-.L_done:
+-      ret
+-SYM_FUNC_END(__memcpy_mcsafe)
+-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
+-
+-      .section .fixup, "ax"
+-      /*
+-       * Return number of bytes not copied for any failure. Note that
+-       * there is no "tail" handling since the source buffer is 8-byte
+-       * aligned and poison is cacheline aligned.
+-       */
+-.E_read_words:
+-      shll    $3, %ecx
+-.E_leading_bytes:
+-      addl    %edx, %ecx
+-.E_trailing_bytes:
+-      mov     %ecx, %eax
+-      jmp     .L_done
+-
+-      /*
+-       * For write fault handling, given the destination is unaligned,
+-       * we handle faults on multi-byte writes with a byte-by-byte
+-       * copy up to the write-protected page.
+-       */
+-.E_write_words:
+-      shll    $3, %ecx
+-      addl    %edx, %ecx
+-      movl    %ecx, %edx
+-      jmp mcsafe_handle_tail
+-
+-      .previous
+-
+-      _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+-      _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+-      _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+-      _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+-      _ASM_EXTABLE(.L_write_words, .E_write_words)
+-      _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+-#endif
+--- a/arch/x86/lib/usercopy_64.c
++++ b/arch/x86/lib/usercopy_64.c
+@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to
+ }
+ EXPORT_SYMBOL(clear_user);
+-/*
+- * Similar to copy_user_handle_tail, probe for the write fault point,
+- * but reuse __memcpy_mcsafe in case a new read error is encountered.
+- * clac() is handled in _copy_to_iter_mcsafe().
+- */
+-__visible notrace unsigned long
+-mcsafe_handle_tail(char *to, char *from, unsigned len)
+-{
+-      for (; len; --len, to++, from++) {
+-              /*
+-               * Call the assembly routine back directly since
+-               * memcpy_mcsafe() may silently fallback to memcpy.
+-               */
+-              unsigned long rem = __memcpy_mcsafe(to, from, 1);
+-
+-              if (rem)
+-                      break;
+-      }
+-      return len;
+-}
+-
+ #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+ /**
+  * clean_cache_range - write back a cache range with CLWB
+--- a/drivers/md/dm-writecache.c
++++ b/drivers/md/dm-writecache.c
+@@ -49,7 +49,7 @@ do {                                                         \
+ #define pmem_assign(dest, src)        ((dest) = (src))
+ #endif
+-#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM)
++#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM)
+ #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+ #endif
+@@ -992,7 +992,8 @@ static void writecache_resume(struct dm_
+       }
+       wc->freelist_size = 0;
+-      r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t));
++      r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count,
++                            sizeof(uint64_t));
+       if (r) {
+               writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
+               sb_seq_count = cpu_to_le64(0);
+@@ -1008,7 +1009,8 @@ static void writecache_resume(struct dm_
+                       e->seq_count = -1;
+                       continue;
+               }
+-              r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry));
++              r = copy_mc_to_kernel(&wme, memory_entry(wc, e),
++                                    sizeof(struct wc_memory_entry));
+               if (r) {
+                       writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d",
+                                        (unsigned long)b, r);
+@@ -1206,7 +1208,7 @@ static void bio_copy_block(struct dm_wri
+               if (rw == READ) {
+                       int r;
+-                      r = memcpy_mcsafe(buf, data, size);
++                      r = copy_mc_to_kernel(buf, data, size);
+                       flush_dcache_page(bio_page(bio));
+                       if (unlikely(r)) {
+                               writecache_error(wc, r, "hardware memory error when reading data: %d", r);
+@@ -2349,7 +2351,7 @@ invalid_optional:
+               }
+       }
+-      r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
++      r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock));
+       if (r) {
+               ti->error = "Hardware memory error when reading superblock";
+               goto bad;
+@@ -2360,7 +2362,8 @@ invalid_optional:
+                       ti->error = "Unable to initialize device";
+                       goto bad;
+               }
+-              r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
++              r = copy_mc_to_kernel(&s, sb(wc),
++                                    sizeof(struct wc_memory_superblock));
+               if (r) {
+                       ti->error = "Hardware memory error when reading superblock";
+                       goto bad;
+--- a/drivers/nvdimm/claim.c
++++ b/drivers/nvdimm/claim.c
+@@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_names
+       if (rw == READ) {
+               if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
+                       return -EIO;
+-              if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
++              if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
+                       return -EIO;
+               return 0;
+       }
+--- a/drivers/nvdimm/pmem.c
++++ b/drivers/nvdimm/pmem.c
+@@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct pag
+       while (len) {
+               mem = kmap_atomic(page);
+               chunk = min_t(unsigned int, len, PAGE_SIZE - off);
+-              rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
++              rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
+               kunmap_atomic(mem);
+               if (rem)
+                       return BLK_STS_IOERR;
+@@ -305,7 +305,7 @@ static long pmem_dax_direct_access(struc
+ /*
+  * Use the 'no check' versions of copy_from_iter_flushcache() and
+- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
++ * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
+  * checking, both file offset and device offset, is handled by
+  * dax_iomap_actor()
+  */
+@@ -318,7 +318,7 @@ static size_t pmem_copy_from_iter(struct
+ static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+               void *addr, size_t bytes, struct iov_iter *i)
+ {
+-      return _copy_to_iter_mcsafe(addr, bytes, i);
++      return _copy_mc_to_iter(addr, bytes, i);
+ }
+ static const struct dax_operations pmem_dax_ops = {
+--- a/include/linux/string.h
++++ b/include/linux/string.h
+@@ -161,20 +161,13 @@ extern int bcmp(const void *,const void
+ #ifndef __HAVE_ARCH_MEMCHR
+ extern void * memchr(const void *,int,__kernel_size_t);
+ #endif
+-#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
+-static inline __must_check unsigned long memcpy_mcsafe(void *dst,
+-              const void *src, size_t cnt)
+-{
+-      memcpy(dst, src, cnt);
+-      return 0;
+-}
+-#endif
+ #ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
+ static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
+ {
+       memcpy(dst, src, cnt);
+ }
+ #endif
++
+ void *memchr_inv(const void *s, int c, size_t n);
+ char *strreplace(char *s, char old, char new);
+--- a/include/linux/uaccess.h
++++ b/include/linux/uaccess.h
+@@ -163,6 +163,19 @@ copy_in_user(void __user *to, const void
+ }
+ #endif
++#ifndef copy_mc_to_kernel
++/*
++ * Without arch opt-in this generic copy_mc_to_kernel() will not handle
++ * #MC (or arch equivalent) during source read.
++ */
++static inline unsigned long __must_check
++copy_mc_to_kernel(void *dst, const void *src, size_t cnt)
++{
++      memcpy(dst, src, cnt);
++      return 0;
++}
++#endif
++
+ static __always_inline void pagefault_disabled_inc(void)
+ {
+       current->pagefault_disabled++;
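
An architecture replaces the generic stub above by declaring its own copy_mc_to_kernel() and defining the macro of the same name so the #ifndef guard is skipped. A minimal sketch of that opt-in convention (the header placement and exact prototype are assumptions for illustration, not quoted from this hunk):

    /* arch/<arch>/include/asm/uaccess.h -- illustrative opt-in sketch */
    unsigned long __must_check
    copy_mc_to_kernel(void *dst, const void *src, unsigned len);
    #define copy_mc_to_kernel copy_mc_to_kernel
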
+--- a/include/linux/uio.h
++++ b/include/linux/uio.h
+@@ -186,10 +186,10 @@ size_t _copy_from_iter_flushcache(void *
+ #define _copy_from_iter_flushcache _copy_from_iter_nocache
+ #endif
+-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
+-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i);
++#ifdef CONFIG_ARCH_HAS_COPY_MC
++size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
+ #else
+-#define _copy_to_iter_mcsafe _copy_to_iter
++#define _copy_mc_to_iter _copy_to_iter
+ #endif
+ static __always_inline __must_check
+@@ -202,12 +202,12 @@ size_t copy_from_iter_flushcache(void *a
+ }
+ static __always_inline __must_check
+-size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
++size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
+ {
+       if (unlikely(!check_copy_size(addr, bytes, true)))
+               return 0;
+       else
+-              return _copy_to_iter_mcsafe(addr, bytes, i);
++              return _copy_mc_to_iter(addr, bytes, i);
+ }
+ size_t iov_iter_zero(size_t bytes, struct iov_iter *);
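
Ordinary callers go through the size-checked copy_mc_to_iter() wrapper above; the pmem dax path in this patch deliberately uses the unchecked _copy_mc_to_iter() to skip HARDENED_USERCOPY overhead. The function below is a hypothetical caller, not part of the patch:

    #include <linux/uio.h>

    static size_t example_copy_out(void *kaddr, size_t bytes, struct iov_iter *i)
    {
            /* A short return means a source memory error cut the copy short;
             * with CONFIG_ARCH_HAS_COPY_MC=n this is just _copy_to_iter(). */
            return copy_mc_to_iter(kaddr, bytes, i);
    }
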
+--- a/lib/Kconfig
++++ b/lib/Kconfig
+@@ -631,7 +631,12 @@ config UACCESS_MEMCPY
+ config ARCH_HAS_UACCESS_FLUSHCACHE
+       bool
+-config ARCH_HAS_UACCESS_MCSAFE
++# arch has a concept of a recoverable synchronous exception due to a
++# memory-read error like x86 machine-check or ARM data-abort, and
++# implements copy_mc_to_{user,kernel} to abort and report
++# 'bytes-transferred' if that exception fires when accessing the source
++# buffer.
++config ARCH_HAS_COPY_MC
+       bool
+ # Temporary. Goes away when all archs are cleaned up
+--- a/lib/iov_iter.c
++++ b/lib/iov_iter.c
+@@ -636,30 +636,30 @@ size_t _copy_to_iter(const void *addr, s
+ }
+ EXPORT_SYMBOL(_copy_to_iter);
+-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
+-static int copyout_mcsafe(void __user *to, const void *from, size_t n)
++#ifdef CONFIG_ARCH_HAS_COPY_MC
++static int copyout_mc(void __user *to, const void *from, size_t n)
+ {
+       if (access_ok(to, n)) {
+               instrument_copy_to_user(to, from, n);
+-              n = copy_to_user_mcsafe((__force void *) to, from, n);
++              n = copy_mc_to_user((__force void *) to, from, n);
+       }
+       return n;
+ }
+-static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
++static unsigned long copy_mc_to_page(struct page *page, size_t offset,
+               const char *from, size_t len)
+ {
+       unsigned long ret;
+       char *to;
+       to = kmap_atomic(page);
+-      ret = memcpy_mcsafe(to + offset, from, len);
++      ret = copy_mc_to_kernel(to + offset, from, len);
+       kunmap_atomic(to);
+       return ret;
+ }
+-static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
++static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
+                               struct iov_iter *i)
+ {
+       struct pipe_inode_info *pipe = i->pipe;
+@@ -677,7 +677,7 @@ static size_t copy_pipe_to_iter_mcsafe(c
+               size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
+               unsigned long rem;
+-              rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
++              rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
+                                           off, addr, chunk);
+               i->head = i_head;
+               i->iov_offset = off + chunk - rem;
+@@ -694,18 +694,17 @@ static size_t copy_pipe_to_iter_mcsafe(c
+ }
+ /**
+- * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
++ * _copy_mc_to_iter - copy to iter with source memory error exception handling
+  * @addr: source kernel address
+  * @bytes: total transfer length
+  * @iter: destination iterator
+  *
+- * The pmem driver arranges for filesystem-dax to use this facility via
+- * dax_copy_to_iter() for protecting read/write to persistent memory.
+- * Unless / until an architecture can guarantee identical performance
+- * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
+- * performance regression to switch more users to the mcsafe version.
++ * The pmem driver deploys this for the dax operation
++ * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
++ * block-layer). Upon #MC, read(2) aborts and returns EIO or the bytes
++ * successfully copied.
+  *
+- * Otherwise, the main differences between this and typical _copy_to_iter().
++ * The main differences between this and typical _copy_to_iter() are:
+  *
+  * * Typical tail/residue handling after a fault retries the copy
+  *   byte-by-byte until the fault happens again. Re-triggering machine
+@@ -716,23 +715,22 @@ static size_t copy_pipe_to_iter_mcsafe(c
+  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
+  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
+  *   a short copy.
+- *
+- * See MCSAFE_TEST for self-test.
+  */
+-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
++size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
+ {
+       const char *from = addr;
+       unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
+       if (unlikely(iov_iter_is_pipe(i)))
+-              return copy_pipe_to_iter_mcsafe(addr, bytes, i);
++              return copy_mc_pipe_to_iter(addr, bytes, i);
+       if (iter_is_iovec(i))
+               might_fault();
+       iterate_and_advance(i, bytes, v,
+-              copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
++              copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
++                         v.iov_len),
+               ({
+-              rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
+-                               (from += v.bv_len) - v.bv_len, v.bv_len);
++              rem = copy_mc_to_page(v.bv_page, v.bv_offset,
++                                    (from += v.bv_len) - v.bv_len, v.bv_len);
+               if (rem) {
+                       curr_addr = (unsigned long) from;
+                       bytes = curr_addr - s_addr - rem;
+@@ -740,8 +738,8 @@ size_t _copy_to_iter_mcsafe(const void *
+               }
+               }),
+               ({
+-              rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
+-                              v.iov_len);
++              rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
++                                      - v.iov_len, v.iov_len);
+               if (rem) {
+                       curr_addr = (unsigned long) from;
+                       bytes = curr_addr - s_addr - rem;
+@@ -752,8 +750,8 @@ size_t _copy_to_iter_mcsafe(const void *
+       return bytes;
+ }
+-EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
+-#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
++EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
++#endif /* CONFIG_ARCH_HAS_COPY_MC */
+ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
+ {
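
As the kernel-doc above notes, _copy_mc_to_iter() returns a short count when poison is consumed mid-copy. A hedged sketch of caller-side handling (the EIO-or-short-read policy comes from that comment; the function itself is hypothetical):

    #include <linux/errno.h>
    #include <linux/uio.h>

    static ssize_t example_dax_read(const void *src, size_t len, struct iov_iter *i)
    {
            size_t copied = _copy_mc_to_iter(src, len, i);

            if (!copied && len)
                    return -EIO;    /* nothing transferred before the error */
            return copied;          /* possibly a short read */
    }
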
+--- a/tools/arch/x86/include/asm/mcsafe_test.h
++++ /dev/null
+@@ -1,13 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef _MCSAFE_TEST_H_
+-#define _MCSAFE_TEST_H_
+-
+-.macro MCSAFE_TEST_CTL
+-.endm
+-
+-.macro MCSAFE_TEST_SRC reg count target
+-.endm
+-
+-.macro MCSAFE_TEST_DST reg count target
+-.endm
+-#endif /* _MCSAFE_TEST_H_ */
+--- a/tools/arch/x86/lib/memcpy_64.S
++++ b/tools/arch/x86/lib/memcpy_64.S
+@@ -4,7 +4,6 @@
+ #include <linux/linkage.h>
+ #include <asm/errno.h>
+ #include <asm/cpufeatures.h>
+-#include <asm/mcsafe_test.h>
+ #include <asm/alternative-asm.h>
+ #include <asm/export.h>
+@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
+ SYM_FUNC_END(memcpy_orig)
+ .popsection
+-
+-#ifndef CONFIG_UML
+-
+-MCSAFE_TEST_CTL
+-
+-/*
+- * __memcpy_mcsafe - memory copy with machine check exception handling
+- * Note that we only catch machine checks when reading the source addresses.
+- * Writes to target are posted and don't generate machine checks.
+- */
+-SYM_FUNC_START(__memcpy_mcsafe)
+-      cmpl $8, %edx
+-      /* Less than 8 bytes? Go to byte copy loop */
+-      jb .L_no_whole_words
+-
+-      /* Check for bad alignment of source */
+-      testl $7, %esi
+-      /* Already aligned */
+-      jz .L_8byte_aligned
+-
+-      /* Copy one byte at a time until source is 8-byte aligned */
+-      movl %esi, %ecx
+-      andl $7, %ecx
+-      subl $8, %ecx
+-      negl %ecx
+-      subl %ecx, %edx
+-.L_read_leading_bytes:
+-      movb (%rsi), %al
+-      MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
+-      MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
+-.L_write_leading_bytes:
+-      movb %al, (%rdi)
+-      incq %rsi
+-      incq %rdi
+-      decl %ecx
+-      jnz .L_read_leading_bytes
+-
+-.L_8byte_aligned:
+-      movl %edx, %ecx
+-      andl $7, %edx
+-      shrl $3, %ecx
+-      jz .L_no_whole_words
+-
+-.L_read_words:
+-      movq (%rsi), %r8
+-      MCSAFE_TEST_SRC %rsi 8 .E_read_words
+-      MCSAFE_TEST_DST %rdi 8 .E_write_words
+-.L_write_words:
+-      movq %r8, (%rdi)
+-      addq $8, %rsi
+-      addq $8, %rdi
+-      decl %ecx
+-      jnz .L_read_words
+-
+-      /* Any trailing bytes? */
+-.L_no_whole_words:
+-      andl %edx, %edx
+-      jz .L_done_memcpy_trap
+-
+-      /* Copy trailing bytes */
+-      movl %edx, %ecx
+-.L_read_trailing_bytes:
+-      movb (%rsi), %al
+-      MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
+-      MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
+-.L_write_trailing_bytes:
+-      movb %al, (%rdi)
+-      incq %rsi
+-      incq %rdi
+-      decl %ecx
+-      jnz .L_read_trailing_bytes
+-
+-      /* Copy successful. Return zero */
+-.L_done_memcpy_trap:
+-      xorl %eax, %eax
+-.L_done:
+-      ret
+-SYM_FUNC_END(__memcpy_mcsafe)
+-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
+-
+-      .section .fixup, "ax"
+-      /*
+-       * Return number of bytes not copied for any failure. Note that
+-       * there is no "tail" handling since the source buffer is 8-byte
+-       * aligned and poison is cacheline aligned.
+-       */
+-.E_read_words:
+-      shll    $3, %ecx
+-.E_leading_bytes:
+-      addl    %edx, %ecx
+-.E_trailing_bytes:
+-      mov     %ecx, %eax
+-      jmp     .L_done
+-
+-      /*
+-       * For write fault handling, given the destination is unaligned,
+-       * we handle faults on multi-byte writes with a byte-by-byte
+-       * copy up to the write-protected page.
+-       */
+-.E_write_words:
+-      shll    $3, %ecx
+-      addl    %edx, %ecx
+-      movl    %ecx, %edx
+-      jmp mcsafe_handle_tail
+-
+-      .previous
+-
+-      _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+-      _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+-      _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+-      _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+-      _ASM_EXTABLE(.L_write_words, .E_write_words)
+-      _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+-#endif
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -548,8 +548,8 @@ static const char *uaccess_safe_builtin[
+       "__ubsan_handle_shift_out_of_bounds",
+       /* misc */
+       "csum_partial_copy_generic",
+-      "__memcpy_mcsafe",
+-      "mcsafe_handle_tail",
++      "copy_mc_fragile",
++      "copy_mc_fragile_handle_tail",
+       "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
+       NULL
+ };
+--- a/tools/perf/bench/Build
++++ b/tools/perf/bench/Build
+@@ -11,7 +11,6 @@ perf-y += epoll-ctl.o
+ perf-y += synthesize.o
+ perf-y += kallsyms-parse.o
+-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
+ perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
+ perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
+--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
++++ /dev/null
+@@ -1,24 +0,0 @@
+-/*
+- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
+- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
+- * happy.
+- */
+-#include <linux/types.h>
+-
+-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
+-
+-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
+-{
+-      for (; len; --len, to++, from++) {
+-              /*
+-               * Call the assembly routine back directly since
+-               * memcpy_mcsafe() may silently fallback to memcpy.
+-               */
+-              unsigned long rem = __memcpy_mcsafe(to, from, 1);
+-
+-              if (rem)
+-                      break;
+-      }
+-      return len;
+-}
+--- a/tools/testing/nvdimm/test/nfit.c
++++ b/tools/testing/nvdimm/test/nfit.c
+@@ -23,7 +23,8 @@
+ #include "nfit_test.h"
+ #include "../watermark.h"
+-#include <asm/mcsafe_test.h>
++#include <asm/copy_mc_test.h>
++#include <asm/mce.h>
+ /*
+  * Generate an NFIT table to describe the following topology:
+@@ -3052,7 +3053,7 @@ static struct platform_driver nfit_test_
+       .id_table = nfit_test_id,
+ };
+-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
++static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+ enum INJECT {
+       INJECT_NONE,
+@@ -3060,7 +3061,7 @@ enum INJECT {
+       INJECT_DST,
+ };
+-static void mcsafe_test_init(char *dst, char *src, size_t size)
++static void copy_mc_test_init(char *dst, char *src, size_t size)
+ {
+       size_t i;
+@@ -3069,7 +3070,7 @@ static void mcsafe_test_init(char *dst,
+               src[i] = (char) i;
+ }
+-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
++static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
+               size_t size, unsigned long rem)
+ {
+       size_t i;
+@@ -3090,12 +3091,12 @@ static bool mcsafe_test_validate(unsigne
+       return true;
+ }
+-void mcsafe_test(void)
++void copy_mc_test(void)
+ {
+       char *inject_desc[] = { "none", "source", "destination" };
+       enum INJECT inj;
+-      if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
++      if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
+               pr_info("%s: run...\n", __func__);
+       } else {
+               pr_info("%s: disabled, skip.\n", __func__);
+@@ -3113,31 +3114,31 @@ void mcsafe_test(void)
+                       switch (inj) {
+                       case INJECT_NONE:
+-                              mcsafe_inject_src(NULL);
+-                              mcsafe_inject_dst(NULL);
+-                              dst = &mcsafe_buf[2048];
+-                              src = &mcsafe_buf[1024 - i];
++                              copy_mc_inject_src(NULL);
++                              copy_mc_inject_dst(NULL);
++                              dst = &copy_mc_buf[2048];
++                              src = &copy_mc_buf[1024 - i];
+                               expect = 0;
+                               break;
+                       case INJECT_SRC:
+-                              mcsafe_inject_src(&mcsafe_buf[1024]);
+-                              mcsafe_inject_dst(NULL);
+-                              dst = &mcsafe_buf[2048];
+-                              src = &mcsafe_buf[1024 - i];
++                              copy_mc_inject_src(&copy_mc_buf[1024]);
++                              copy_mc_inject_dst(NULL);
++                              dst = &copy_mc_buf[2048];
++                              src = &copy_mc_buf[1024 - i];
+                               expect = 512 - i;
+                               break;
+                       case INJECT_DST:
+-                              mcsafe_inject_src(NULL);
+-                              mcsafe_inject_dst(&mcsafe_buf[2048]);
+-                              dst = &mcsafe_buf[2048 - i];
+-                              src = &mcsafe_buf[1024];
++                              copy_mc_inject_src(NULL);
++                              copy_mc_inject_dst(&copy_mc_buf[2048]);
++                              dst = &copy_mc_buf[2048 - i];
++                              src = &copy_mc_buf[1024];
+                               expect = 512 - i;
+                               break;
+                       }
+-                      mcsafe_test_init(dst, src, 512);
+-                      rem = __memcpy_mcsafe(dst, src, 512);
+-                      valid = mcsafe_test_validate(dst, src, 512, expect);
++                      copy_mc_test_init(dst, src, 512);
++                      rem = copy_mc_fragile(dst, src, 512);
++                      valid = copy_mc_test_validate(dst, src, 512, expect);
+                       if (rem == expect && valid)
+                               continue;
+                       pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
+@@ -3149,8 +3150,8 @@ void mcsafe_test(void)
+               }
+       }
+-      mcsafe_inject_src(NULL);
+-      mcsafe_inject_dst(NULL);
++      copy_mc_inject_src(NULL);
++      copy_mc_inject_dst(NULL);
+ }
+ static __init int nfit_test_init(void)
+@@ -3161,7 +3162,7 @@ static __init int nfit_test_init(void)
+       libnvdimm_test();
+       acpi_nfit_test();
+       device_dax_test();
+-      mcsafe_test();
++      copy_mc_test();
+       dax_pmem_test();
+       dax_pmem_core_test();
+ #ifdef CONFIG_DEV_DAX_PMEM_COMPAT
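
The expected remainders in the INJECT_SRC and INJECT_DST cases above follow from where the poison sits relative to the 512-byte copy: the injection point lies i bytes past the start of whichever buffer is targeted, so the first i bytes succeed and 512 - i remain. An illustrative restatement (not part of the test):

    /* Illustration of "expect = 512 - i" above. */
    static unsigned long expected_remainder(size_t inject_off, size_t start_off,
                                            size_t len)
    {
            size_t good = inject_off - start_off;   /* clean bytes before poison */

            return good >= len ? 0 : len - good;
    }
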
+--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
++++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
+@@ -12,4 +12,4 @@ memcpy_p7_t1
+ copyuser_64_exc_t0
+ copyuser_64_exc_t1
+ copyuser_64_exc_t2
+-memcpy_mcsafe_64
++copy_mc_64
+--- a/tools/testing/selftests/powerpc/copyloops/Makefile
++++ b/tools/testing/selftests/powerpc/copyloops/Makefile
+@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
+ TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
+               copyuser_p7_t0 copyuser_p7_t1 \
+               memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
+-              memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
++              memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
+               copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
+ EXTRA_SOURCES := validate.c ../harness.c stubs.S
+@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%:      memcpy_power7.S
+               -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
+               -o $@ $^
+-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
++$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
+       $(CC) $(CPPFLAGS) $(CFLAGS) \
+-              -D COPY_LOOP=test_memcpy_mcsafe \
++              -D COPY_LOOP=test_copy_mc_generic \
+               -o $@ $^
+ $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
+--- /dev/null
++++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
+@@ -0,0 +1,242 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (C) IBM Corporation, 2011
++ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
++ * Author - Balbir Singh <bsingharora@gmail.com>
++ */
++#include <asm/ppc_asm.h>
++#include <asm/errno.h>
++#include <asm/export.h>
++
++      .macro err1
++100:
++      EX_TABLE(100b,.Ldo_err1)
++      .endm
++
++      .macro err2
++200:
++      EX_TABLE(200b,.Ldo_err2)
++      .endm
++
++      .macro err3
++300:  EX_TABLE(300b,.Ldone)
++      .endm
++
++.Ldo_err2:
++      ld      r22,STK_REG(R22)(r1)
++      ld      r21,STK_REG(R21)(r1)
++      ld      r20,STK_REG(R20)(r1)
++      ld      r19,STK_REG(R19)(r1)
++      ld      r18,STK_REG(R18)(r1)
++      ld      r17,STK_REG(R17)(r1)
++      ld      r16,STK_REG(R16)(r1)
++      ld      r15,STK_REG(R15)(r1)
++      ld      r14,STK_REG(R14)(r1)
++      addi    r1,r1,STACKFRAMESIZE
++.Ldo_err1:
++      /* Do a byte by byte copy to get the exact remaining size */
++      mtctr   r7
++46:
++err3; lbz     r0,0(r4)
++      addi    r4,r4,1
++err3; stb     r0,0(r3)
++      addi    r3,r3,1
++      bdnz    46b
++      li      r3,0
++      blr
++
++.Ldone:
++      mfctr   r3
++      blr
++
++
++_GLOBAL(copy_mc_generic)
++      mr      r7,r5
++      cmpldi  r5,16
++      blt     .Lshort_copy
++
++.Lcopy:
++      /* Get the source 8B aligned */
++      neg     r6,r4
++      mtocrf  0x01,r6
++      clrldi  r6,r6,(64-3)
++
++      bf      cr7*4+3,1f
++err1; lbz     r0,0(r4)
++      addi    r4,r4,1
++err1; stb     r0,0(r3)
++      addi    r3,r3,1
++      subi    r7,r7,1
++
++1:    bf      cr7*4+2,2f
++err1; lhz     r0,0(r4)
++      addi    r4,r4,2
++err1; sth     r0,0(r3)
++      addi    r3,r3,2
++      subi    r7,r7,2
++
++2:    bf      cr7*4+1,3f
++err1; lwz     r0,0(r4)
++      addi    r4,r4,4
++err1; stw     r0,0(r3)
++      addi    r3,r3,4
++      subi    r7,r7,4
++
++3:    sub     r5,r5,r6
++      cmpldi  r5,128
++
++      mflr    r0
++      stdu    r1,-STACKFRAMESIZE(r1)
++      std     r14,STK_REG(R14)(r1)
++      std     r15,STK_REG(R15)(r1)
++      std     r16,STK_REG(R16)(r1)
++      std     r17,STK_REG(R17)(r1)
++      std     r18,STK_REG(R18)(r1)
++      std     r19,STK_REG(R19)(r1)
++      std     r20,STK_REG(R20)(r1)
++      std     r21,STK_REG(R21)(r1)
++      std     r22,STK_REG(R22)(r1)
++      std     r0,STACKFRAMESIZE+16(r1)
++
++      blt     5f
++      srdi    r6,r5,7
++      mtctr   r6
++
++      /* Now do cacheline (128B) sized loads and stores. */
++      .align  5
++4:
++err2; ld      r0,0(r4)
++err2; ld      r6,8(r4)
++err2; ld      r8,16(r4)
++err2; ld      r9,24(r4)
++err2; ld      r10,32(r4)
++err2; ld      r11,40(r4)
++err2; ld      r12,48(r4)
++err2; ld      r14,56(r4)
++err2; ld      r15,64(r4)
++err2; ld      r16,72(r4)
++err2; ld      r17,80(r4)
++err2; ld      r18,88(r4)
++err2; ld      r19,96(r4)
++err2; ld      r20,104(r4)
++err2; ld      r21,112(r4)
++err2; ld      r22,120(r4)
++      addi    r4,r4,128
++err2; std     r0,0(r3)
++err2; std     r6,8(r3)
++err2; std     r8,16(r3)
++err2; std     r9,24(r3)
++err2; std     r10,32(r3)
++err2; std     r11,40(r3)
++err2; std     r12,48(r3)
++err2; std     r14,56(r3)
++err2; std     r15,64(r3)
++err2; std     r16,72(r3)
++err2; std     r17,80(r3)
++err2; std     r18,88(r3)
++err2; std     r19,96(r3)
++err2; std     r20,104(r3)
++err2; std     r21,112(r3)
++err2; std     r22,120(r3)
++      addi    r3,r3,128
++      subi    r7,r7,128
++      bdnz    4b
++
++      clrldi  r5,r5,(64-7)
++
++      /* Up to 127B to go */
++5:    srdi    r6,r5,4
++      mtocrf  0x01,r6
++
++6:    bf      cr7*4+1,7f
++err2; ld      r0,0(r4)
++err2; ld      r6,8(r4)
++err2; ld      r8,16(r4)
++err2; ld      r9,24(r4)
++err2; ld      r10,32(r4)
++err2; ld      r11,40(r4)
++err2; ld      r12,48(r4)
++err2; ld      r14,56(r4)
++      addi    r4,r4,64
++err2; std     r0,0(r3)
++err2; std     r6,8(r3)
++err2; std     r8,16(r3)
++err2; std     r9,24(r3)
++err2; std     r10,32(r3)
++err2; std     r11,40(r3)
++err2; std     r12,48(r3)
++err2; std     r14,56(r3)
++      addi    r3,r3,64
++      subi    r7,r7,64
++
++7:    ld      r14,STK_REG(R14)(r1)
++      ld      r15,STK_REG(R15)(r1)
++      ld      r16,STK_REG(R16)(r1)
++      ld      r17,STK_REG(R17)(r1)
++      ld      r18,STK_REG(R18)(r1)
++      ld      r19,STK_REG(R19)(r1)
++      ld      r20,STK_REG(R20)(r1)
++      ld      r21,STK_REG(R21)(r1)
++      ld      r22,STK_REG(R22)(r1)
++      addi    r1,r1,STACKFRAMESIZE
++
++      /* Up to 63B to go */
++      bf      cr7*4+2,8f
++err1; ld      r0,0(r4)
++err1; ld      r6,8(r4)
++err1; ld      r8,16(r4)
++err1; ld      r9,24(r4)
++      addi    r4,r4,32
++err1; std     r0,0(r3)
++err1; std     r6,8(r3)
++err1; std     r8,16(r3)
++err1; std     r9,24(r3)
++      addi    r3,r3,32
++      subi    r7,r7,32
++
++      /* Up to 31B to go */
++8:    bf      cr7*4+3,9f
++err1; ld      r0,0(r4)
++err1; ld      r6,8(r4)
++      addi    r4,r4,16
++err1; std     r0,0(r3)
++err1; std     r6,8(r3)
++      addi    r3,r3,16
++      subi    r7,r7,16
++
++9:    clrldi  r5,r5,(64-4)
++
++      /* Up to 15B to go */
++.Lshort_copy:
++      mtocrf  0x01,r5
++      bf      cr7*4+0,12f
++err1; lwz     r0,0(r4)        /* Less chance of a reject with word ops */
++err1; lwz     r6,4(r4)
++      addi    r4,r4,8
++err1; stw     r0,0(r3)
++err1; stw     r6,4(r3)
++      addi    r3,r3,8
++      subi    r7,r7,8
++
++12:   bf      cr7*4+1,13f
++err1; lwz     r0,0(r4)
++      addi    r4,r4,4
++err1; stw     r0,0(r3)
++      addi    r3,r3,4
++      subi    r7,r7,4
++
++13:   bf      cr7*4+2,14f
++err1; lhz     r0,0(r4)
++      addi    r4,r4,2
++err1; sth     r0,0(r3)
++      addi    r3,r3,2
++      subi    r7,r7,2
++
++14:   bf      cr7*4+3,15f
++err1; lbz     r0,0(r4)
++err1; stb     r0,0(r3)
++
++15:   li      r3,0
++      blr
++
++EXPORT_SYMBOL_GPL(copy_mc_generic);