From a6b525948d858ef78bc6fa52b29d878f70a40c33 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 29 Oct 2020 14:03:58 +0100 Subject: [PATCH] 5.8-stable patches added patches: arm64-link-with-z-norelro-regardless-of-config_relocatable.patch arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch --- ...lro-regardless-of-config_relocatable.patch | 48 + ...rkaround_1-enabling-code-on-all-cpus.patch | 57 + ...rkaround_2-enabling-code-on-all-cpus.patch | 55 + ...efully-with-efi_rng_protocol-failure.patch | 66 + ...le-remove-firmware_efi_embedded-enum.patch | 47 + queue-5.8/series | 8 + ...troduce-copy_mc_enhanced_fast_string.patch | 182 ++ ...build-error-when-acpi-is-not-enabled.patch | 48 + ...py_mcsafe-to-copy_mc_to_-user-kernel.patch | 2346 +++++++++++++++++ 9 files changed, 2857 insertions(+) create mode 100644 queue-5.8/arm64-link-with-z-norelro-regardless-of-config_relocatable.patch create mode 100644 queue-5.8/arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch create mode 100644 queue-5.8/arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch create mode 100644 queue-5.8/efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch create mode 100644 queue-5.8/fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch create mode 100644 queue-5.8/x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch create mode 100644 queue-5.8/x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch create mode 100644 queue-5.8/x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch diff --git a/queue-5.8/arm64-link-with-z-norelro-regardless-of-config_relocatable.patch b/queue-5.8/arm64-link-with-z-norelro-regardless-of-config_relocatable.patch new file mode 100644 index 00000000000..a1759f510fd --- /dev/null +++ b/queue-5.8/arm64-link-with-z-norelro-regardless-of-config_relocatable.patch @@ -0,0 +1,48 @@ +From 3b92fa7485eba16b05166fddf38ab42f2ff6ab95 Mon Sep 17 00:00:00 2001 +From: Nick Desaulniers +Date: Fri, 16 Oct 2020 10:53:39 -0700 +Subject: arm64: link with -z norelro regardless of CONFIG_RELOCATABLE + +From: Nick Desaulniers + +commit 3b92fa7485eba16b05166fddf38ab42f2ff6ab95 upstream. + +With CONFIG_EXPERT=y, CONFIG_KASAN=y, CONFIG_RANDOMIZE_BASE=n, +CONFIG_RELOCATABLE=n, we observe the following failure when trying to +link the kernel image with LD=ld.lld: + +error: section: .exit.data is not contiguous with other relro sections + +ld.lld defaults to -z relro while ld.bfd defaults to -z norelro. This +was previously fixed, but only for CONFIG_RELOCATABLE=y. 
+ +Fixes: 3bbd3db86470 ("arm64: relocatable: fix inconsistencies in linker script and options") +Signed-off-by: Nick Desaulniers +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20201016175339.2429280-1-ndesaulniers@google.com +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/Makefile | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/arm64/Makefile ++++ b/arch/arm64/Makefile +@@ -10,14 +10,14 @@ + # + # Copyright (C) 1995-2001 by Russell King + +-LDFLAGS_vmlinux :=--no-undefined -X ++LDFLAGS_vmlinux :=--no-undefined -X -z norelro + CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) + + ifeq ($(CONFIG_RELOCATABLE), y) + # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour + # for relative relocs, since this leads to better Image compression + # with the relocation offsets always being zero. +-LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \ ++LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ + $(call ld-option, --no-apply-dynamic-relocs) + endif + diff --git a/queue-5.8/arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch b/queue-5.8/arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch new file mode 100644 index 00000000000..730381ae41a --- /dev/null +++ b/queue-5.8/arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch @@ -0,0 +1,57 @@ +From 18fce56134c987e5b4eceddafdbe4b00c07e2ae1 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Thu, 16 Jul 2020 17:11:09 +0100 +Subject: arm64: Run ARCH_WORKAROUND_1 enabling code on all CPUs + +From: Marc Zyngier + +commit 18fce56134c987e5b4eceddafdbe4b00c07e2ae1 upstream. + +Commit 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack +thereof") changed the way we deal with ARCH_WORKAROUND_1, by moving most +of the enabling code to the .matches() callback. + +This has the unfortunate effect that the workaround gets only enabled on +the first affected CPU, and no other. + +In order to address this, forcefully call the .matches() callback from a +.cpu_enable() callback, which brings us back to the original behaviour. 
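As an illustrative aside, the shape of the fix is small: the per-CPU .cpu_enable() hook simply re-runs the .matches() hook with local-CPU scope, so the enabling side effects embedded in the detection code run on every CPU rather than only on the first one found to be affected. A minimal sketch, mirroring the hunk in the patch below:

static void
cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap)
{
	/*
	 * check_branch_predictor() both detects the erratum and enables
	 * the mitigation; invoking it from .cpu_enable() with
	 * SCOPE_LOCAL_CPU applies the workaround on each online CPU.
	 */
	cap->matches(cap, SCOPE_LOCAL_CPU);
}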
+ +Fixes: 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack thereof") +Cc: +Reviewed-by: Suzuki K Poulose +Signed-off-by: Marc Zyngier +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/cpu_errata.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -599,6 +599,12 @@ check_branch_predictor(const struct arm6 + return (need_wa > 0); + } + ++static void ++cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap) ++{ ++ cap->matches(cap, SCOPE_LOCAL_CPU); ++} ++ + static const __maybe_unused struct midr_range tx2_family_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), +@@ -890,9 +896,11 @@ const struct arm64_cpu_capabilities arm6 + }, + #endif + { ++ .desc = "Branch predictor hardening", + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = check_branch_predictor, ++ .cpu_enable = cpu_enable_branch_predictor_hardening, + }, + #ifdef CONFIG_HARDEN_EL2_VECTORS + { diff --git a/queue-5.8/arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch b/queue-5.8/arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch new file mode 100644 index 00000000000..e22b26527a1 --- /dev/null +++ b/queue-5.8/arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch @@ -0,0 +1,55 @@ +From 39533e12063be7f55e3d6ae21ffe067799d542a4 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Thu, 16 Jul 2020 17:11:10 +0100 +Subject: arm64: Run ARCH_WORKAROUND_2 enabling code on all CPUs + +From: Marc Zyngier + +commit 39533e12063be7f55e3d6ae21ffe067799d542a4 upstream. + +Commit 606f8e7b27bf ("arm64: capabilities: Use linear array for +detection and verification") changed the way we deal with per-CPU errata +by only calling the .matches() callback until one CPU is found to be +affected. At this point, .matches() stop being called, and .cpu_enable() +will be called on all CPUs. + +This breaks the ARCH_WORKAROUND_2 handling, as only a single CPU will be +mitigated. + +In order to address this, forcefully call the .matches() callback from a +.cpu_enable() callback, which brings us back to the original behaviour. 
+ +Fixes: 606f8e7b27bf ("arm64: capabilities: Use linear array for detection and verification") +Cc: +Reviewed-by: Suzuki K Poulose +Signed-off-by: Marc Zyngier +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/cpu_errata.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -457,6 +457,12 @@ out_printmsg: + return required; + } + ++static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap) ++{ ++ if (ssbd_state != ARM64_SSBD_FORCE_DISABLE) ++ cap->matches(cap, SCOPE_LOCAL_CPU); ++} ++ + /* known invulnerable cores */ + static const struct midr_range arm64_ssb_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), +@@ -914,6 +920,7 @@ const struct arm64_cpu_capabilities arm6 + .capability = ARM64_SSBD, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_ssbd_mitigation, ++ .cpu_enable = cpu_enable_ssbd_mitigation, + .midr_range_list = arm64_ssb_cpus, + }, + #ifdef CONFIG_ARM64_ERRATUM_1418040 diff --git a/queue-5.8/efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch b/queue-5.8/efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch new file mode 100644 index 00000000000..d18329a51fb --- /dev/null +++ b/queue-5.8/efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch @@ -0,0 +1,66 @@ +From d32de9130f6c79533508e2c7879f18997bfbe2a0 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Sat, 26 Sep 2020 10:52:42 +0200 +Subject: efi/arm64: libstub: Deal gracefully with EFI_RNG_PROTOCOL failure + +From: Ard Biesheuvel + +commit d32de9130f6c79533508e2c7879f18997bfbe2a0 upstream. + +Currently, on arm64, we abort on any failure from efi_get_random_bytes() +other than EFI_NOT_FOUND when it comes to setting the physical seed for +KASLR, but ignore such failures when obtaining the seed for virtual +KASLR or for early seeding of the kernel's entropy pool via the config +table. This is inconsistent, and may lead to unexpected boot failures. + +So let's permit any failure for the physical seed, and simply report +the error code if it does not equal EFI_NOT_FOUND. 
+ +Cc: # v5.8+ +Reported-by: Heinrich Schuchardt +Signed-off-by: Ard Biesheuvel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/firmware/efi/libstub/arm64-stub.c | 8 +++++--- + drivers/firmware/efi/libstub/fdt.c | 4 +--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/firmware/efi/libstub/arm64-stub.c ++++ b/drivers/firmware/efi/libstub/arm64-stub.c +@@ -62,10 +62,12 @@ efi_status_t handle_kernel_image(unsigne + status = efi_get_random_bytes(sizeof(phys_seed), + (u8 *)&phys_seed); + if (status == EFI_NOT_FOUND) { +- efi_info("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n"); ++ efi_info("EFI_RNG_PROTOCOL unavailable, KASLR will be disabled\n"); ++ efi_nokaslr = true; + } else if (status != EFI_SUCCESS) { +- efi_err("efi_get_random_bytes() failed\n"); +- return status; ++ efi_err("efi_get_random_bytes() failed (0x%lx), KASLR will be disabled\n", ++ status); ++ efi_nokaslr = true; + } + } else { + efi_info("KASLR disabled on kernel command line\n"); +--- a/drivers/firmware/efi/libstub/fdt.c ++++ b/drivers/firmware/efi/libstub/fdt.c +@@ -136,7 +136,7 @@ static efi_status_t update_fdt(void *ori + if (status) + goto fdt_set_fail; + +- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { ++ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { + efi_status_t efi_status; + + efi_status = efi_get_random_bytes(sizeof(fdt_val64), +@@ -145,8 +145,6 @@ static efi_status_t update_fdt(void *ori + status = fdt_setprop_var(fdt, node, "kaslr-seed", fdt_val64); + if (status) + goto fdt_set_fail; +- } else if (efi_status != EFI_NOT_FOUND) { +- return efi_status; + } + } + diff --git a/queue-5.8/fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch b/queue-5.8/fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch new file mode 100644 index 00000000000..990f7a291d4 --- /dev/null +++ b/queue-5.8/fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch @@ -0,0 +1,47 @@ +From 06e67b849ab910a49a629445f43edb074153d0eb Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Fri, 2 Oct 2020 10:38:14 -0700 +Subject: fs/kernel_read_file: Remove FIRMWARE_EFI_EMBEDDED enum + +From: Kees Cook + +commit 06e67b849ab910a49a629445f43edb074153d0eb upstream. + +The "FIRMWARE_EFI_EMBEDDED" enum is a "where", not a "what". It +should not be distinguished separately from just "FIRMWARE", as this +confuses the LSMs about what is being loaded. Additionally, there was +no actual validation of the firmware contents happening. 
+ +Fixes: e4c2c0ff00ec ("firmware: Add new platform fallback mechanism and firmware_request_platform()") +Signed-off-by: Kees Cook +Reviewed-by: Luis Chamberlain +Acked-by: Scott Branden +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20201002173828.2099543-3-keescook@chromium.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/base/firmware_loader/fallback_platform.c | 2 +- + include/linux/fs.h | 1 - + 2 files changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/base/firmware_loader/fallback_platform.c ++++ b/drivers/base/firmware_loader/fallback_platform.c +@@ -17,7 +17,7 @@ int firmware_fallback_platform(struct fw + if (!(opt_flags & FW_OPT_FALLBACK_PLATFORM)) + return -ENOENT; + +- rc = security_kernel_load_data(LOADING_FIRMWARE_EFI_EMBEDDED); ++ rc = security_kernel_load_data(LOADING_FIRMWARE); + if (rc) + return rc; + +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -3011,7 +3011,6 @@ extern int do_pipe_flags(int *, int); + id(UNKNOWN, unknown) \ + id(FIRMWARE, firmware) \ + id(FIRMWARE_PREALLOC_BUFFER, firmware) \ +- id(FIRMWARE_EFI_EMBEDDED, firmware) \ + id(MODULE, kernel-module) \ + id(KEXEC_IMAGE, kexec-image) \ + id(KEXEC_INITRAMFS, kexec-initramfs) \ diff --git a/queue-5.8/series b/queue-5.8/series index 6ca858b6e1c..b405edfdf90 100644 --- a/queue-5.8/series +++ b/queue-5.8/series @@ -15,3 +15,11 @@ io_uring-fix-use-of-xarray-in-__io_uring_files_cancel.patch io_uring-fix-xarray-usage-in-io_uring_add_task_file.patch io_uring-convert-advanced-xarray-uses-to-the-normal-api.patch scripts-setlocalversion-make-git-describe-output-more-reliable.patch +efi-arm64-libstub-deal-gracefully-with-efi_rng_protocol-failure.patch +fs-kernel_read_file-remove-firmware_efi_embedded-enum.patch +arm64-run-arch_workaround_1-enabling-code-on-all-cpus.patch +arm64-run-arch_workaround_2-enabling-code-on-all-cpus.patch +arm64-link-with-z-norelro-regardless-of-config_relocatable.patch +x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch +x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch +x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch diff --git a/queue-5.8/x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch b/queue-5.8/x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch new file mode 100644 index 00000000000..44918bccb97 --- /dev/null +++ b/queue-5.8/x86-copy_mc-introduce-copy_mc_enhanced_fast_string.patch @@ -0,0 +1,182 @@ +From 5da8e4a658109e3b7e1f45ae672b7c06ac3e7158 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Mon, 5 Oct 2020 20:40:25 -0700 +Subject: x86/copy_mc: Introduce copy_mc_enhanced_fast_string() + +From: Dan Williams + +commit 5da8e4a658109e3b7e1f45ae672b7c06ac3e7158 upstream. + +The motivations to go rework memcpy_mcsafe() are that the benefit of +doing slow and careful copies is obviated on newer CPUs, and that the +current opt-in list of CPUs to instrument recovery is broken relative to +those CPUs. There is no need to keep an opt-in list up to date on an +ongoing basis if pmem/dax operations are instrumented for recovery by +default. With recovery enabled by default the old "mcsafe_key" opt-in to +careful copying can be made a "fragile" opt-out. Where the "fragile" +list takes steps to not consume poison across cachelines. + +The discussion with Linus made clear that the current "_mcsafe" suffix +was imprecise to a fault. The operations that are needed by pmem/dax are +to copy from a source address that might throw #MC to a destination that +may write-fault, if it is a user page. 
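As an illustrative aside, here is a minimal caller-side sketch of those two fault domains, using the copy_mc_to_{user,kernel}() entry points this series introduces. The example_* helpers and the pmem-flavoured context are invented for illustration; the prototypes follow the uaccess declarations added later in the series, both calls return 0 on success or the number of bytes not copied, and the to-user variant assumes an architecture that selects ARCH_HAS_COPY_MC:

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/uaccess.h>	/* copy_mc_to_kernel(), copy_mc_to_user() */

/* Kernel destination: only the source read can trap (#MC on poison). */
static int example_read_to_kernel(void *dst, const void *pmem_src, size_t len)
{
	if (copy_mc_to_kernel(dst, pmem_src, len) != 0)
		return -EIO;	/* poison consumed while reading the source */
	return 0;
}

/* User destination: the source may #MC and the destination may write-fault. */
static ssize_t example_read_to_user(void __user *dst, const void *pmem_src,
				    size_t len)
{
	unsigned long rem = copy_mc_to_user(dst, pmem_src, len);

	return len - rem;	/* short copy on either kind of exception */
}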
+ +So copy_to_user_mcsafe() becomes copy_mc_to_user() to indicate +the separate precautions taken on source and destination. +copy_mc_to_kernel() is introduced as a non-SMAP version that does not +expect write-faults on the destination, but is still prepared to abort +with an error code upon taking #MC. + +The original copy_mc_fragile() implementation had negative performance +implications since it did not use the fast-string instruction sequence +to perform copies. For this reason copy_mc_to_kernel() fell back to +plain memcpy() to preserve performance on platforms that did not indicate +the capability to recover from machine check exceptions. However, that +capability detection was not architectural and now that some platforms +can recover from fast-string consumption of memory errors the memcpy() +fallback now causes these more capable platforms to fail. + +Introduce copy_mc_enhanced_fast_string() as the fast default +implementation of copy_mc_to_kernel() and finalize the transition of +copy_mc_fragile() to be a platform quirk to indicate 'copy-carefully'. +With this in place, copy_mc_to_kernel() is fast and recovery-ready by +default regardless of hardware capability. + +Thanks to Vivek for identifying that copy_user_generic() is not suitable +as the copy_mc_to_user() backend since the #MC handler explicitly checks +ex_has_fault_handler(). Thanks to the 0day robot for catching a +performance bug in the x86/copy_mc_to_user implementation. + + [ bp: Add the "why" for this change from the 0/2th message, massage. ] + +Fixes: 92b0729c34ca ("x86/mm, x86/mce: Add memcpy_mcsafe()") +Reported-by: Erwin Tsaur +Reported-by: 0day robot +Signed-off-by: Dan Williams +Signed-off-by: Borislav Petkov +Reviewed-by: Tony Luck +Tested-by: Erwin Tsaur +Cc: +Link: https://lkml.kernel.org/r/160195562556.2163339.18063423034951948973.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/lib/copy_mc.c | 32 +++++++++++++++++++++++--------- + arch/x86/lib/copy_mc_64.S | 36 ++++++++++++++++++++++++++++++++++++ + tools/objtool/check.c | 1 + + 3 files changed, 60 insertions(+), 9 deletions(-) + +--- a/arch/x86/lib/copy_mc.c ++++ b/arch/x86/lib/copy_mc.c +@@ -45,6 +45,8 @@ void enable_copy_mc_fragile(void) + #define copy_mc_fragile_enabled (0) + #endif + ++unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len); ++ + /** + * copy_mc_to_kernel - memory copy that handles source exceptions + * +@@ -52,9 +54,11 @@ void enable_copy_mc_fragile(void) + * @src: source address + * @len: number of bytes to copy + * +- * Call into the 'fragile' version on systems that have trouble +- * actually do machine check recovery. Everyone else can just +- * use memcpy(). ++ * Call into the 'fragile' version on systems that benefit from avoiding ++ * corner case poison consumption scenarios, For example, accessing ++ * poison across 2 cachelines with a single instruction. Almost all ++ * other uses case can use copy_mc_enhanced_fast_string() for a fast ++ * recoverable copy, or fallback to plain memcpy. + * + * Return 0 for success, or number of bytes not copied if there was an + * exception. 
+@@ -63,6 +67,8 @@ unsigned long __must_check copy_mc_to_ke + { + if (copy_mc_fragile_enabled) + return copy_mc_fragile(dst, src, len); ++ if (static_cpu_has(X86_FEATURE_ERMS)) ++ return copy_mc_enhanced_fast_string(dst, src, len); + memcpy(dst, src, len); + return 0; + } +@@ -72,11 +78,19 @@ unsigned long __must_check copy_mc_to_us + { + unsigned long ret; + +- if (!copy_mc_fragile_enabled) +- return copy_user_generic(dst, src, len); ++ if (copy_mc_fragile_enabled) { ++ __uaccess_begin(); ++ ret = copy_mc_fragile(dst, src, len); ++ __uaccess_end(); ++ return ret; ++ } ++ ++ if (static_cpu_has(X86_FEATURE_ERMS)) { ++ __uaccess_begin(); ++ ret = copy_mc_enhanced_fast_string(dst, src, len); ++ __uaccess_end(); ++ return ret; ++ } + +- __uaccess_begin(); +- ret = copy_mc_fragile(dst, src, len); +- __uaccess_end(); +- return ret; ++ return copy_user_generic(dst, src, len); + } +--- a/arch/x86/lib/copy_mc_64.S ++++ b/arch/x86/lib/copy_mc_64.S +@@ -124,4 +124,40 @@ EXPORT_SYMBOL_GPL(copy_mc_fragile) + _ASM_EXTABLE(.L_write_words, .E_write_words) + _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) + #endif /* CONFIG_X86_MCE */ ++ ++/* ++ * copy_mc_enhanced_fast_string - memory copy with exception handling ++ * ++ * Fast string copy + fault / exception handling. If the CPU does ++ * support machine check exception recovery, but does not support ++ * recovering from fast-string exceptions then this CPU needs to be ++ * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any ++ * machine check recovery support this version should be no slower than ++ * standard memcpy. ++ */ ++SYM_FUNC_START(copy_mc_enhanced_fast_string) ++ movq %rdi, %rax ++ movq %rdx, %rcx ++.L_copy: ++ rep movsb ++ /* Copy successful. Return zero */ ++ xorl %eax, %eax ++ ret ++SYM_FUNC_END(copy_mc_enhanced_fast_string) ++ ++ .section .fixup, "ax" ++.E_copy: ++ /* ++ * On fault %rcx is updated such that the copy instruction could ++ * optionally be restarted at the fault position, i.e. it ++ * contains 'bytes remaining'. A non-zero return indicates error ++ * to copy_mc_generic() users, or indicate short transfers to ++ * user-copy routines. ++ */ ++ movq %rcx, %rax ++ ret ++ ++ .previous ++ ++ _ASM_EXTABLE_FAULT(.L_copy, .E_copy) + #endif /* !CONFIG_UML */ +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -550,6 +550,7 @@ static const char *uaccess_safe_builtin[ + "csum_partial_copy_generic", + "copy_mc_fragile", + "copy_mc_fragile_handle_tail", ++ "copy_mc_enhanced_fast_string", + "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ + NULL + }; diff --git a/queue-5.8/x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch b/queue-5.8/x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch new file mode 100644 index 00000000000..48efc32f5df --- /dev/null +++ b/queue-5.8/x86-pci-fix-intel_mid_pci.c-build-error-when-acpi-is-not-enabled.patch @@ -0,0 +1,48 @@ +From 035fff1f7aab43e420e0098f0854470a5286fb83 Mon Sep 17 00:00:00 2001 +From: Randy Dunlap +Date: Fri, 21 Aug 2020 17:10:27 -0700 +Subject: x86/PCI: Fix intel_mid_pci.c build error when ACPI is not enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Randy Dunlap + +commit 035fff1f7aab43e420e0098f0854470a5286fb83 upstream. + +Fix build error when CONFIG_ACPI is not set/enabled by adding the header +file which contains a stub for the function in the build +error. 
+ + ../arch/x86/pci/intel_mid_pci.c: In function ‘intel_mid_pci_init’: + ../arch/x86/pci/intel_mid_pci.c:303:2: error: implicit declaration of function ‘acpi_noirq_set’; did you mean ‘acpi_irq_get’? [-Werror=implicit-function-declaration] + acpi_noirq_set(); + +Fixes: a912a7584ec3 ("x86/platform/intel-mid: Move PCI initialization to arch_init()") +Link: https://lore.kernel.org/r/ea903917-e51b-4cc9-2680-bc1e36efa026@infradead.org +Signed-off-by: Randy Dunlap +Signed-off-by: Bjorn Helgaas +Reviewed-by: Andy Shevchenko +Reviewed-by: Jesse Barnes +Acked-by: Thomas Gleixner +Cc: stable@vger.kernel.org # v4.16+ +Cc: Jacob Pan +Cc: Len Brown +Cc: Jesse Barnes +Cc: Arjan van de Ven +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/pci/intel_mid_pci.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/pci/intel_mid_pci.c ++++ b/arch/x86/pci/intel_mid_pci.c +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + + #define PCIE_CAP_OFFSET 0x100 + diff --git a/queue-5.8/x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch b/queue-5.8/x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch new file mode 100644 index 00000000000..d55f49712d5 --- /dev/null +++ b/queue-5.8/x86-powerpc-rename-memcpy_mcsafe-to-copy_mc_to_-user-kernel.patch @@ -0,0 +1,2346 @@ +From ec6347bb43395cb92126788a1a5b25302543f815 Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Mon, 5 Oct 2020 20:40:16 -0700 +Subject: x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}() + +From: Dan Williams + +commit ec6347bb43395cb92126788a1a5b25302543f815 upstream. + +In reaction to a proposal to introduce a memcpy_mcsafe_fast() +implementation Linus points out that memcpy_mcsafe() is poorly named +relative to communicating the scope of the interface. Specifically what +addresses are valid to pass as source, destination, and what faults / +exceptions are handled. + +Of particular concern is that even though x86 might be able to handle +the semantics of copy_mc_to_user() with its common copy_user_generic() +implementation other archs likely need / want an explicit path for this +case: + + On Fri, May 1, 2020 at 11:28 AM Linus Torvalds wrote: + > + > On Thu, Apr 30, 2020 at 6:21 PM Dan Williams wrote: + > > + > > However now I see that copy_user_generic() works for the wrong reason. + > > It works because the exception on the source address due to poison + > > looks no different than a write fault on the user address to the + > > caller, it's still just a short copy. So it makes copy_to_user() work + > > for the wrong reason relative to the name. + > + > Right. + > + > And it won't work that way on other architectures. On x86, we have a + > generic function that can take faults on either side, and we use it + > for both cases (and for the "in_user" case too), but that's an + > artifact of the architecture oddity. + > + > In fact, it's probably wrong even on x86 - because it can hide bugs - + > but writing those things is painful enough that everybody prefers + > having just one function. + +Replace a single top-level memcpy_mcsafe() with either +copy_mc_to_user(), or copy_mc_to_kernel(). + +Introduce an x86 copy_mc_fragile() name as the rename for the +low-level x86 implementation formerly named memcpy_mcsafe(). It is used +as the slow / careful backend that is supplanted by a fast +copy_mc_generic() in a follow-on patch. 
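Concretely, the mechanical change at a call site is a rename plus, where needed, choosing the user- or kernel-destination variant. A condensed before/after sketch, modelled on the drivers/nvdimm/claim.c hunk later in this patch:

	/* before: one catch-all helper */
	if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
		return -EIO;

	/* after: explicitly a kernel-destination, #MC-aware copy */
	if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
		return -EIO;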
+ +One side-effect of this reorganization is that separating copy_mc_64.S +to its own file means that perf no longer needs to track dependencies +for its memcpy_64.S benchmarks. + + [ bp: Massage a bit. ] + +Signed-off-by: Dan Williams +Signed-off-by: Borislav Petkov +Reviewed-by: Tony Luck +Acked-by: Michael Ellerman +Cc: +Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com +Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/Kconfig | 2 + arch/powerpc/include/asm/string.h | 2 + arch/powerpc/include/asm/uaccess.h | 40 +- + arch/powerpc/lib/Makefile | 2 + arch/powerpc/lib/copy_mc_64.S | 242 +++++++++++++++++ + arch/powerpc/lib/memcpy_mcsafe_64.S | 242 ----------------- + arch/x86/Kconfig | 2 + arch/x86/Kconfig.debug | 2 + arch/x86/include/asm/copy_mc_test.h | 75 +++++ + arch/x86/include/asm/mce.h | 9 + arch/x86/include/asm/mcsafe_test.h | 75 ----- + arch/x86/include/asm/string_64.h | 32 -- + arch/x86/include/asm/uaccess.h | 9 + arch/x86/include/asm/uaccess_64.h | 20 - + arch/x86/kernel/cpu/mce/core.c | 8 + arch/x86/kernel/quirks.c | 10 + arch/x86/lib/Makefile | 1 + arch/x86/lib/copy_mc.c | 82 +++++ + arch/x86/lib/copy_mc_64.S | 127 ++++++++ + arch/x86/lib/memcpy_64.S | 115 -------- + arch/x86/lib/usercopy_64.c | 21 - + drivers/md/dm-writecache.c | 15 - + drivers/nvdimm/claim.c | 2 + drivers/nvdimm/pmem.c | 6 + include/linux/string.h | 9 + include/linux/uaccess.h | 13 + include/linux/uio.h | 10 + lib/Kconfig | 7 + lib/iov_iter.c | 48 +-- + tools/arch/x86/include/asm/mcsafe_test.h | 13 + tools/arch/x86/lib/memcpy_64.S | 115 -------- + tools/objtool/check.c | 4 + tools/perf/bench/Build | 1 + tools/perf/bench/mem-memcpy-x86-64-lib.c | 24 - + tools/testing/nvdimm/test/nfit.c | 49 +-- + tools/testing/selftests/powerpc/copyloops/.gitignore | 2 + tools/testing/selftests/powerpc/copyloops/Makefile | 6 + tools/testing/selftests/powerpc/copyloops/copy_mc_64.S | 242 +++++++++++++++++ + 38 files changed, 914 insertions(+), 770 deletions(-) + +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -135,7 +135,7 @@ config PPC + select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UACCESS_FLUSHCACHE +- select ARCH_HAS_UACCESS_MCSAFE if PPC64 ++ select ARCH_HAS_COPY_MC if PPC64 + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_KEEP_MEMBLOCK +--- a/arch/powerpc/include/asm/string.h ++++ b/arch/powerpc/include/asm/string.h +@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *fr + #ifndef CONFIG_KASAN + #define __HAVE_ARCH_MEMSET32 + #define __HAVE_ARCH_MEMSET64 +-#define __HAVE_ARCH_MEMCPY_MCSAFE + +-extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz); + extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t); + extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); + extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t); +--- a/arch/powerpc/include/asm/uaccess.h ++++ b/arch/powerpc/include/asm/uaccess.h +@@ -436,6 +436,32 @@ do { \ + extern unsigned long __copy_tofrom_user(void __user *to, + const void __user *from, unsigned long size); + ++#ifdef CONFIG_ARCH_HAS_COPY_MC ++unsigned long __must_check ++copy_mc_generic(void *to, const void *from, unsigned long size); ++ ++static inline unsigned long __must_check ++copy_mc_to_kernel(void *to, const void *from, 
unsigned long size) ++{ ++ return copy_mc_generic(to, from, size); ++} ++#define copy_mc_to_kernel copy_mc_to_kernel ++ ++static inline unsigned long __must_check ++copy_mc_to_user(void __user *to, const void *from, unsigned long n) ++{ ++ if (likely(check_copy_size(from, n, true))) { ++ if (access_ok(to, n)) { ++ allow_write_to_user(to, n); ++ n = copy_mc_generic((void *)to, from, n); ++ prevent_write_to_user(to, n); ++ } ++ } ++ ++ return n; ++} ++#endif ++ + #ifdef __powerpc64__ + static inline unsigned long + raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) +@@ -524,20 +550,6 @@ raw_copy_to_user(void __user *to, const + return ret; + } + +-static __always_inline unsigned long __must_check +-copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) +-{ +- if (likely(check_copy_size(from, n, true))) { +- if (access_ok(to, n)) { +- allow_write_to_user(to, n); +- n = memcpy_mcsafe((void *)to, from, n); +- prevent_write_to_user(to, n); +- } +- } +- +- return n; +-} +- + unsigned long __arch_clear_user(void __user *addr, unsigned long size); + + static inline unsigned long clear_user(void __user *addr, unsigned long size) +--- a/arch/powerpc/lib/Makefile ++++ b/arch/powerpc/lib/Makefile +@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_ + memcpy_power7.o + + obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ +- memcpy_64.o memcpy_mcsafe_64.o ++ memcpy_64.o copy_mc_64.o + + obj64-$(CONFIG_SMP) += locks.o + obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +--- /dev/null ++++ b/arch/powerpc/lib/copy_mc_64.S +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (C) IBM Corporation, 2011 ++ * Derived from copyuser_power7.s by Anton Blanchard ++ * Author - Balbir Singh ++ */ ++#include ++#include ++#include ++ ++ .macro err1 ++100: ++ EX_TABLE(100b,.Ldo_err1) ++ .endm ++ ++ .macro err2 ++200: ++ EX_TABLE(200b,.Ldo_err2) ++ .endm ++ ++ .macro err3 ++300: EX_TABLE(300b,.Ldone) ++ .endm ++ ++.Ldo_err2: ++ ld r22,STK_REG(R22)(r1) ++ ld r21,STK_REG(R21)(r1) ++ ld r20,STK_REG(R20)(r1) ++ ld r19,STK_REG(R19)(r1) ++ ld r18,STK_REG(R18)(r1) ++ ld r17,STK_REG(R17)(r1) ++ ld r16,STK_REG(R16)(r1) ++ ld r15,STK_REG(R15)(r1) ++ ld r14,STK_REG(R14)(r1) ++ addi r1,r1,STACKFRAMESIZE ++.Ldo_err1: ++ /* Do a byte by byte copy to get the exact remaining size */ ++ mtctr r7 ++46: ++err3; lbz r0,0(r4) ++ addi r4,r4,1 ++err3; stb r0,0(r3) ++ addi r3,r3,1 ++ bdnz 46b ++ li r3,0 ++ blr ++ ++.Ldone: ++ mfctr r3 ++ blr ++ ++ ++_GLOBAL(copy_mc_generic) ++ mr r7,r5 ++ cmpldi r5,16 ++ blt .Lshort_copy ++ ++.Lcopy: ++ /* Get the source 8B aligned */ ++ neg r6,r4 ++ mtocrf 0x01,r6 ++ clrldi r6,r6,(64-3) ++ ++ bf cr7*4+3,1f ++err1; lbz r0,0(r4) ++ addi r4,r4,1 ++err1; stb r0,0(r3) ++ addi r3,r3,1 ++ subi r7,r7,1 ++ ++1: bf cr7*4+2,2f ++err1; lhz r0,0(r4) ++ addi r4,r4,2 ++err1; sth r0,0(r3) ++ addi r3,r3,2 ++ subi r7,r7,2 ++ ++2: bf cr7*4+1,3f ++err1; lwz r0,0(r4) ++ addi r4,r4,4 ++err1; stw r0,0(r3) ++ addi r3,r3,4 ++ subi r7,r7,4 ++ ++3: sub r5,r5,r6 ++ cmpldi r5,128 ++ ++ mflr r0 ++ stdu r1,-STACKFRAMESIZE(r1) ++ std r14,STK_REG(R14)(r1) ++ std r15,STK_REG(R15)(r1) ++ std r16,STK_REG(R16)(r1) ++ std r17,STK_REG(R17)(r1) ++ std r18,STK_REG(R18)(r1) ++ std r19,STK_REG(R19)(r1) ++ std r20,STK_REG(R20)(r1) ++ std r21,STK_REG(R21)(r1) ++ std r22,STK_REG(R22)(r1) ++ std r0,STACKFRAMESIZE+16(r1) ++ ++ blt 5f ++ srdi r6,r5,7 ++ mtctr r6 ++ ++ /* Now do cacheline (128B) sized loads and stores. 
*/ ++ .align 5 ++4: ++err2; ld r0,0(r4) ++err2; ld r6,8(r4) ++err2; ld r8,16(r4) ++err2; ld r9,24(r4) ++err2; ld r10,32(r4) ++err2; ld r11,40(r4) ++err2; ld r12,48(r4) ++err2; ld r14,56(r4) ++err2; ld r15,64(r4) ++err2; ld r16,72(r4) ++err2; ld r17,80(r4) ++err2; ld r18,88(r4) ++err2; ld r19,96(r4) ++err2; ld r20,104(r4) ++err2; ld r21,112(r4) ++err2; ld r22,120(r4) ++ addi r4,r4,128 ++err2; std r0,0(r3) ++err2; std r6,8(r3) ++err2; std r8,16(r3) ++err2; std r9,24(r3) ++err2; std r10,32(r3) ++err2; std r11,40(r3) ++err2; std r12,48(r3) ++err2; std r14,56(r3) ++err2; std r15,64(r3) ++err2; std r16,72(r3) ++err2; std r17,80(r3) ++err2; std r18,88(r3) ++err2; std r19,96(r3) ++err2; std r20,104(r3) ++err2; std r21,112(r3) ++err2; std r22,120(r3) ++ addi r3,r3,128 ++ subi r7,r7,128 ++ bdnz 4b ++ ++ clrldi r5,r5,(64-7) ++ ++ /* Up to 127B to go */ ++5: srdi r6,r5,4 ++ mtocrf 0x01,r6 ++ ++6: bf cr7*4+1,7f ++err2; ld r0,0(r4) ++err2; ld r6,8(r4) ++err2; ld r8,16(r4) ++err2; ld r9,24(r4) ++err2; ld r10,32(r4) ++err2; ld r11,40(r4) ++err2; ld r12,48(r4) ++err2; ld r14,56(r4) ++ addi r4,r4,64 ++err2; std r0,0(r3) ++err2; std r6,8(r3) ++err2; std r8,16(r3) ++err2; std r9,24(r3) ++err2; std r10,32(r3) ++err2; std r11,40(r3) ++err2; std r12,48(r3) ++err2; std r14,56(r3) ++ addi r3,r3,64 ++ subi r7,r7,64 ++ ++7: ld r14,STK_REG(R14)(r1) ++ ld r15,STK_REG(R15)(r1) ++ ld r16,STK_REG(R16)(r1) ++ ld r17,STK_REG(R17)(r1) ++ ld r18,STK_REG(R18)(r1) ++ ld r19,STK_REG(R19)(r1) ++ ld r20,STK_REG(R20)(r1) ++ ld r21,STK_REG(R21)(r1) ++ ld r22,STK_REG(R22)(r1) ++ addi r1,r1,STACKFRAMESIZE ++ ++ /* Up to 63B to go */ ++ bf cr7*4+2,8f ++err1; ld r0,0(r4) ++err1; ld r6,8(r4) ++err1; ld r8,16(r4) ++err1; ld r9,24(r4) ++ addi r4,r4,32 ++err1; std r0,0(r3) ++err1; std r6,8(r3) ++err1; std r8,16(r3) ++err1; std r9,24(r3) ++ addi r3,r3,32 ++ subi r7,r7,32 ++ ++ /* Up to 31B to go */ ++8: bf cr7*4+3,9f ++err1; ld r0,0(r4) ++err1; ld r6,8(r4) ++ addi r4,r4,16 ++err1; std r0,0(r3) ++err1; std r6,8(r3) ++ addi r3,r3,16 ++ subi r7,r7,16 ++ ++9: clrldi r5,r5,(64-4) ++ ++ /* Up to 15B to go */ ++.Lshort_copy: ++ mtocrf 0x01,r5 ++ bf cr7*4+0,12f ++err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ ++err1; lwz r6,4(r4) ++ addi r4,r4,8 ++err1; stw r0,0(r3) ++err1; stw r6,4(r3) ++ addi r3,r3,8 ++ subi r7,r7,8 ++ ++12: bf cr7*4+1,13f ++err1; lwz r0,0(r4) ++ addi r4,r4,4 ++err1; stw r0,0(r3) ++ addi r3,r3,4 ++ subi r7,r7,4 ++ ++13: bf cr7*4+2,14f ++err1; lhz r0,0(r4) ++ addi r4,r4,2 ++err1; sth r0,0(r3) ++ addi r3,r3,2 ++ subi r7,r7,2 ++ ++14: bf cr7*4+3,15f ++err1; lbz r0,0(r4) ++err1; stb r0,0(r3) ++ ++15: li r3,0 ++ blr ++ ++EXPORT_SYMBOL_GPL(copy_mc_generic); +--- a/arch/powerpc/lib/memcpy_mcsafe_64.S ++++ /dev/null +@@ -1,242 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * Copyright (C) IBM Corporation, 2011 +- * Derived from copyuser_power7.s by Anton Blanchard +- * Author - Balbir Singh +- */ +-#include +-#include +-#include +- +- .macro err1 +-100: +- EX_TABLE(100b,.Ldo_err1) +- .endm +- +- .macro err2 +-200: +- EX_TABLE(200b,.Ldo_err2) +- .endm +- +- .macro err3 +-300: EX_TABLE(300b,.Ldone) +- .endm +- +-.Ldo_err2: +- ld r22,STK_REG(R22)(r1) +- ld r21,STK_REG(R21)(r1) +- ld r20,STK_REG(R20)(r1) +- ld r19,STK_REG(R19)(r1) +- ld r18,STK_REG(R18)(r1) +- ld r17,STK_REG(R17)(r1) +- ld r16,STK_REG(R16)(r1) +- ld r15,STK_REG(R15)(r1) +- ld r14,STK_REG(R14)(r1) +- addi r1,r1,STACKFRAMESIZE +-.Ldo_err1: +- /* Do a byte by byte copy to get the exact remaining size */ +- mtctr r7 +-46: +-err3; lbz r0,0(r4) +- addi 
r4,r4,1 +-err3; stb r0,0(r3) +- addi r3,r3,1 +- bdnz 46b +- li r3,0 +- blr +- +-.Ldone: +- mfctr r3 +- blr +- +- +-_GLOBAL(memcpy_mcsafe) +- mr r7,r5 +- cmpldi r5,16 +- blt .Lshort_copy +- +-.Lcopy: +- /* Get the source 8B aligned */ +- neg r6,r4 +- mtocrf 0x01,r6 +- clrldi r6,r6,(64-3) +- +- bf cr7*4+3,1f +-err1; lbz r0,0(r4) +- addi r4,r4,1 +-err1; stb r0,0(r3) +- addi r3,r3,1 +- subi r7,r7,1 +- +-1: bf cr7*4+2,2f +-err1; lhz r0,0(r4) +- addi r4,r4,2 +-err1; sth r0,0(r3) +- addi r3,r3,2 +- subi r7,r7,2 +- +-2: bf cr7*4+1,3f +-err1; lwz r0,0(r4) +- addi r4,r4,4 +-err1; stw r0,0(r3) +- addi r3,r3,4 +- subi r7,r7,4 +- +-3: sub r5,r5,r6 +- cmpldi r5,128 +- +- mflr r0 +- stdu r1,-STACKFRAMESIZE(r1) +- std r14,STK_REG(R14)(r1) +- std r15,STK_REG(R15)(r1) +- std r16,STK_REG(R16)(r1) +- std r17,STK_REG(R17)(r1) +- std r18,STK_REG(R18)(r1) +- std r19,STK_REG(R19)(r1) +- std r20,STK_REG(R20)(r1) +- std r21,STK_REG(R21)(r1) +- std r22,STK_REG(R22)(r1) +- std r0,STACKFRAMESIZE+16(r1) +- +- blt 5f +- srdi r6,r5,7 +- mtctr r6 +- +- /* Now do cacheline (128B) sized loads and stores. */ +- .align 5 +-4: +-err2; ld r0,0(r4) +-err2; ld r6,8(r4) +-err2; ld r8,16(r4) +-err2; ld r9,24(r4) +-err2; ld r10,32(r4) +-err2; ld r11,40(r4) +-err2; ld r12,48(r4) +-err2; ld r14,56(r4) +-err2; ld r15,64(r4) +-err2; ld r16,72(r4) +-err2; ld r17,80(r4) +-err2; ld r18,88(r4) +-err2; ld r19,96(r4) +-err2; ld r20,104(r4) +-err2; ld r21,112(r4) +-err2; ld r22,120(r4) +- addi r4,r4,128 +-err2; std r0,0(r3) +-err2; std r6,8(r3) +-err2; std r8,16(r3) +-err2; std r9,24(r3) +-err2; std r10,32(r3) +-err2; std r11,40(r3) +-err2; std r12,48(r3) +-err2; std r14,56(r3) +-err2; std r15,64(r3) +-err2; std r16,72(r3) +-err2; std r17,80(r3) +-err2; std r18,88(r3) +-err2; std r19,96(r3) +-err2; std r20,104(r3) +-err2; std r21,112(r3) +-err2; std r22,120(r3) +- addi r3,r3,128 +- subi r7,r7,128 +- bdnz 4b +- +- clrldi r5,r5,(64-7) +- +- /* Up to 127B to go */ +-5: srdi r6,r5,4 +- mtocrf 0x01,r6 +- +-6: bf cr7*4+1,7f +-err2; ld r0,0(r4) +-err2; ld r6,8(r4) +-err2; ld r8,16(r4) +-err2; ld r9,24(r4) +-err2; ld r10,32(r4) +-err2; ld r11,40(r4) +-err2; ld r12,48(r4) +-err2; ld r14,56(r4) +- addi r4,r4,64 +-err2; std r0,0(r3) +-err2; std r6,8(r3) +-err2; std r8,16(r3) +-err2; std r9,24(r3) +-err2; std r10,32(r3) +-err2; std r11,40(r3) +-err2; std r12,48(r3) +-err2; std r14,56(r3) +- addi r3,r3,64 +- subi r7,r7,64 +- +-7: ld r14,STK_REG(R14)(r1) +- ld r15,STK_REG(R15)(r1) +- ld r16,STK_REG(R16)(r1) +- ld r17,STK_REG(R17)(r1) +- ld r18,STK_REG(R18)(r1) +- ld r19,STK_REG(R19)(r1) +- ld r20,STK_REG(R20)(r1) +- ld r21,STK_REG(R21)(r1) +- ld r22,STK_REG(R22)(r1) +- addi r1,r1,STACKFRAMESIZE +- +- /* Up to 63B to go */ +- bf cr7*4+2,8f +-err1; ld r0,0(r4) +-err1; ld r6,8(r4) +-err1; ld r8,16(r4) +-err1; ld r9,24(r4) +- addi r4,r4,32 +-err1; std r0,0(r3) +-err1; std r6,8(r3) +-err1; std r8,16(r3) +-err1; std r9,24(r3) +- addi r3,r3,32 +- subi r7,r7,32 +- +- /* Up to 31B to go */ +-8: bf cr7*4+3,9f +-err1; ld r0,0(r4) +-err1; ld r6,8(r4) +- addi r4,r4,16 +-err1; std r0,0(r3) +-err1; std r6,8(r3) +- addi r3,r3,16 +- subi r7,r7,16 +- +-9: clrldi r5,r5,(64-4) +- +- /* Up to 15B to go */ +-.Lshort_copy: +- mtocrf 0x01,r5 +- bf cr7*4+0,12f +-err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ +-err1; lwz r6,4(r4) +- addi r4,r4,8 +-err1; stw r0,0(r3) +-err1; stw r6,4(r3) +- addi r3,r3,8 +- subi r7,r7,8 +- +-12: bf cr7*4+1,13f +-err1; lwz r0,0(r4) +- addi r4,r4,4 +-err1; stw r0,0(r3) +- addi r3,r3,4 +- subi r7,r7,4 +- +-13: bf cr7*4+2,14f +-err1; lhz 
r0,0(r4) +- addi r4,r4,2 +-err1; sth r0,0(r3) +- addi r3,r3,2 +- subi r7,r7,2 +- +-14: bf cr7*4+3,15f +-err1; lbz r0,0(r4) +-err1; stb r0,0(r3) +- +-15: li r3,0 +- blr +- +-EXPORT_SYMBOL_GPL(memcpy_mcsafe); +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -75,7 +75,7 @@ config X86 + select ARCH_HAS_PTE_DEVMAP if X86_64 + select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 +- select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE ++ select ARCH_HAS_COPY_MC if X86_64 + select ARCH_HAS_SET_MEMORY + select ARCH_HAS_SET_DIRECT_MAP + select ARCH_HAS_STRICT_KERNEL_RWX +--- a/arch/x86/Kconfig.debug ++++ b/arch/x86/Kconfig.debug +@@ -59,7 +59,7 @@ config EARLY_PRINTK_USB_XDBC + You should normally say N here, unless you want to debug early + crashes or need a very simple printk logging facility. + +-config MCSAFE_TEST ++config COPY_MC_TEST + def_bool n + + config EFI_PGT_DUMP +--- /dev/null ++++ b/arch/x86/include/asm/copy_mc_test.h +@@ -0,0 +1,75 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _COPY_MC_TEST_H_ ++#define _COPY_MC_TEST_H_ ++ ++#ifndef __ASSEMBLY__ ++#ifdef CONFIG_COPY_MC_TEST ++extern unsigned long copy_mc_test_src; ++extern unsigned long copy_mc_test_dst; ++ ++static inline void copy_mc_inject_src(void *addr) ++{ ++ if (addr) ++ copy_mc_test_src = (unsigned long) addr; ++ else ++ copy_mc_test_src = ~0UL; ++} ++ ++static inline void copy_mc_inject_dst(void *addr) ++{ ++ if (addr) ++ copy_mc_test_dst = (unsigned long) addr; ++ else ++ copy_mc_test_dst = ~0UL; ++} ++#else /* CONFIG_COPY_MC_TEST */ ++static inline void copy_mc_inject_src(void *addr) ++{ ++} ++ ++static inline void copy_mc_inject_dst(void *addr) ++{ ++} ++#endif /* CONFIG_COPY_MC_TEST */ ++ ++#else /* __ASSEMBLY__ */ ++#include ++ ++#ifdef CONFIG_COPY_MC_TEST ++.macro COPY_MC_TEST_CTL ++ .pushsection .data ++ .align 8 ++ .globl copy_mc_test_src ++ copy_mc_test_src: ++ .quad 0 ++ EXPORT_SYMBOL_GPL(copy_mc_test_src) ++ .globl copy_mc_test_dst ++ copy_mc_test_dst: ++ .quad 0 ++ EXPORT_SYMBOL_GPL(copy_mc_test_dst) ++ .popsection ++.endm ++ ++.macro COPY_MC_TEST_SRC reg count target ++ leaq \count(\reg), %r9 ++ cmp copy_mc_test_src, %r9 ++ ja \target ++.endm ++ ++.macro COPY_MC_TEST_DST reg count target ++ leaq \count(\reg), %r9 ++ cmp copy_mc_test_dst, %r9 ++ ja \target ++.endm ++#else ++.macro COPY_MC_TEST_CTL ++.endm ++ ++.macro COPY_MC_TEST_SRC reg count target ++.endm ++ ++.macro COPY_MC_TEST_DST reg count target ++.endm ++#endif /* CONFIG_COPY_MC_TEST */ ++#endif /* __ASSEMBLY__ */ ++#endif /* _COPY_MC_TEST_H_ */ +--- a/arch/x86/include/asm/mce.h ++++ b/arch/x86/include/asm/mce.h +@@ -174,6 +174,15 @@ extern void mce_unregister_decode_chain( + + extern int mce_p5_enabled; + ++#ifdef CONFIG_ARCH_HAS_COPY_MC ++extern void enable_copy_mc_fragile(void); ++unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt); ++#else ++static inline void enable_copy_mc_fragile(void) ++{ ++} ++#endif ++ + #ifdef CONFIG_X86_MCE + int mcheck_init(void); + void mcheck_cpu_init(struct cpuinfo_x86 *c); +--- a/arch/x86/include/asm/mcsafe_test.h ++++ /dev/null +@@ -1,75 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-#ifndef _MCSAFE_TEST_H_ +-#define _MCSAFE_TEST_H_ +- +-#ifndef __ASSEMBLY__ +-#ifdef CONFIG_MCSAFE_TEST +-extern unsigned long mcsafe_test_src; +-extern unsigned long mcsafe_test_dst; +- +-static inline void mcsafe_inject_src(void *addr) +-{ +- if (addr) +- mcsafe_test_src = (unsigned long) addr; +- else +- mcsafe_test_src = ~0UL; +-} +- +-static inline void 
mcsafe_inject_dst(void *addr) +-{ +- if (addr) +- mcsafe_test_dst = (unsigned long) addr; +- else +- mcsafe_test_dst = ~0UL; +-} +-#else /* CONFIG_MCSAFE_TEST */ +-static inline void mcsafe_inject_src(void *addr) +-{ +-} +- +-static inline void mcsafe_inject_dst(void *addr) +-{ +-} +-#endif /* CONFIG_MCSAFE_TEST */ +- +-#else /* __ASSEMBLY__ */ +-#include +- +-#ifdef CONFIG_MCSAFE_TEST +-.macro MCSAFE_TEST_CTL +- .pushsection .data +- .align 8 +- .globl mcsafe_test_src +- mcsafe_test_src: +- .quad 0 +- EXPORT_SYMBOL_GPL(mcsafe_test_src) +- .globl mcsafe_test_dst +- mcsafe_test_dst: +- .quad 0 +- EXPORT_SYMBOL_GPL(mcsafe_test_dst) +- .popsection +-.endm +- +-.macro MCSAFE_TEST_SRC reg count target +- leaq \count(\reg), %r9 +- cmp mcsafe_test_src, %r9 +- ja \target +-.endm +- +-.macro MCSAFE_TEST_DST reg count target +- leaq \count(\reg), %r9 +- cmp mcsafe_test_dst, %r9 +- ja \target +-.endm +-#else +-.macro MCSAFE_TEST_CTL +-.endm +- +-.macro MCSAFE_TEST_SRC reg count target +-.endm +- +-.macro MCSAFE_TEST_DST reg count target +-.endm +-#endif /* CONFIG_MCSAFE_TEST */ +-#endif /* __ASSEMBLY__ */ +-#endif /* _MCSAFE_TEST_H_ */ +--- a/arch/x86/include/asm/string_64.h ++++ b/arch/x86/include/asm/string_64.h +@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *c + + #endif + +-#define __HAVE_ARCH_MEMCPY_MCSAFE 1 +-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src, +- size_t cnt); +-DECLARE_STATIC_KEY_FALSE(mcsafe_key); +- +-/** +- * memcpy_mcsafe - copy memory with indication if a machine check happened +- * +- * @dst: destination address +- * @src: source address +- * @cnt: number of bytes to copy +- * +- * Low level memory copy function that catches machine checks +- * We only call into the "safe" function on systems that can +- * actually do machine check recovery. Everyone else can just +- * use memcpy(). +- * +- * Return 0 for success, or number of bytes not copied if there was an +- * exception. +- */ +-static __always_inline __must_check unsigned long +-memcpy_mcsafe(void *dst, const void *src, size_t cnt) +-{ +-#ifdef CONFIG_X86_MCE +- if (static_branch_unlikely(&mcsafe_key)) +- return __memcpy_mcsafe(dst, src, cnt); +- else +-#endif +- memcpy(dst, src, cnt); +- return 0; +-} +- + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE + #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1 + void __memcpy_flushcache(void *dst, const void *src, size_t cnt); +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(co + unsigned long __must_check clear_user(void __user *mem, unsigned long len); + unsigned long __must_check __clear_user(void __user *mem, unsigned long len); + ++#ifdef CONFIG_ARCH_HAS_COPY_MC ++unsigned long __must_check ++copy_mc_to_kernel(void *to, const void *from, unsigned len); ++#define copy_mc_to_kernel copy_mc_to_kernel ++ ++unsigned long __must_check ++copy_mc_to_user(void *to, const void *from, unsigned len); ++#endif ++ + /* + * movsl can be slow when source and dest are not both 8-byte aligned + */ +--- a/arch/x86/include/asm/uaccess_64.h ++++ b/arch/x86/include/asm/uaccess_64.h +@@ -47,22 +47,6 @@ copy_user_generic(void *to, const void * + } + + static __always_inline __must_check unsigned long +-copy_to_user_mcsafe(void *to, const void *from, unsigned len) +-{ +- unsigned long ret; +- +- __uaccess_begin(); +- /* +- * Note, __memcpy_mcsafe() is explicitly used since it can +- * handle exceptions / faults. memcpy_mcsafe() may fall back to +- * memcpy() which lacks this handling. 
+- */ +- ret = __memcpy_mcsafe(to, from, len); +- __uaccess_end(); +- return ret; +-} +- +-static __always_inline __must_check unsigned long + raw_copy_from_user(void *dst, const void __user *src, unsigned long size) + { + return copy_user_generic(dst, (__force void *)src, size); +@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, c + kasan_check_write(dst, size); + return __copy_user_flushcache(dst, src, size); + } +- +-unsigned long +-mcsafe_handle_tail(char *to, char *from, unsigned len); +- + #endif /* _ASM_X86_UACCESS_64_H */ +--- a/arch/x86/kernel/cpu/mce/core.c ++++ b/arch/x86/kernel/cpu/mce/core.c +@@ -40,7 +40,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -2122,7 +2121,7 @@ void mce_disable_bank(int bank) + and older. + * mce=nobootlog Don't log MCEs from before booting. + * mce=bios_cmci_threshold Don't program the CMCI threshold +- * mce=recovery force enable memcpy_mcsafe() ++ * mce=recovery force enable copy_mc_fragile() + */ + static int __init mcheck_enable(char *str) + { +@@ -2730,13 +2729,10 @@ static void __init mcheck_debugfs_init(v + static void __init mcheck_debugfs_init(void) { } + #endif + +-DEFINE_STATIC_KEY_FALSE(mcsafe_key); +-EXPORT_SYMBOL_GPL(mcsafe_key); +- + static int __init mcheck_late_init(void) + { + if (mca_cfg.recovery) +- static_branch_inc(&mcsafe_key); ++ enable_copy_mc_fragile(); + + mcheck_debugfs_init(); + +--- a/arch/x86/kernel/quirks.c ++++ b/arch/x86/kernel/quirks.c +@@ -8,6 +8,7 @@ + + #include + #include ++#include + + #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) + +@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, + amd_disable_seq_and_redirect_scrub); + +-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) +-#include +-#include +- + /* Ivy Bridge, Haswell, Broadwell */ + static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) + { +@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_r + pci_read_config_dword(pdev, 0x84, &capid0); + + if (capid0 & 0x10) +- static_branch_inc(&mcsafe_key); ++ enable_copy_mc_fragile(); + } + + /* Skylake */ +@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_ + * enabled, so memory machine check recovery is also enabled. + */ + if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0)) +- static_branch_inc(&mcsafe_key); ++ enable_copy_mc_fragile(); + + } + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); +@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IN + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap); + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap); + #endif +-#endif + + bool x86_apple_machine; + EXPORT_SYMBOL(x86_apple_machine); +--- a/arch/x86/lib/Makefile ++++ b/arch/x86/lib/Makefile +@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp + lib-y := delay.o misc.o cmdline.o cpu.o + lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o + lib-y += memcpy_$(BITS).o ++lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o + lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o + lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o + lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o +--- /dev/null ++++ b/arch/x86/lib/copy_mc.c +@@ -0,0 +1,82 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifdef CONFIG_X86_MCE ++/* ++ * See COPY_MC_TEST for self-test of the copy_mc_fragile() ++ * implementation. ++ */ ++static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key); ++ ++void enable_copy_mc_fragile(void) ++{ ++ static_branch_inc(©_mc_fragile_key); ++} ++#define copy_mc_fragile_enabled (static_branch_unlikely(©_mc_fragile_key)) ++ ++/* ++ * Similar to copy_user_handle_tail, probe for the write fault point, or ++ * source exception point. ++ */ ++__visible notrace unsigned long ++copy_mc_fragile_handle_tail(char *to, char *from, unsigned len) ++{ ++ for (; len; --len, to++, from++) ++ if (copy_mc_fragile(to, from, 1)) ++ break; ++ return len; ++} ++#else ++/* ++ * No point in doing careful copying, or consulting a static key when ++ * there is no #MC handler in the CONFIG_X86_MCE=n case. ++ */ ++void enable_copy_mc_fragile(void) ++{ ++} ++#define copy_mc_fragile_enabled (0) ++#endif ++ ++/** ++ * copy_mc_to_kernel - memory copy that handles source exceptions ++ * ++ * @dst: destination address ++ * @src: source address ++ * @len: number of bytes to copy ++ * ++ * Call into the 'fragile' version on systems that have trouble ++ * actually do machine check recovery. Everyone else can just ++ * use memcpy(). ++ * ++ * Return 0 for success, or number of bytes not copied if there was an ++ * exception. ++ */ ++unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len) ++{ ++ if (copy_mc_fragile_enabled) ++ return copy_mc_fragile(dst, src, len); ++ memcpy(dst, src, len); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(copy_mc_to_kernel); ++ ++unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len) ++{ ++ unsigned long ret; ++ ++ if (!copy_mc_fragile_enabled) ++ return copy_user_generic(dst, src, len); ++ ++ __uaccess_begin(); ++ ret = copy_mc_fragile(dst, src, len); ++ __uaccess_end(); ++ return ret; ++} +--- /dev/null ++++ b/arch/x86/lib/copy_mc_64.S +@@ -0,0 +1,127 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */ ++ ++#include ++#include ++#include ++#include ++ ++#ifndef CONFIG_UML ++ ++#ifdef CONFIG_X86_MCE ++COPY_MC_TEST_CTL ++ ++/* ++ * copy_mc_fragile - copy memory with indication if an exception / fault happened ++ * ++ * The 'fragile' version is opted into by platform quirks and takes ++ * pains to avoid unrecoverable corner cases like 'fast-string' ++ * instruction sequences, and consuming poison across a cacheline ++ * boundary. The non-fragile version is equivalent to memcpy() ++ * regardless of CPU machine-check-recovery capability. ++ */ ++SYM_FUNC_START(copy_mc_fragile) ++ cmpl $8, %edx ++ /* Less than 8 bytes? 
Go to byte copy loop */ ++ jb .L_no_whole_words ++ ++ /* Check for bad alignment of source */ ++ testl $7, %esi ++ /* Already aligned */ ++ jz .L_8byte_aligned ++ ++ /* Copy one byte at a time until source is 8-byte aligned */ ++ movl %esi, %ecx ++ andl $7, %ecx ++ subl $8, %ecx ++ negl %ecx ++ subl %ecx, %edx ++.L_read_leading_bytes: ++ movb (%rsi), %al ++ COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes ++ COPY_MC_TEST_DST %rdi 1 .E_leading_bytes ++.L_write_leading_bytes: ++ movb %al, (%rdi) ++ incq %rsi ++ incq %rdi ++ decl %ecx ++ jnz .L_read_leading_bytes ++ ++.L_8byte_aligned: ++ movl %edx, %ecx ++ andl $7, %edx ++ shrl $3, %ecx ++ jz .L_no_whole_words ++ ++.L_read_words: ++ movq (%rsi), %r8 ++ COPY_MC_TEST_SRC %rsi 8 .E_read_words ++ COPY_MC_TEST_DST %rdi 8 .E_write_words ++.L_write_words: ++ movq %r8, (%rdi) ++ addq $8, %rsi ++ addq $8, %rdi ++ decl %ecx ++ jnz .L_read_words ++ ++ /* Any trailing bytes? */ ++.L_no_whole_words: ++ andl %edx, %edx ++ jz .L_done_memcpy_trap ++ ++ /* Copy trailing bytes */ ++ movl %edx, %ecx ++.L_read_trailing_bytes: ++ movb (%rsi), %al ++ COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes ++ COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes ++.L_write_trailing_bytes: ++ movb %al, (%rdi) ++ incq %rsi ++ incq %rdi ++ decl %ecx ++ jnz .L_read_trailing_bytes ++ ++ /* Copy successful. Return zero */ ++.L_done_memcpy_trap: ++ xorl %eax, %eax ++.L_done: ++ ret ++SYM_FUNC_END(copy_mc_fragile) ++EXPORT_SYMBOL_GPL(copy_mc_fragile) ++ ++ .section .fixup, "ax" ++ /* ++ * Return number of bytes not copied for any failure. Note that ++ * there is no "tail" handling since the source buffer is 8-byte ++ * aligned and poison is cacheline aligned. ++ */ ++.E_read_words: ++ shll $3, %ecx ++.E_leading_bytes: ++ addl %edx, %ecx ++.E_trailing_bytes: ++ mov %ecx, %eax ++ jmp .L_done ++ ++ /* ++ * For write fault handling, given the destination is unaligned, ++ * we handle faults on multi-byte writes with a byte-by-byte ++ * copy up to the write-protected page. ++ */ ++.E_write_words: ++ shll $3, %ecx ++ addl %edx, %ecx ++ movl %ecx, %edx ++ jmp copy_mc_fragile_handle_tail ++ ++ .previous ++ ++ _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) ++ _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) ++ _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) ++ _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) ++ _ASM_EXTABLE(.L_write_words, .E_write_words) ++ _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) ++#endif /* CONFIG_X86_MCE */ ++#endif /* !CONFIG_UML */ +--- a/arch/x86/lib/memcpy_64.S ++++ b/arch/x86/lib/memcpy_64.S +@@ -4,7 +4,6 @@ + #include + #include + #include +-#include + #include + #include + +@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig) + SYM_FUNC_END(memcpy_orig) + + .popsection +- +-#ifndef CONFIG_UML +- +-MCSAFE_TEST_CTL +- +-/* +- * __memcpy_mcsafe - memory copy with machine check exception handling +- * Note that we only catch machine checks when reading the source addresses. +- * Writes to target are posted and don't generate machine checks. +- */ +-SYM_FUNC_START(__memcpy_mcsafe) +- cmpl $8, %edx +- /* Less than 8 bytes? 
Go to byte copy loop */ +- jb .L_no_whole_words +- +- /* Check for bad alignment of source */ +- testl $7, %esi +- /* Already aligned */ +- jz .L_8byte_aligned +- +- /* Copy one byte at a time until source is 8-byte aligned */ +- movl %esi, %ecx +- andl $7, %ecx +- subl $8, %ecx +- negl %ecx +- subl %ecx, %edx +-.L_read_leading_bytes: +- movb (%rsi), %al +- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes +- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes +-.L_write_leading_bytes: +- movb %al, (%rdi) +- incq %rsi +- incq %rdi +- decl %ecx +- jnz .L_read_leading_bytes +- +-.L_8byte_aligned: +- movl %edx, %ecx +- andl $7, %edx +- shrl $3, %ecx +- jz .L_no_whole_words +- +-.L_read_words: +- movq (%rsi), %r8 +- MCSAFE_TEST_SRC %rsi 8 .E_read_words +- MCSAFE_TEST_DST %rdi 8 .E_write_words +-.L_write_words: +- movq %r8, (%rdi) +- addq $8, %rsi +- addq $8, %rdi +- decl %ecx +- jnz .L_read_words +- +- /* Any trailing bytes? */ +-.L_no_whole_words: +- andl %edx, %edx +- jz .L_done_memcpy_trap +- +- /* Copy trailing bytes */ +- movl %edx, %ecx +-.L_read_trailing_bytes: +- movb (%rsi), %al +- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes +- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes +-.L_write_trailing_bytes: +- movb %al, (%rdi) +- incq %rsi +- incq %rdi +- decl %ecx +- jnz .L_read_trailing_bytes +- +- /* Copy successful. Return zero */ +-.L_done_memcpy_trap: +- xorl %eax, %eax +-.L_done: +- ret +-SYM_FUNC_END(__memcpy_mcsafe) +-EXPORT_SYMBOL_GPL(__memcpy_mcsafe) +- +- .section .fixup, "ax" +- /* +- * Return number of bytes not copied for any failure. Note that +- * there is no "tail" handling since the source buffer is 8-byte +- * aligned and poison is cacheline aligned. +- */ +-.E_read_words: +- shll $3, %ecx +-.E_leading_bytes: +- addl %edx, %ecx +-.E_trailing_bytes: +- mov %ecx, %eax +- jmp .L_done +- +- /* +- * For write fault handling, given the destination is unaligned, +- * we handle faults on multi-byte writes with a byte-by-byte +- * copy up to the write-protected page. +- */ +-.E_write_words: +- shll $3, %ecx +- addl %edx, %ecx +- movl %ecx, %edx +- jmp mcsafe_handle_tail +- +- .previous +- +- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) +- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) +- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) +- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) +- _ASM_EXTABLE(.L_write_words, .E_write_words) +- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) +-#endif +--- a/arch/x86/lib/usercopy_64.c ++++ b/arch/x86/lib/usercopy_64.c +@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to + } + EXPORT_SYMBOL(clear_user); + +-/* +- * Similar to copy_user_handle_tail, probe for the write fault point, +- * but reuse __memcpy_mcsafe in case a new read error is encountered. +- * clac() is handled in _copy_to_iter_mcsafe(). +- */ +-__visible notrace unsigned long +-mcsafe_handle_tail(char *to, char *from, unsigned len) +-{ +- for (; len; --len, to++, from++) { +- /* +- * Call the assembly routine back directly since +- * memcpy_mcsafe() may silently fallback to memcpy. 
+- */ +- unsigned long rem = __memcpy_mcsafe(to, from, 1); +- +- if (rem) +- break; +- } +- return len; +-} +- + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE + /** + * clean_cache_range - write back a cache range with CLWB +--- a/drivers/md/dm-writecache.c ++++ b/drivers/md/dm-writecache.c +@@ -49,7 +49,7 @@ do { \ + #define pmem_assign(dest, src) ((dest) = (src)) + #endif + +-#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM) ++#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM) + #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS + #endif + +@@ -992,7 +992,8 @@ static void writecache_resume(struct dm_ + } + wc->freelist_size = 0; + +- r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t)); ++ r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count, ++ sizeof(uint64_t)); + if (r) { + writecache_error(wc, r, "hardware memory error when reading superblock: %d", r); + sb_seq_count = cpu_to_le64(0); +@@ -1008,7 +1009,8 @@ static void writecache_resume(struct dm_ + e->seq_count = -1; + continue; + } +- r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry)); ++ r = copy_mc_to_kernel(&wme, memory_entry(wc, e), ++ sizeof(struct wc_memory_entry)); + if (r) { + writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d", + (unsigned long)b, r); +@@ -1206,7 +1208,7 @@ static void bio_copy_block(struct dm_wri + + if (rw == READ) { + int r; +- r = memcpy_mcsafe(buf, data, size); ++ r = copy_mc_to_kernel(buf, data, size); + flush_dcache_page(bio_page(bio)); + if (unlikely(r)) { + writecache_error(wc, r, "hardware memory error when reading data: %d", r); +@@ -2349,7 +2351,7 @@ invalid_optional: + } + } + +- r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); ++ r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock)); + if (r) { + ti->error = "Hardware memory error when reading superblock"; + goto bad; +@@ -2360,7 +2362,8 @@ invalid_optional: + ti->error = "Unable to initialize device"; + goto bad; + } +- r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); ++ r = copy_mc_to_kernel(&s, sb(wc), ++ sizeof(struct wc_memory_superblock)); + if (r) { + ti->error = "Hardware memory error when reading superblock"; + goto bad; +--- a/drivers/nvdimm/claim.c ++++ b/drivers/nvdimm/claim.c +@@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_names + if (rw == READ) { + if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) + return -EIO; +- if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0) ++ if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0) + return -EIO; + return 0; + } +--- a/drivers/nvdimm/pmem.c ++++ b/drivers/nvdimm/pmem.c +@@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct pag + while (len) { + mem = kmap_atomic(page); + chunk = min_t(unsigned int, len, PAGE_SIZE - off); +- rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); ++ rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk); + kunmap_atomic(mem); + if (rem) + return BLK_STS_IOERR; +@@ -305,7 +305,7 @@ static long pmem_dax_direct_access(struc + + /* + * Use the 'no check' versions of copy_from_iter_flushcache() and +- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds ++ * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. 
Bounds + * checking, both file offset and device offset, is handled by + * dax_iomap_actor() + */ +@@ -318,7 +318,7 @@ static size_t pmem_copy_from_iter(struct + static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) + { +- return _copy_to_iter_mcsafe(addr, bytes, i); ++ return _copy_mc_to_iter(addr, bytes, i); + } + + static const struct dax_operations pmem_dax_ops = { +--- a/include/linux/string.h ++++ b/include/linux/string.h +@@ -161,20 +161,13 @@ extern int bcmp(const void *,const void + #ifndef __HAVE_ARCH_MEMCHR + extern void * memchr(const void *,int,__kernel_size_t); + #endif +-#ifndef __HAVE_ARCH_MEMCPY_MCSAFE +-static inline __must_check unsigned long memcpy_mcsafe(void *dst, +- const void *src, size_t cnt) +-{ +- memcpy(dst, src, cnt); +- return 0; +-} +-#endif + #ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE + static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) + { + memcpy(dst, src, cnt); + } + #endif ++ + void *memchr_inv(const void *s, int c, size_t n); + char *strreplace(char *s, char old, char new); + +--- a/include/linux/uaccess.h ++++ b/include/linux/uaccess.h +@@ -163,6 +163,19 @@ copy_in_user(void __user *to, const void + } + #endif + ++#ifndef copy_mc_to_kernel ++/* ++ * Without arch opt-in this generic copy_mc_to_kernel() will not handle ++ * #MC (or arch equivalent) during source read. ++ */ ++static inline unsigned long __must_check ++copy_mc_to_kernel(void *dst, const void *src, size_t cnt) ++{ ++ memcpy(dst, src, cnt); ++ return 0; ++} ++#endif ++ + static __always_inline void pagefault_disabled_inc(void) + { + current->pagefault_disabled++; +--- a/include/linux/uio.h ++++ b/include/linux/uio.h +@@ -186,10 +186,10 @@ size_t _copy_from_iter_flushcache(void * + #define _copy_from_iter_flushcache _copy_from_iter_nocache + #endif + +-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i); ++#ifdef CONFIG_ARCH_HAS_COPY_MC ++size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); + #else +-#define _copy_to_iter_mcsafe _copy_to_iter ++#define _copy_mc_to_iter _copy_to_iter + #endif + + static __always_inline __must_check +@@ -202,12 +202,12 @@ size_t copy_from_iter_flushcache(void *a + } + + static __always_inline __must_check +-size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) ++size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i) + { + if (unlikely(!check_copy_size(addr, bytes, true))) + return 0; + else +- return _copy_to_iter_mcsafe(addr, bytes, i); ++ return _copy_mc_to_iter(addr, bytes, i); + } + + size_t iov_iter_zero(size_t bytes, struct iov_iter *); +--- a/lib/Kconfig ++++ b/lib/Kconfig +@@ -631,7 +631,12 @@ config UACCESS_MEMCPY + config ARCH_HAS_UACCESS_FLUSHCACHE + bool + +-config ARCH_HAS_UACCESS_MCSAFE ++# arch has a concept of a recoverable synchronous exception due to a ++# memory-read error like x86 machine-check or ARM data-abort, and ++# implements copy_mc_to_{user,kernel} to abort and report ++# 'bytes-transferred' if that exception fires when accessing the source ++# buffer. ++config ARCH_HAS_COPY_MC + bool + + # Temporary. 
Goes away when all archs are cleaned up +--- a/lib/iov_iter.c ++++ b/lib/iov_iter.c +@@ -636,30 +636,30 @@ size_t _copy_to_iter(const void *addr, s + } + EXPORT_SYMBOL(_copy_to_iter); + +-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +-static int copyout_mcsafe(void __user *to, const void *from, size_t n) ++#ifdef CONFIG_ARCH_HAS_COPY_MC ++static int copyout_mc(void __user *to, const void *from, size_t n) + { + if (access_ok(to, n)) { + instrument_copy_to_user(to, from, n); +- n = copy_to_user_mcsafe((__force void *) to, from, n); ++ n = copy_mc_to_user((__force void *) to, from, n); + } + return n; + } + +-static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, ++static unsigned long copy_mc_to_page(struct page *page, size_t offset, + const char *from, size_t len) + { + unsigned long ret; + char *to; + + to = kmap_atomic(page); +- ret = memcpy_mcsafe(to + offset, from, len); ++ ret = copy_mc_to_kernel(to + offset, from, len); + kunmap_atomic(to); + + return ret; + } + +-static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, ++static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, + struct iov_iter *i) + { + struct pipe_inode_info *pipe = i->pipe; +@@ -677,7 +677,7 @@ static size_t copy_pipe_to_iter_mcsafe(c + size_t chunk = min_t(size_t, n, PAGE_SIZE - off); + unsigned long rem; + +- rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page, ++ rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page, + off, addr, chunk); + i->head = i_head; + i->iov_offset = off + chunk - rem; +@@ -694,18 +694,17 @@ static size_t copy_pipe_to_iter_mcsafe(c + } + + /** +- * _copy_to_iter_mcsafe - copy to user with source-read error exception handling ++ * _copy_mc_to_iter - copy to iter with source memory error exception handling + * @addr: source kernel address + * @bytes: total transfer length + * @iter: destination iterator + * +- * The pmem driver arranges for filesystem-dax to use this facility via +- * dax_copy_to_iter() for protecting read/write to persistent memory. +- * Unless / until an architecture can guarantee identical performance +- * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a +- * performance regression to switch more users to the mcsafe version. ++ * The pmem driver deploys this for the dax operation ++ * (dax_copy_to_iter()) for dax reads (bypass page-cache and the ++ * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes ++ * successfully copied. + * +- * Otherwise, the main differences between this and typical _copy_to_iter(). ++ * The main differences between this and typical _copy_to_iter(). + * + * * Typical tail/residue handling after a fault retries the copy + * byte-by-byte until the fault happens again. Re-triggering machine +@@ -716,23 +715,22 @@ static size_t copy_pipe_to_iter_mcsafe(c + * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. + * Compare to copy_to_iter() where only ITER_IOVEC attempts might return + * a short copy. +- * +- * See MCSAFE_TEST for self-test. 
+ */ +-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) ++size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) + { + const char *from = addr; + unsigned long rem, curr_addr, s_addr = (unsigned long) addr; + + if (unlikely(iov_iter_is_pipe(i))) +- return copy_pipe_to_iter_mcsafe(addr, bytes, i); ++ return copy_mc_pipe_to_iter(addr, bytes, i); + if (iter_is_iovec(i)) + might_fault(); + iterate_and_advance(i, bytes, v, +- copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), ++ copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, ++ v.iov_len), + ({ +- rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, +- (from += v.bv_len) - v.bv_len, v.bv_len); ++ rem = copy_mc_to_page(v.bv_page, v.bv_offset, ++ (from += v.bv_len) - v.bv_len, v.bv_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; +@@ -740,8 +738,8 @@ size_t _copy_to_iter_mcsafe(const void * + } + }), + ({ +- rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, +- v.iov_len); ++ rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len) ++ - v.iov_len, v.iov_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; +@@ -752,8 +750,8 @@ size_t _copy_to_iter_mcsafe(const void * + + return bytes; + } +-EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); +-#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */ ++EXPORT_SYMBOL_GPL(_copy_mc_to_iter); ++#endif /* CONFIG_ARCH_HAS_COPY_MC */ + + size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) + { +--- a/tools/arch/x86/include/asm/mcsafe_test.h ++++ /dev/null +@@ -1,13 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-#ifndef _MCSAFE_TEST_H_ +-#define _MCSAFE_TEST_H_ +- +-.macro MCSAFE_TEST_CTL +-.endm +- +-.macro MCSAFE_TEST_SRC reg count target +-.endm +- +-.macro MCSAFE_TEST_DST reg count target +-.endm +-#endif /* _MCSAFE_TEST_H_ */ +--- a/tools/arch/x86/lib/memcpy_64.S ++++ b/tools/arch/x86/lib/memcpy_64.S +@@ -4,7 +4,6 @@ + #include + #include + #include +-#include + #include + #include + +@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig) + SYM_FUNC_END(memcpy_orig) + + .popsection +- +-#ifndef CONFIG_UML +- +-MCSAFE_TEST_CTL +- +-/* +- * __memcpy_mcsafe - memory copy with machine check exception handling +- * Note that we only catch machine checks when reading the source addresses. +- * Writes to target are posted and don't generate machine checks. +- */ +-SYM_FUNC_START(__memcpy_mcsafe) +- cmpl $8, %edx +- /* Less than 8 bytes? Go to byte copy loop */ +- jb .L_no_whole_words +- +- /* Check for bad alignment of source */ +- testl $7, %esi +- /* Already aligned */ +- jz .L_8byte_aligned +- +- /* Copy one byte at a time until source is 8-byte aligned */ +- movl %esi, %ecx +- andl $7, %ecx +- subl $8, %ecx +- negl %ecx +- subl %ecx, %edx +-.L_read_leading_bytes: +- movb (%rsi), %al +- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes +- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes +-.L_write_leading_bytes: +- movb %al, (%rdi) +- incq %rsi +- incq %rdi +- decl %ecx +- jnz .L_read_leading_bytes +- +-.L_8byte_aligned: +- movl %edx, %ecx +- andl $7, %edx +- shrl $3, %ecx +- jz .L_no_whole_words +- +-.L_read_words: +- movq (%rsi), %r8 +- MCSAFE_TEST_SRC %rsi 8 .E_read_words +- MCSAFE_TEST_DST %rdi 8 .E_write_words +-.L_write_words: +- movq %r8, (%rdi) +- addq $8, %rsi +- addq $8, %rdi +- decl %ecx +- jnz .L_read_words +- +- /* Any trailing bytes? 
*/ +-.L_no_whole_words: +- andl %edx, %edx +- jz .L_done_memcpy_trap +- +- /* Copy trailing bytes */ +- movl %edx, %ecx +-.L_read_trailing_bytes: +- movb (%rsi), %al +- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes +- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes +-.L_write_trailing_bytes: +- movb %al, (%rdi) +- incq %rsi +- incq %rdi +- decl %ecx +- jnz .L_read_trailing_bytes +- +- /* Copy successful. Return zero */ +-.L_done_memcpy_trap: +- xorl %eax, %eax +-.L_done: +- ret +-SYM_FUNC_END(__memcpy_mcsafe) +-EXPORT_SYMBOL_GPL(__memcpy_mcsafe) +- +- .section .fixup, "ax" +- /* +- * Return number of bytes not copied for any failure. Note that +- * there is no "tail" handling since the source buffer is 8-byte +- * aligned and poison is cacheline aligned. +- */ +-.E_read_words: +- shll $3, %ecx +-.E_leading_bytes: +- addl %edx, %ecx +-.E_trailing_bytes: +- mov %ecx, %eax +- jmp .L_done +- +- /* +- * For write fault handling, given the destination is unaligned, +- * we handle faults on multi-byte writes with a byte-by-byte +- * copy up to the write-protected page. +- */ +-.E_write_words: +- shll $3, %ecx +- addl %edx, %ecx +- movl %ecx, %edx +- jmp mcsafe_handle_tail +- +- .previous +- +- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) +- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) +- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) +- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) +- _ASM_EXTABLE(.L_write_words, .E_write_words) +- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) +-#endif +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -548,8 +548,8 @@ static const char *uaccess_safe_builtin[ + "__ubsan_handle_shift_out_of_bounds", + /* misc */ + "csum_partial_copy_generic", +- "__memcpy_mcsafe", +- "mcsafe_handle_tail", ++ "copy_mc_fragile", ++ "copy_mc_fragile_handle_tail", + "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ + NULL + }; +--- a/tools/perf/bench/Build ++++ b/tools/perf/bench/Build +@@ -11,7 +11,6 @@ perf-y += epoll-ctl.o + perf-y += synthesize.o + perf-y += kallsyms-parse.o + +-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o + perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o + perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o + +--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c ++++ /dev/null +@@ -1,24 +0,0 @@ +-/* +- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy +- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy' +- * happy. +- */ +-#include +- +-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt); +-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len); +- +-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len) +-{ +- for (; len; --len, to++, from++) { +- /* +- * Call the assembly routine back directly since +- * memcpy_mcsafe() may silently fallback to memcpy. 
+- */ +- unsigned long rem = __memcpy_mcsafe(to, from, 1); +- +- if (rem) +- break; +- } +- return len; +-} +--- a/tools/testing/nvdimm/test/nfit.c ++++ b/tools/testing/nvdimm/test/nfit.c +@@ -23,7 +23,8 @@ + #include "nfit_test.h" + #include "../watermark.h" + +-#include ++#include ++#include + + /* + * Generate an NFIT table to describe the following topology: +@@ -3052,7 +3053,7 @@ static struct platform_driver nfit_test_ + .id_table = nfit_test_id, + }; + +-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); ++static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); + + enum INJECT { + INJECT_NONE, +@@ -3060,7 +3061,7 @@ enum INJECT { + INJECT_DST, + }; + +-static void mcsafe_test_init(char *dst, char *src, size_t size) ++static void copy_mc_test_init(char *dst, char *src, size_t size) + { + size_t i; + +@@ -3069,7 +3070,7 @@ static void mcsafe_test_init(char *dst, + src[i] = (char) i; + } + +-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, ++static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src, + size_t size, unsigned long rem) + { + size_t i; +@@ -3090,12 +3091,12 @@ static bool mcsafe_test_validate(unsigne + return true; + } + +-void mcsafe_test(void) ++void copy_mc_test(void) + { + char *inject_desc[] = { "none", "source", "destination" }; + enum INJECT inj; + +- if (IS_ENABLED(CONFIG_MCSAFE_TEST)) { ++ if (IS_ENABLED(CONFIG_COPY_MC_TEST)) { + pr_info("%s: run...\n", __func__); + } else { + pr_info("%s: disabled, skip.\n", __func__); +@@ -3113,31 +3114,31 @@ void mcsafe_test(void) + + switch (inj) { + case INJECT_NONE: +- mcsafe_inject_src(NULL); +- mcsafe_inject_dst(NULL); +- dst = &mcsafe_buf[2048]; +- src = &mcsafe_buf[1024 - i]; ++ copy_mc_inject_src(NULL); ++ copy_mc_inject_dst(NULL); ++ dst = ©_mc_buf[2048]; ++ src = ©_mc_buf[1024 - i]; + expect = 0; + break; + case INJECT_SRC: +- mcsafe_inject_src(&mcsafe_buf[1024]); +- mcsafe_inject_dst(NULL); +- dst = &mcsafe_buf[2048]; +- src = &mcsafe_buf[1024 - i]; ++ copy_mc_inject_src(©_mc_buf[1024]); ++ copy_mc_inject_dst(NULL); ++ dst = ©_mc_buf[2048]; ++ src = ©_mc_buf[1024 - i]; + expect = 512 - i; + break; + case INJECT_DST: +- mcsafe_inject_src(NULL); +- mcsafe_inject_dst(&mcsafe_buf[2048]); +- dst = &mcsafe_buf[2048 - i]; +- src = &mcsafe_buf[1024]; ++ copy_mc_inject_src(NULL); ++ copy_mc_inject_dst(©_mc_buf[2048]); ++ dst = ©_mc_buf[2048 - i]; ++ src = ©_mc_buf[1024]; + expect = 512 - i; + break; + } + +- mcsafe_test_init(dst, src, 512); +- rem = __memcpy_mcsafe(dst, src, 512); +- valid = mcsafe_test_validate(dst, src, 512, expect); ++ copy_mc_test_init(dst, src, 512); ++ rem = copy_mc_fragile(dst, src, 512); ++ valid = copy_mc_test_validate(dst, src, 512, expect); + if (rem == expect && valid) + continue; + pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n", +@@ -3149,8 +3150,8 @@ void mcsafe_test(void) + } + } + +- mcsafe_inject_src(NULL); +- mcsafe_inject_dst(NULL); ++ copy_mc_inject_src(NULL); ++ copy_mc_inject_dst(NULL); + } + + static __init int nfit_test_init(void) +@@ -3161,7 +3162,7 @@ static __init int nfit_test_init(void) + libnvdimm_test(); + acpi_nfit_test(); + device_dax_test(); +- mcsafe_test(); ++ copy_mc_test(); + dax_pmem_test(); + dax_pmem_core_test(); + #ifdef CONFIG_DEV_DAX_PMEM_COMPAT +--- a/tools/testing/selftests/powerpc/copyloops/.gitignore ++++ b/tools/testing/selftests/powerpc/copyloops/.gitignore +@@ -12,4 +12,4 @@ memcpy_p7_t1 + copyuser_64_exc_t0 + copyuser_64_exc_t1 + copyuser_64_exc_t2 
+-memcpy_mcsafe_64 ++copy_mc_64 +--- a/tools/testing/selftests/powerpc/copyloops/Makefile ++++ b/tools/testing/selftests/powerpc/copyloops/Makefile +@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4 + TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ + copyuser_p7_t0 copyuser_p7_t1 \ + memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ +- memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \ ++ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ + copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 + + EXTRA_SOURCES := validate.c ../harness.c stubs.S +@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S + -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \ + -o $@ $^ + +-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES) ++$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES) + $(CC) $(CPPFLAGS) $(CFLAGS) \ +- -D COPY_LOOP=test_memcpy_mcsafe \ ++ -D COPY_LOOP=test_copy_mc_generic \ + -o $@ $^ + + $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ +--- /dev/null ++++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (C) IBM Corporation, 2011 ++ * Derived from copyuser_power7.s by Anton Blanchard ++ * Author - Balbir Singh ++ */ ++#include ++#include ++#include ++ ++ .macro err1 ++100: ++ EX_TABLE(100b,.Ldo_err1) ++ .endm ++ ++ .macro err2 ++200: ++ EX_TABLE(200b,.Ldo_err2) ++ .endm ++ ++ .macro err3 ++300: EX_TABLE(300b,.Ldone) ++ .endm ++ ++.Ldo_err2: ++ ld r22,STK_REG(R22)(r1) ++ ld r21,STK_REG(R21)(r1) ++ ld r20,STK_REG(R20)(r1) ++ ld r19,STK_REG(R19)(r1) ++ ld r18,STK_REG(R18)(r1) ++ ld r17,STK_REG(R17)(r1) ++ ld r16,STK_REG(R16)(r1) ++ ld r15,STK_REG(R15)(r1) ++ ld r14,STK_REG(R14)(r1) ++ addi r1,r1,STACKFRAMESIZE ++.Ldo_err1: ++ /* Do a byte by byte copy to get the exact remaining size */ ++ mtctr r7 ++46: ++err3; lbz r0,0(r4) ++ addi r4,r4,1 ++err3; stb r0,0(r3) ++ addi r3,r3,1 ++ bdnz 46b ++ li r3,0 ++ blr ++ ++.Ldone: ++ mfctr r3 ++ blr ++ ++ ++_GLOBAL(copy_mc_generic) ++ mr r7,r5 ++ cmpldi r5,16 ++ blt .Lshort_copy ++ ++.Lcopy: ++ /* Get the source 8B aligned */ ++ neg r6,r4 ++ mtocrf 0x01,r6 ++ clrldi r6,r6,(64-3) ++ ++ bf cr7*4+3,1f ++err1; lbz r0,0(r4) ++ addi r4,r4,1 ++err1; stb r0,0(r3) ++ addi r3,r3,1 ++ subi r7,r7,1 ++ ++1: bf cr7*4+2,2f ++err1; lhz r0,0(r4) ++ addi r4,r4,2 ++err1; sth r0,0(r3) ++ addi r3,r3,2 ++ subi r7,r7,2 ++ ++2: bf cr7*4+1,3f ++err1; lwz r0,0(r4) ++ addi r4,r4,4 ++err1; stw r0,0(r3) ++ addi r3,r3,4 ++ subi r7,r7,4 ++ ++3: sub r5,r5,r6 ++ cmpldi r5,128 ++ ++ mflr r0 ++ stdu r1,-STACKFRAMESIZE(r1) ++ std r14,STK_REG(R14)(r1) ++ std r15,STK_REG(R15)(r1) ++ std r16,STK_REG(R16)(r1) ++ std r17,STK_REG(R17)(r1) ++ std r18,STK_REG(R18)(r1) ++ std r19,STK_REG(R19)(r1) ++ std r20,STK_REG(R20)(r1) ++ std r21,STK_REG(R21)(r1) ++ std r22,STK_REG(R22)(r1) ++ std r0,STACKFRAMESIZE+16(r1) ++ ++ blt 5f ++ srdi r6,r5,7 ++ mtctr r6 ++ ++ /* Now do cacheline (128B) sized loads and stores. 
*/ ++ .align 5 ++4: ++err2; ld r0,0(r4) ++err2; ld r6,8(r4) ++err2; ld r8,16(r4) ++err2; ld r9,24(r4) ++err2; ld r10,32(r4) ++err2; ld r11,40(r4) ++err2; ld r12,48(r4) ++err2; ld r14,56(r4) ++err2; ld r15,64(r4) ++err2; ld r16,72(r4) ++err2; ld r17,80(r4) ++err2; ld r18,88(r4) ++err2; ld r19,96(r4) ++err2; ld r20,104(r4) ++err2; ld r21,112(r4) ++err2; ld r22,120(r4) ++ addi r4,r4,128 ++err2; std r0,0(r3) ++err2; std r6,8(r3) ++err2; std r8,16(r3) ++err2; std r9,24(r3) ++err2; std r10,32(r3) ++err2; std r11,40(r3) ++err2; std r12,48(r3) ++err2; std r14,56(r3) ++err2; std r15,64(r3) ++err2; std r16,72(r3) ++err2; std r17,80(r3) ++err2; std r18,88(r3) ++err2; std r19,96(r3) ++err2; std r20,104(r3) ++err2; std r21,112(r3) ++err2; std r22,120(r3) ++ addi r3,r3,128 ++ subi r7,r7,128 ++ bdnz 4b ++ ++ clrldi r5,r5,(64-7) ++ ++ /* Up to 127B to go */ ++5: srdi r6,r5,4 ++ mtocrf 0x01,r6 ++ ++6: bf cr7*4+1,7f ++err2; ld r0,0(r4) ++err2; ld r6,8(r4) ++err2; ld r8,16(r4) ++err2; ld r9,24(r4) ++err2; ld r10,32(r4) ++err2; ld r11,40(r4) ++err2; ld r12,48(r4) ++err2; ld r14,56(r4) ++ addi r4,r4,64 ++err2; std r0,0(r3) ++err2; std r6,8(r3) ++err2; std r8,16(r3) ++err2; std r9,24(r3) ++err2; std r10,32(r3) ++err2; std r11,40(r3) ++err2; std r12,48(r3) ++err2; std r14,56(r3) ++ addi r3,r3,64 ++ subi r7,r7,64 ++ ++7: ld r14,STK_REG(R14)(r1) ++ ld r15,STK_REG(R15)(r1) ++ ld r16,STK_REG(R16)(r1) ++ ld r17,STK_REG(R17)(r1) ++ ld r18,STK_REG(R18)(r1) ++ ld r19,STK_REG(R19)(r1) ++ ld r20,STK_REG(R20)(r1) ++ ld r21,STK_REG(R21)(r1) ++ ld r22,STK_REG(R22)(r1) ++ addi r1,r1,STACKFRAMESIZE ++ ++ /* Up to 63B to go */ ++ bf cr7*4+2,8f ++err1; ld r0,0(r4) ++err1; ld r6,8(r4) ++err1; ld r8,16(r4) ++err1; ld r9,24(r4) ++ addi r4,r4,32 ++err1; std r0,0(r3) ++err1; std r6,8(r3) ++err1; std r8,16(r3) ++err1; std r9,24(r3) ++ addi r3,r3,32 ++ subi r7,r7,32 ++ ++ /* Up to 31B to go */ ++8: bf cr7*4+3,9f ++err1; ld r0,0(r4) ++err1; ld r6,8(r4) ++ addi r4,r4,16 ++err1; std r0,0(r3) ++err1; std r6,8(r3) ++ addi r3,r3,16 ++ subi r7,r7,16 ++ ++9: clrldi r5,r5,(64-4) ++ ++ /* Up to 15B to go */ ++.Lshort_copy: ++ mtocrf 0x01,r5 ++ bf cr7*4+0,12f ++err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ ++err1; lwz r6,4(r4) ++ addi r4,r4,8 ++err1; stw r0,0(r3) ++err1; stw r6,4(r3) ++ addi r3,r3,8 ++ subi r7,r7,8 ++ ++12: bf cr7*4+1,13f ++err1; lwz r0,0(r4) ++ addi r4,r4,4 ++err1; stw r0,0(r3) ++ addi r3,r3,4 ++ subi r7,r7,4 ++ ++13: bf cr7*4+2,14f ++err1; lhz r0,0(r4) ++ addi r4,r4,2 ++err1; sth r0,0(r3) ++ addi r3,r3,2 ++ subi r7,r7,2 ++ ++14: bf cr7*4+3,15f ++err1; lbz r0,0(r4) ++err1; stb r0,0(r3) ++ ++15: li r3,0 ++ blr ++ ++EXPORT_SYMBOL_GPL(copy_mc_generic); -- 2.47.3
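
Illustrative note, not part of the queued patch: the renamed helper keeps the memcpy_mcsafe() calling convention of returning the number of bytes left uncopied (0 on full success), and on architectures that do not select ARCH_HAS_COPY_MC the generic copy_mc_to_kernel() in include/linux/uaccess.h is just a memcpy() wrapper with no machine-check recovery. A minimal standalone C sketch of that convention follows; read_block() is a hypothetical caller, modelled on the -EIO handling shown above in drivers/nvdimm/claim.c and drivers/md/dm-writecache.c, and this builds in userspace purely for illustration.

/*
 * Sketch of the copy_mc_to_kernel() return convention: the helper
 * reports the number of bytes NOT copied, 0 meaning full success.
 * This mirrors the generic !ARCH_HAS_COPY_MC fallback from the patch;
 * it cannot actually recover from a #MC.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Generic fallback: plain memcpy(), always reports zero residue. */
static unsigned long copy_mc_to_kernel(void *dst, const void *src, size_t cnt)
{
	memcpy(dst, src, cnt);
	return 0;	/* bytes remaining */
}

/* Hypothetical consumer: any uncopied residue is treated as an I/O error. */
static int read_block(void *buf, const void *media, size_t size)
{
	if (copy_mc_to_kernel(buf, media, size) != 0)
		return -EIO;
	return 0;
}

int main(void)
{
	char src[64] = "persistent-memory block";
	char dst[64];

	printf("read_block() -> %d, dst=\"%s\"\n",
	       read_block(dst, src, sizeof(src)), dst);
	return 0;
}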