From: Greg Kroah-Hartman Date: Tue, 17 Apr 2018 12:10:21 +0000 (+0200) Subject: 4.9-stable patches X-Git-Tag: v4.16.3~24 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1e6145017b02df46f7843cc1d9dca49468cd4b71;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: arm-arm64-kvm-add-psci_version-helper.patch arm-arm64-kvm-add-smccc-accessors-to-psci-code.patch arm-arm64-kvm-advertise-smccc-v1.1.patch arm-arm64-kvm-consolidate-the-psci-include-files.patch arm-arm64-kvm-implement-psci-1.0-support.patch arm-arm64-kvm-turn-kvm_psci_version-into-a-static-inline.patch arm-arm64-smccc-implement-smccc-v1.1-inline-primitive.patch arm-arm64-smccc-make-function-identifiers-an-unsigned-quantity.patch arm64-add-arm_smccc_arch_workaround_1-bp-hardening-support.patch arm64-add-skeleton-to-harden-the-branch-predictor-against-aliasing-attacks.patch arm64-barrier-add-csdb-macros-to-control-data-value-prediction.patch arm64-branch-predictor-hardening-for-cavium-thunderx2.patch arm64-cpu_errata-allow-an-erratum-to-be-match-for-all-revisions-of-a-core.patch arm64-cpufeature-__this_cpu_has_cap-shouldn-t-stop-early.patch arm64-cpufeature-pass-capability-structure-to-enable-callback.patch arm64-cputype-add-missing-midr-values-for-cortex-a72-and-cortex-a75.patch arm64-entry-apply-bp-hardening-for-high-priority-synchronous-exceptions.patch arm64-entry-apply-bp-hardening-for-suspicious-interrupts-from-el0.patch arm64-entry-ensure-branch-through-syscall-table-is-bounded-under-speculation.patch arm64-factor-out-ttbr0_el1-post-update-workaround-into-a-specific-asm-macro.patch arm64-implement-array_index_mask_nospec.patch arm64-implement-branch-predictor-hardening-for-affected-cortex-a-cpus.patch arm64-kill-psci_get_version-as-a-variant-2-workaround.patch arm64-kvm-add-smccc_arch_workaround_1-fast-handling.patch arm64-kvm-increment-pc-after-handling-an-smc-trap.patch arm64-kvm-make-psci_version-a-fast-path.patch 
arm64-kvm-report-smccc_arch_workaround_1-bp-hardening-support.patch arm64-kvm-use-per-cpu-vector-when-bp-hardening-is-enabled.patch arm64-make-user_ds-an-inclusive-limit.patch arm64-move-bp-hardening-to-check_and_switch_context.patch arm64-move-post_ttbr_update_workaround-to-c-code.patch arm64-move-task_-definitions-to-asm-processor.h.patch arm64-run-enable-method-for-errata-work-arounds-on-late-cpus.patch arm64-uaccess-don-t-bother-eliding-access_ok-checks-in-__-get-put-_user.patch arm64-uaccess-mask-__user-pointers-for-__arch_-clear-copy_-_user.patch arm64-uaccess-prevent-speculative-use-of-the-current-addr_limit.patch arm64-use-pointer-masking-to-limit-uaccess-speculation.patch drivers-firmware-expose-psci_get_version-through-psci_ops-structure.patch firmware-psci-expose-psci-conduit.patch firmware-psci-expose-smccc-version-through-psci_ops.patch mm-introduce-lm_alias.patch --- diff --git a/queue-4.9/arm-arm64-kvm-add-psci_version-helper.patch b/queue-4.9/arm-arm64-kvm-add-psci_version-helper.patch new file mode 100644 index 00000000000..895a397c601 --- /dev/null +++ b/queue-4.9/arm-arm64-kvm-add-psci_version-helper.patch @@ -0,0 +1,80 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:25 +0100 +Subject: [PATCH v4.9.y 29/42] arm/arm64: KVM: Add PSCI_VERSION helper +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-30-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit d0a144f12a7ca8368933eae6583c096c363ec506 upstream. + +As we're about to trigger a PSCI version explosion, it doesn't +hurt to introduce a PSCI_VERSION helper that is going to be +used everywhere. 
+ +Reviewed-by: Christoffer Dall +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +[v4.9: account for files moved to virt/ upstream] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/kvm/psci.c | 4 +--- + include/kvm/arm_psci.h | 6 ++++-- + include/uapi/linux/psci.h | 3 +++ + 3 files changed, 8 insertions(+), 5 deletions(-) + +--- a/arch/arm/kvm/psci.c ++++ b/arch/arm/kvm/psci.c +@@ -25,8 +25,6 @@ + + #include + +-#include +- + /* + * This is an implementation of the Power State Coordination Interface + * as described in ARM document number ARM DEN 0022A. +@@ -220,7 +218,7 @@ static int kvm_psci_0_2_call(struct kvm_ + * Bits[31:16] = Major Version = 0 + * Bits[15:0] = Minor Version = 2 + */ +- val = 2; ++ val = KVM_ARM_PSCI_0_2; + break; + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: +--- a/include/kvm/arm_psci.h ++++ b/include/kvm/arm_psci.h +@@ -18,8 +18,10 @@ + #ifndef __KVM_ARM_PSCI_H__ + #define __KVM_ARM_PSCI_H__ + +-#define KVM_ARM_PSCI_0_1 1 +-#define KVM_ARM_PSCI_0_2 2 ++#include ++ ++#define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) ++#define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2) + + int kvm_psci_version(struct kvm_vcpu *vcpu); + int kvm_psci_call(struct kvm_vcpu *vcpu); +--- a/include/uapi/linux/psci.h ++++ b/include/uapi/linux/psci.h +@@ -87,6 +87,9 @@ + (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT) + #define PSCI_VERSION_MINOR(ver) \ + ((ver) & PSCI_VERSION_MINOR_MASK) ++#define PSCI_VERSION(maj, min) \ ++ ((((maj) << PSCI_VERSION_MAJOR_SHIFT) & PSCI_VERSION_MAJOR_MASK) | \ ++ ((min) & PSCI_VERSION_MINOR_MASK)) + + /* PSCI features decoding (>=1.0) */ + #define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT 1 diff --git a/queue-4.9/arm-arm64-kvm-add-smccc-accessors-to-psci-code.patch b/queue-4.9/arm-arm64-kvm-add-smccc-accessors-to-psci-code.patch new file mode 100644 index 00000000000..4007079c903 --- /dev/null +++ 
b/queue-4.9/arm-arm64-kvm-add-smccc-accessors-to-psci-code.patch @@ -0,0 +1,146 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:26 +0100 +Subject: [PATCH v4.9.y 30/42] arm/arm64: KVM: Add smccc accessors to PSCI code +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-31-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit 84684fecd7ea381824a96634a027b7719587fb77 upstream. + +Instead of open coding the accesses to the various registers, +let's add explicit SMCCC accessors. + +Reviewed-by: Christoffer Dall +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +[v4.9: account for files moved to virt/ upstream] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/kvm/psci.c | 52 ++++++++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 42 insertions(+), 10 deletions(-) + +--- a/arch/arm/kvm/psci.c ++++ b/arch/arm/kvm/psci.c +@@ -32,6 +32,38 @@ + + #define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) + ++static u32 smccc_get_function(struct kvm_vcpu *vcpu) ++{ ++ return vcpu_get_reg(vcpu, 0); ++} ++ ++static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu) ++{ ++ return vcpu_get_reg(vcpu, 1); ++} ++ ++static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu) ++{ ++ return vcpu_get_reg(vcpu, 2); ++} ++ ++static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu) ++{ ++ return vcpu_get_reg(vcpu, 3); ++} ++ ++static void smccc_set_retval(struct kvm_vcpu *vcpu, ++ unsigned long a0, ++ unsigned long a1, ++ unsigned long a2, ++ unsigned long a3) ++{ ++ vcpu_set_reg(vcpu, 0, a0); ++ vcpu_set_reg(vcpu, 1, a1); ++ vcpu_set_reg(vcpu, 2, a2); ++ vcpu_set_reg(vcpu, 3, a3); ++} ++ + static 
unsigned long psci_affinity_mask(unsigned long affinity_level) + { + if (affinity_level <= 3) +@@ -74,7 +106,7 @@ static unsigned long kvm_psci_vcpu_on(st + unsigned long context_id; + phys_addr_t target_pc; + +- cpu_id = vcpu_get_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK; ++ cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; + if (vcpu_mode_is_32bit(source_vcpu)) + cpu_id &= ~((u32) 0); + +@@ -93,8 +125,8 @@ static unsigned long kvm_psci_vcpu_on(st + return PSCI_RET_INVALID_PARAMS; + } + +- target_pc = vcpu_get_reg(source_vcpu, 2); +- context_id = vcpu_get_reg(source_vcpu, 3); ++ target_pc = smccc_get_arg2(source_vcpu); ++ context_id = smccc_get_arg3(source_vcpu); + + kvm_reset_vcpu(vcpu); + +@@ -113,7 +145,7 @@ static unsigned long kvm_psci_vcpu_on(st + * NOTE: We always update r0 (or x0) because for PSCI v0.1 + * the general puspose registers are undefined upon CPU_ON. + */ +- vcpu_set_reg(vcpu, 0, context_id); ++ smccc_set_retval(vcpu, context_id, 0, 0, 0); + vcpu->arch.power_off = false; + smp_mb(); /* Make sure the above is visible */ + +@@ -133,8 +165,8 @@ static unsigned long kvm_psci_vcpu_affin + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + +- target_affinity = vcpu_get_reg(vcpu, 1); +- lowest_affinity_level = vcpu_get_reg(vcpu, 2); ++ target_affinity = smccc_get_arg1(vcpu); ++ lowest_affinity_level = smccc_get_arg2(vcpu); + + /* Determine target affinity mask */ + target_affinity_mask = psci_affinity_mask(lowest_affinity_level); +@@ -208,7 +240,7 @@ int kvm_psci_version(struct kvm_vcpu *vc + static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) + { + struct kvm *kvm = vcpu->kvm; +- unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); ++ unsigned long psci_fn = smccc_get_function(vcpu); + unsigned long val; + int ret = 1; + +@@ -275,14 +307,14 @@ static int kvm_psci_0_2_call(struct kvm_ + break; + } + +- vcpu_set_reg(vcpu, 0, val); ++ smccc_set_retval(vcpu, val, 0, 0, 0); + return ret; + } + + static int kvm_psci_0_1_call(struct 
kvm_vcpu *vcpu) + { + struct kvm *kvm = vcpu->kvm; +- unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); ++ unsigned long psci_fn = smccc_get_function(vcpu); + unsigned long val; + + switch (psci_fn) { +@@ -300,7 +332,7 @@ static int kvm_psci_0_1_call(struct kvm_ + break; + } + +- vcpu_set_reg(vcpu, 0, val); ++ smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; + } + diff --git a/queue-4.9/arm-arm64-kvm-advertise-smccc-v1.1.patch b/queue-4.9/arm-arm64-kvm-advertise-smccc-v1.1.patch new file mode 100644 index 00000000000..dc9385faea3 --- /dev/null +++ b/queue-4.9/arm-arm64-kvm-advertise-smccc-v1.1.patch @@ -0,0 +1,142 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:28 +0100 +Subject: [PATCH v4.9.y 32/42] arm/arm64: KVM: Advertise SMCCC v1.1 +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-33-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit 09e6be12effdb33bf7210c8867bbd213b66a499e upstream. + +The new SMC Calling Convention (v1.1) allows for a reduced overhead +when calling into the firmware, and provides a new feature discovery +mechanism. + +Make it visible to KVM guests. 
+ +Tested-by: Ard Biesheuvel +Reviewed-by: Christoffer Dall +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +[v4.9: account for files moved to virt/ upstream] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/kvm/handle_exit.c | 2 +- + arch/arm/kvm/psci.c | 24 +++++++++++++++++++++++- + arch/arm64/kvm/handle_exit.c | 2 +- + include/kvm/arm_psci.h | 2 +- + include/linux/arm-smccc.h | 13 +++++++++++++ + 5 files changed, 39 insertions(+), 4 deletions(-) + +--- a/arch/arm/kvm/handle_exit.c ++++ b/arch/arm/kvm/handle_exit.c +@@ -36,7 +36,7 @@ static int handle_hvc(struct kvm_vcpu *v + kvm_vcpu_hvc_get_imm(vcpu)); + vcpu->stat.hvc_exit_stat++; + +- ret = kvm_psci_call(vcpu); ++ ret = kvm_hvc_call_handler(vcpu); + if (ret < 0) { + vcpu_set_reg(vcpu, 0, ~0UL); + return 1; +--- a/arch/arm/kvm/psci.c ++++ b/arch/arm/kvm/psci.c +@@ -15,6 +15,7 @@ + * along with this program. If not, see . + */ + ++#include + #include + #include + #include +@@ -337,6 +338,7 @@ static int kvm_psci_1_0_call(struct kvm_ + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + case PSCI_1_0_FN_PSCI_FEATURES: ++ case ARM_SMCCC_VERSION_FUNC_ID: + val = 0; + break; + default: +@@ -391,7 +393,7 @@ static int kvm_psci_0_1_call(struct kvm_ + * Errors: + * -EINVAL: Unrecognized PSCI function + */ +-int kvm_psci_call(struct kvm_vcpu *vcpu) ++static int kvm_psci_call(struct kvm_vcpu *vcpu) + { + switch (kvm_psci_version(vcpu)) { + case KVM_ARM_PSCI_1_0: +@@ -404,3 +406,23 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) + return -EINVAL; + }; + } ++ ++int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) ++{ ++ u32 func_id = smccc_get_function(vcpu); ++ u32 val = PSCI_RET_NOT_SUPPORTED; ++ ++ switch (func_id) { ++ case ARM_SMCCC_VERSION_FUNC_ID: ++ val = ARM_SMCCC_VERSION_1_1; ++ break; ++ case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: ++ /* Nothing supported yet */ ++ break; ++ default: ++ return kvm_psci_call(vcpu); ++ } ++ ++ 
smccc_set_retval(vcpu, val, 0, 0, 0); ++ return 1; ++} +--- a/arch/arm64/kvm/handle_exit.c ++++ b/arch/arm64/kvm/handle_exit.c +@@ -45,7 +45,7 @@ static int handle_hvc(struct kvm_vcpu *v + kvm_vcpu_hvc_get_imm(vcpu)); + vcpu->stat.hvc_exit_stat++; + +- ret = kvm_psci_call(vcpu); ++ ret = kvm_hvc_call_handler(vcpu); + if (ret < 0) { + vcpu_set_reg(vcpu, 0, ~0UL); + return 1; +--- a/include/kvm/arm_psci.h ++++ b/include/kvm/arm_psci.h +@@ -27,6 +27,6 @@ + #define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 + + int kvm_psci_version(struct kvm_vcpu *vcpu); +-int kvm_psci_call(struct kvm_vcpu *vcpu); ++int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); + + #endif /* __KVM_ARM_PSCI_H__ */ +--- a/include/linux/arm-smccc.h ++++ b/include/linux/arm-smccc.h +@@ -60,6 +60,19 @@ + #define ARM_SMCCC_QUIRK_NONE 0 + #define ARM_SMCCC_QUIRK_QCOM_A6 1 /* Save/restore register a6 */ + ++#define ARM_SMCCC_VERSION_1_0 0x10000 ++#define ARM_SMCCC_VERSION_1_1 0x10001 ++ ++#define ARM_SMCCC_VERSION_FUNC_ID \ ++ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ++ ARM_SMCCC_SMC_32, \ ++ 0, 0) ++ ++#define ARM_SMCCC_ARCH_FEATURES_FUNC_ID \ ++ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ++ ARM_SMCCC_SMC_32, \ ++ 0, 1) ++ + #ifndef __ASSEMBLY__ + + #include diff --git a/queue-4.9/arm-arm64-kvm-consolidate-the-psci-include-files.patch b/queue-4.9/arm-arm64-kvm-consolidate-the-psci-include-files.patch new file mode 100644 index 00000000000..854ddd9b969 --- /dev/null +++ b/queue-4.9/arm-arm64-kvm-consolidate-the-psci-include-files.patch @@ -0,0 +1,191 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:24 +0100 +Subject: [PATCH v4.9.y 28/42] arm/arm64: KVM: Consolidate the PSCI include files +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-29-mark.rutland@arm.com> + +From: Mark Rutland + + 
+From: Marc Zyngier + +commit 1a2fb94e6a771ff94f4afa22497a4695187b820c upstream. + +As we're about to update the PSCI support, and because I'm lazy, +let's move the PSCI include file to include/kvm so that both +ARM architectures can find it. + +Acked-by: Christoffer Dall +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +[v4.9: account for files moved to virt/ upstream] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/include/asm/kvm_psci.h | 27 --------------------------- + arch/arm/kvm/arm.c | 2 +- + arch/arm/kvm/handle_exit.c | 2 +- + arch/arm/kvm/psci.c | 3 ++- + arch/arm64/include/asm/kvm_psci.h | 27 --------------------------- + arch/arm64/kvm/handle_exit.c | 5 ++++- + include/kvm/arm_psci.h | 27 +++++++++++++++++++++++++++ + 7 files changed, 35 insertions(+), 58 deletions(-) + delete mode 100644 arch/arm/include/asm/kvm_psci.h + rename arch/arm64/include/asm/kvm_psci.h => include/kvm/arm_psci.h (89%) + +--- a/arch/arm/include/asm/kvm_psci.h ++++ /dev/null +@@ -1,27 +0,0 @@ +-/* +- * Copyright (C) 2012 - ARM Ltd +- * Author: Marc Zyngier +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License version 2 as +- * published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program. If not, see . 
+- */ +- +-#ifndef __ARM_KVM_PSCI_H__ +-#define __ARM_KVM_PSCI_H__ +- +-#define KVM_ARM_PSCI_0_1 1 +-#define KVM_ARM_PSCI_0_2 2 +- +-int kvm_psci_version(struct kvm_vcpu *vcpu); +-int kvm_psci_call(struct kvm_vcpu *vcpu); +- +-#endif /* __ARM_KVM_PSCI_H__ */ +--- a/arch/arm/kvm/arm.c ++++ b/arch/arm/kvm/arm.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #define CREATE_TRACE_POINTS + #include "trace.h" +@@ -44,7 +45,6 @@ + #include + #include + #include +-#include + #include + + #ifdef REQUIRES_VIRT +--- a/arch/arm/kvm/handle_exit.c ++++ b/arch/arm/kvm/handle_exit.c +@@ -21,7 +21,7 @@ + #include + #include + #include +-#include ++#include + #include + + #include "trace.h" +--- a/arch/arm/kvm/psci.c ++++ b/arch/arm/kvm/psci.c +@@ -21,9 +21,10 @@ + + #include + #include +-#include + #include + ++#include ++ + #include + + /* +--- a/arch/arm64/include/asm/kvm_psci.h ++++ /dev/null +@@ -1,27 +0,0 @@ +-/* +- * Copyright (C) 2012,2013 - ARM Ltd +- * Author: Marc Zyngier +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License version 2 as +- * published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program. If not, see . 
+- */ +- +-#ifndef __ARM64_KVM_PSCI_H__ +-#define __ARM64_KVM_PSCI_H__ +- +-#define KVM_ARM_PSCI_0_1 1 +-#define KVM_ARM_PSCI_0_2 2 +- +-int kvm_psci_version(struct kvm_vcpu *vcpu); +-int kvm_psci_call(struct kvm_vcpu *vcpu); +- +-#endif /* __ARM64_KVM_PSCI_H__ */ +--- a/arch/arm64/kvm/handle_exit.c ++++ b/arch/arm64/kvm/handle_exit.c +@@ -22,12 +22,15 @@ + #include + #include + ++#include ++ + #include + #include + #include + #include + #include +-#include ++#include ++#include + + #define CREATE_TRACE_POINTS + #include "trace.h" +--- /dev/null ++++ b/include/kvm/arm_psci.h +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (C) 2012,2013 - ARM Ltd ++ * Author: Marc Zyngier ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . 
++ */ ++ ++#ifndef __KVM_ARM_PSCI_H__ ++#define __KVM_ARM_PSCI_H__ ++ ++#define KVM_ARM_PSCI_0_1 1 ++#define KVM_ARM_PSCI_0_2 2 ++ ++int kvm_psci_version(struct kvm_vcpu *vcpu); ++int kvm_psci_call(struct kvm_vcpu *vcpu); ++ ++#endif /* __KVM_ARM_PSCI_H__ */ diff --git a/queue-4.9/arm-arm64-kvm-implement-psci-1.0-support.patch b/queue-4.9/arm-arm64-kvm-implement-psci-1.0-support.patch new file mode 100644 index 00000000000..9fb95b25dd4 --- /dev/null +++ b/queue-4.9/arm-arm64-kvm-implement-psci-1.0-support.patch @@ -0,0 +1,117 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:27 +0100 +Subject: [PATCH v4.9.y 31/42] arm/arm64: KVM: Implement PSCI 1.0 support +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-32-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit 58e0b2239a4d997094ba63986ef4de29ddc91d87 upstream. + +PSCI 1.0 can be trivially implemented by providing the FEATURES +call on top of PSCI 0.2 and returning 1.0 as the PSCI version. + +We happily ignore everything else, as they are either optional or +are clarifications that do not require any additional change. + +PSCI 1.0 is now the default until we decide to add a userspace +selection API. 
+ +Reviewed-by: Christoffer Dall +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +[v4.9: account for files moved to virt/ upstream] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/kvm/psci.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- + include/kvm/arm_psci.h | 3 +++ + 2 files changed, 47 insertions(+), 1 deletion(-) + +--- a/arch/arm/kvm/psci.c ++++ b/arch/arm/kvm/psci.c +@@ -232,7 +232,7 @@ static void kvm_psci_system_reset(struct + int kvm_psci_version(struct kvm_vcpu *vcpu) + { + if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) +- return KVM_ARM_PSCI_0_2; ++ return KVM_ARM_PSCI_LATEST; + + return KVM_ARM_PSCI_0_1; + } +@@ -311,6 +311,47 @@ static int kvm_psci_0_2_call(struct kvm_ + return ret; + } + ++static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) ++{ ++ u32 psci_fn = smccc_get_function(vcpu); ++ u32 feature; ++ unsigned long val; ++ int ret = 1; ++ ++ switch(psci_fn) { ++ case PSCI_0_2_FN_PSCI_VERSION: ++ val = KVM_ARM_PSCI_1_0; ++ break; ++ case PSCI_1_0_FN_PSCI_FEATURES: ++ feature = smccc_get_arg1(vcpu); ++ switch(feature) { ++ case PSCI_0_2_FN_PSCI_VERSION: ++ case PSCI_0_2_FN_CPU_SUSPEND: ++ case PSCI_0_2_FN64_CPU_SUSPEND: ++ case PSCI_0_2_FN_CPU_OFF: ++ case PSCI_0_2_FN_CPU_ON: ++ case PSCI_0_2_FN64_CPU_ON: ++ case PSCI_0_2_FN_AFFINITY_INFO: ++ case PSCI_0_2_FN64_AFFINITY_INFO: ++ case PSCI_0_2_FN_MIGRATE_INFO_TYPE: ++ case PSCI_0_2_FN_SYSTEM_OFF: ++ case PSCI_0_2_FN_SYSTEM_RESET: ++ case PSCI_1_0_FN_PSCI_FEATURES: ++ val = 0; ++ break; ++ default: ++ val = PSCI_RET_NOT_SUPPORTED; ++ break; ++ } ++ break; ++ default: ++ return kvm_psci_0_2_call(vcpu); ++ } ++ ++ smccc_set_retval(vcpu, val, 0, 0, 0); ++ return ret; ++} ++ + static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) + { + struct kvm *kvm = vcpu->kvm; +@@ -353,6 +394,8 @@ static int kvm_psci_0_1_call(struct kvm_ + int kvm_psci_call(struct kvm_vcpu *vcpu) + { + 
switch (kvm_psci_version(vcpu)) { ++ case KVM_ARM_PSCI_1_0: ++ return kvm_psci_1_0_call(vcpu); + case KVM_ARM_PSCI_0_2: + return kvm_psci_0_2_call(vcpu); + case KVM_ARM_PSCI_0_1: +--- a/include/kvm/arm_psci.h ++++ b/include/kvm/arm_psci.h +@@ -22,6 +22,9 @@ + + #define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) + #define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2) ++#define KVM_ARM_PSCI_1_0 PSCI_VERSION(1, 0) ++ ++#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 + + int kvm_psci_version(struct kvm_vcpu *vcpu); + int kvm_psci_call(struct kvm_vcpu *vcpu); diff --git a/queue-4.9/arm-arm64-kvm-turn-kvm_psci_version-into-a-static-inline.patch b/queue-4.9/arm-arm64-kvm-turn-kvm_psci_version-into-a-static-inline.patch new file mode 100644 index 00000000000..4dbf58fd524 --- /dev/null +++ b/queue-4.9/arm-arm64-kvm-turn-kvm_psci_version-into-a-static-inline.patch @@ -0,0 +1,140 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:30 +0100 +Subject: [PATCH v4.9.y 34/42] arm/arm64: KVM: Turn kvm_psci_version into a static inline +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-35-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit a4097b351118e821841941a79ec77d3ce3f1c5d9 upstream. + +We're about to need kvm_psci_version in HYP too. So let's turn it +into a static inline, and pass the kvm structure as a second +parameter (so that HYP can do a kern_hyp_va on it). 
+ +Tested-by: Ard Biesheuvel +Reviewed-by: Christoffer Dall +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +[v4.9: account for files moved to virt/ upstream] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/kvm/psci.c | 12 ++---------- + arch/arm64/kvm/hyp/switch.c | 18 +++++++++++------- + include/kvm/arm_psci.h | 21 ++++++++++++++++++++- + 3 files changed, 33 insertions(+), 18 deletions(-) + +--- a/arch/arm/kvm/psci.c ++++ b/arch/arm/kvm/psci.c +@@ -120,7 +120,7 @@ static unsigned long kvm_psci_vcpu_on(st + if (!vcpu) + return PSCI_RET_INVALID_PARAMS; + if (!vcpu->arch.power_off) { +- if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) ++ if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) + return PSCI_RET_ALREADY_ON; + else + return PSCI_RET_INVALID_PARAMS; +@@ -230,14 +230,6 @@ static void kvm_psci_system_reset(struct + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); + } + +-int kvm_psci_version(struct kvm_vcpu *vcpu) +-{ +- if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) +- return KVM_ARM_PSCI_LATEST; +- +- return KVM_ARM_PSCI_0_1; +-} +- + static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) + { + struct kvm *kvm = vcpu->kvm; +@@ -395,7 +387,7 @@ static int kvm_psci_0_1_call(struct kvm_ + */ + static int kvm_psci_call(struct kvm_vcpu *vcpu) + { +- switch (kvm_psci_version(vcpu)) { ++ switch (kvm_psci_version(vcpu, vcpu->kvm)) { + case KVM_ARM_PSCI_1_0: + return kvm_psci_1_0_call(vcpu); + case KVM_ARM_PSCI_0_2: +--- a/arch/arm64/kvm/hyp/switch.c ++++ b/arch/arm64/kvm/hyp/switch.c +@@ -19,6 +19,8 @@ + #include + #include + ++#include ++ + #include + #include + #include +@@ -311,14 +313,16 @@ again: + + if (exit_code == ARM_EXCEPTION_TRAP && + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 || +- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32) && +- vcpu_get_reg(vcpu, 0) == PSCI_0_2_FN_PSCI_VERSION) { +- u64 val = PSCI_RET_NOT_SUPPORTED; +- 
if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) +- val = 2; ++ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32)) { ++ u32 val = vcpu_get_reg(vcpu, 0); + +- vcpu_set_reg(vcpu, 0, val); +- goto again; ++ if (val == PSCI_0_2_FN_PSCI_VERSION) { ++ val = kvm_psci_version(vcpu, kern_hyp_va(vcpu->kvm)); ++ if (unlikely(val == KVM_ARM_PSCI_0_1)) ++ val = PSCI_RET_NOT_SUPPORTED; ++ vcpu_set_reg(vcpu, 0, val); ++ goto again; ++ } + } + + if (static_branch_unlikely(&vgic_v2_cpuif_trap) && +--- a/include/kvm/arm_psci.h ++++ b/include/kvm/arm_psci.h +@@ -18,6 +18,7 @@ + #ifndef __KVM_ARM_PSCI_H__ + #define __KVM_ARM_PSCI_H__ + ++#include + #include + + #define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) +@@ -26,7 +27,25 @@ + + #define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 + +-int kvm_psci_version(struct kvm_vcpu *vcpu); ++/* ++ * We need the KVM pointer independently from the vcpu as we can call ++ * this from HYP, and need to apply kern_hyp_va on it... ++ */ ++static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm) ++{ ++ /* ++ * Our PSCI implementation stays the same across versions from ++ * v0.2 onward, only adding the few mandatory functions (such ++ * as FEATURES with 1.0) that are required by newer ++ * revisions. It is thus safe to return the latest. 
++ */ ++ if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) ++ return KVM_ARM_PSCI_LATEST; ++ ++ return KVM_ARM_PSCI_0_1; ++} ++ ++ + int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); + + #endif /* __KVM_ARM_PSCI_H__ */ diff --git a/queue-4.9/arm-arm64-smccc-implement-smccc-v1.1-inline-primitive.patch b/queue-4.9/arm-arm64-smccc-implement-smccc-v1.1-inline-primitive.patch new file mode 100644 index 00000000000..e63bc8cdc2d --- /dev/null +++ b/queue-4.9/arm-arm64-smccc-implement-smccc-v1.1-inline-primitive.patch @@ -0,0 +1,181 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:36 +0100 +Subject: [PATCH v4.9.y 40/42] arm/arm64: smccc: Implement SMCCC v1.1 inline primitive +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-41-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit f2d3b2e8759a5833df6f022e42df2d581e6d843c upstream. + +One of the major improvements of SMCCC v1.1 is that it only clobbers +the first 4 registers, both on 32 and 64bit. This means that it +becomes very easy to provide an inline version of the SMC call +primitive, and avoid performing a function call to stash the +registers that would otherwise be clobbered by SMCCC v1.0. + +Reviewed-by: Robin Murphy +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/arm-smccc.h | 141 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 141 insertions(+) + +--- a/include/linux/arm-smccc.h ++++ b/include/linux/arm-smccc.h +@@ -150,5 +150,146 @@ asmlinkage void __arm_smccc_hvc(unsigned + + #define arm_smccc_hvc_quirk(...) 
__arm_smccc_hvc(__VA_ARGS__) + ++/* SMCCC v1.1 implementation madness follows */ ++#ifdef CONFIG_ARM64 ++ ++#define SMCCC_SMC_INST "smc #0" ++#define SMCCC_HVC_INST "hvc #0" ++ ++#elif defined(CONFIG_ARM) ++#include ++#include ++ ++#define SMCCC_SMC_INST __SMC(0) ++#define SMCCC_HVC_INST __HVC(0) ++ ++#endif ++ ++#define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x ++ ++#define __count_args(...) \ ++ ___count_args(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0) ++ ++#define __constraint_write_0 \ ++ "+r" (r0), "=&r" (r1), "=&r" (r2), "=&r" (r3) ++#define __constraint_write_1 \ ++ "+r" (r0), "+r" (r1), "=&r" (r2), "=&r" (r3) ++#define __constraint_write_2 \ ++ "+r" (r0), "+r" (r1), "+r" (r2), "=&r" (r3) ++#define __constraint_write_3 \ ++ "+r" (r0), "+r" (r1), "+r" (r2), "+r" (r3) ++#define __constraint_write_4 __constraint_write_3 ++#define __constraint_write_5 __constraint_write_4 ++#define __constraint_write_6 __constraint_write_5 ++#define __constraint_write_7 __constraint_write_6 ++ ++#define __constraint_read_0 ++#define __constraint_read_1 ++#define __constraint_read_2 ++#define __constraint_read_3 ++#define __constraint_read_4 "r" (r4) ++#define __constraint_read_5 __constraint_read_4, "r" (r5) ++#define __constraint_read_6 __constraint_read_5, "r" (r6) ++#define __constraint_read_7 __constraint_read_6, "r" (r7) ++ ++#define __declare_arg_0(a0, res) \ ++ struct arm_smccc_res *___res = res; \ ++ register u32 r0 asm("r0") = a0; \ ++ register unsigned long r1 asm("r1"); \ ++ register unsigned long r2 asm("r2"); \ ++ register unsigned long r3 asm("r3") ++ ++#define __declare_arg_1(a0, a1, res) \ ++ struct arm_smccc_res *___res = res; \ ++ register u32 r0 asm("r0") = a0; \ ++ register typeof(a1) r1 asm("r1") = a1; \ ++ register unsigned long r2 asm("r2"); \ ++ register unsigned long r3 asm("r3") ++ ++#define __declare_arg_2(a0, a1, a2, res) \ ++ struct arm_smccc_res *___res = res; \ ++ register u32 r0 asm("r0") = a0; \ ++ register typeof(a1) r1 asm("r1") = 
a1; \ ++ register typeof(a2) r2 asm("r2") = a2; \ ++ register unsigned long r3 asm("r3") ++ ++#define __declare_arg_3(a0, a1, a2, a3, res) \ ++ struct arm_smccc_res *___res = res; \ ++ register u32 r0 asm("r0") = a0; \ ++ register typeof(a1) r1 asm("r1") = a1; \ ++ register typeof(a2) r2 asm("r2") = a2; \ ++ register typeof(a3) r3 asm("r3") = a3 ++ ++#define __declare_arg_4(a0, a1, a2, a3, a4, res) \ ++ __declare_arg_3(a0, a1, a2, a3, res); \ ++ register typeof(a4) r4 asm("r4") = a4 ++ ++#define __declare_arg_5(a0, a1, a2, a3, a4, a5, res) \ ++ __declare_arg_4(a0, a1, a2, a3, a4, res); \ ++ register typeof(a5) r5 asm("r5") = a5 ++ ++#define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res) \ ++ __declare_arg_5(a0, a1, a2, a3, a4, a5, res); \ ++ register typeof(a6) r6 asm("r6") = a6 ++ ++#define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res) \ ++ __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res); \ ++ register typeof(a7) r7 asm("r7") = a7 ++ ++#define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__) ++#define __declare_args(count, ...) ___declare_args(count, __VA_ARGS__) ++ ++#define ___constraints(count) \ ++ : __constraint_write_ ## count \ ++ : __constraint_read_ ## count \ ++ : "memory" ++#define __constraints(count) ___constraints(count) ++ ++/* ++ * We have an output list that is not necessarily used, and GCC feels ++ * entitled to optimise the whole sequence away. "volatile" is what ++ * makes it stick. ++ */ ++#define __arm_smccc_1_1(inst, ...) \ ++ do { \ ++ __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ ++ asm volatile(inst "\n" \ ++ __constraints(__count_args(__VA_ARGS__))); \ ++ if (___res) \ ++ *___res = (typeof(*___res)){r0, r1, r2, r3}; \ ++ } while (0) ++ ++/* ++ * arm_smccc_1_1_smc() - make an SMCCC v1.1 compliant SMC call ++ * ++ * This is a variadic macro taking one to eight source arguments, and ++ * an optional return structure. 
++ * ++ * @a0-a7: arguments passed in registers 0 to 7 ++ * @res: result values from registers 0 to 3 ++ * ++ * This macro is used to make SMC calls following SMC Calling Convention v1.1. ++ * The contents of the supplied params are copied to registers 0 to 7 prior ++ * to the SMC instruction. If @res is not NULL, it is updated with the contents ++ * of registers 0 to 3 on return from the SMC instruction. ++ */ ++#define arm_smccc_1_1_smc(...) __arm_smccc_1_1(SMCCC_SMC_INST, __VA_ARGS__) ++ ++/* ++ * arm_smccc_1_1_hvc() - make an SMCCC v1.1 compliant HVC call ++ * ++ * This is a variadic macro taking one to eight source arguments, and ++ * an optional return structure. ++ * ++ * @a0-a7: arguments passed in registers 0 to 7 ++ * @res: result values from registers 0 to 3 ++ * ++ * This macro is used to make HVC calls following SMC Calling Convention v1.1. ++ * The contents of the supplied params are copied to registers 0 to 7 prior ++ * to the HVC instruction. If @res is not NULL, it is updated with the contents ++ * of registers 0 to 3 on return from the HVC instruction. ++ */ ++#define arm_smccc_1_1_hvc(...)
__arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__) ++ + #endif /*__ASSEMBLY__*/ + #endif /*__LINUX_ARM_SMCCC_H*/ diff --git a/queue-4.9/arm-arm64-smccc-make-function-identifiers-an-unsigned-quantity.patch b/queue-4.9/arm-arm64-smccc-make-function-identifiers-an-unsigned-quantity.patch new file mode 100644 index 00000000000..c928b032c9c --- /dev/null +++ b/queue-4.9/arm-arm64-smccc-make-function-identifiers-an-unsigned-quantity.patch @@ -0,0 +1,58 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:35 +0100 +Subject: [PATCH v4.9.y 39/42] arm/arm64: smccc: Make function identifiers an unsigned quantity +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-40-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit ded4c39e93f3b72968fdb79baba27f3b83dad34c upstream. + +Function identifiers are a 32bit, unsigned quantity. But we never +tell so to the compiler, resulting in the following: + + 4ac: b26187e0 mov x0, #0xffffffff80000001 + +We thus rely on the firmware narrowing it for us, which is not +always a reasonable expectation. 
+ +Cc: stable@vger.kernel.org +Reported-by: Ard Biesheuvel +Acked-by: Ard Biesheuvel +Reviewed-by: Robin Murphy +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/arm-smccc.h | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/include/linux/arm-smccc.h ++++ b/include/linux/arm-smccc.h +@@ -14,14 +14,16 @@ + #ifndef __LINUX_ARM_SMCCC_H + #define __LINUX_ARM_SMCCC_H + ++#include ++ + /* + * This file provides common defines for ARM SMC Calling Convention as + * specified in + * http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html + */ + +-#define ARM_SMCCC_STD_CALL 0 +-#define ARM_SMCCC_FAST_CALL 1 ++#define ARM_SMCCC_STD_CALL _AC(0,U) ++#define ARM_SMCCC_FAST_CALL _AC(1,U) + #define ARM_SMCCC_TYPE_SHIFT 31 + + #define ARM_SMCCC_SMC_32 0 diff --git a/queue-4.9/arm64-add-arm_smccc_arch_workaround_1-bp-hardening-support.patch b/queue-4.9/arm64-add-arm_smccc_arch_workaround_1-bp-hardening-support.patch new file mode 100644 index 00000000000..dc973daa53b --- /dev/null +++ b/queue-4.9/arm64-add-arm_smccc_arch_workaround_1-bp-hardening-support.patch @@ -0,0 +1,3323 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:37 +0100 +Subject: [PATCH v4.9.y 41/42] arm64: Add ARM_SMCCC_ARCH_WORKAROUND_1 BP hardening support +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-42-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit b092201e0020614127f495c092e0a12d26a2116e upstream. + +Add the detection and runtime code for ARM_SMCCC_ARCH_WORKAROUND_1. +It is lovely. Really. 
+ +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/crypto/sha256-core.S | 2061 ++++++++++++++++++++++++++++++++++++++++ + arch/arm64/crypto/sha512-core.S | 1085 +++++++++++++++++++++ + arch/arm64/kernel/bpi.S | 20 + arch/arm64/kernel/cpu_errata.c | 72 + + 4 files changed, 3235 insertions(+), 3 deletions(-) + create mode 100644 arch/arm64/crypto/sha256-core.S + create mode 100644 arch/arm64/crypto/sha512-core.S + +--- /dev/null ++++ b/arch/arm64/crypto/sha256-core.S +@@ -0,0 +1,2061 @@ ++// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. ++// ++// Licensed under the OpenSSL license (the "License"). You may not use ++// this file except in compliance with the License. You can obtain a copy ++// in the file LICENSE in the source distribution or at ++// https://www.openssl.org/source/license.html ++ ++// ==================================================================== ++// Written by Andy Polyakov for the OpenSSL ++// project. The module is, however, dual licensed under OpenSSL and ++// CRYPTOGAMS licenses depending on where you obtain it. For further ++// details see http://www.openssl.org/~appro/cryptogams/. ++// ++// Permission to use under GPLv2 terms is granted. ++// ==================================================================== ++// ++// SHA256/512 for ARMv8. 
++// ++// Performance in cycles per processed byte and improvement coefficient ++// over code generated with "default" compiler: ++// ++// SHA256-hw SHA256(*) SHA512 ++// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) ++// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) ++// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) ++// Denver 2.01 10.5 (+26%) 6.70 (+8%) ++// X-Gene 20.0 (+100%) 12.8 (+300%(***)) ++// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) ++// ++// (*) Software SHA256 results are of lesser relevance, presented ++// mostly for informational purposes. ++// (**) The result is a trade-off: it's possible to improve it by ++// 10% (or by 1 cycle per round), but at the cost of 20% loss ++// on Cortex-A53 (or by 4 cycles per round). ++// (***) Super-impressive coefficients over gcc-generated code are ++// indication of some compiler "pathology", most notably code ++// generated with -mgeneral-regs-only is significantly faster ++// and the gap is only 40-90%. ++// ++// October 2016. ++// ++// Originally it was reckoned that it makes no sense to implement NEON ++// version of SHA256 for 64-bit processors. This is because performance ++// improvement on most wide-spread Cortex-A5x processors was observed ++// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was ++// observed that 32-bit NEON SHA256 performs significantly better than ++// 64-bit scalar version on *some* of the more recent processors. As ++// result 64-bit NEON version of SHA256 was added to provide best ++// all-round performance. For example it executes ~30% faster on X-Gene ++// and Mongoose. [For reference, NEON version of SHA512 is bound to ++// deliver much less improvement, likely *negative* on Cortex-A5x. ++// Which is why NEON support is limited to SHA256.]
++ ++#ifndef __KERNEL__ ++# include "arm_arch.h" ++#endif ++ ++.text ++ ++.extern OPENSSL_armcap_P ++.globl sha256_block_data_order ++.type sha256_block_data_order,%function ++.align 6 ++sha256_block_data_order: ++#ifndef __KERNEL__ ++# ifdef __ILP32__ ++ ldrsw x16,.LOPENSSL_armcap_P ++# else ++ ldr x16,.LOPENSSL_armcap_P ++# endif ++ adr x17,.LOPENSSL_armcap_P ++ add x16,x16,x17 ++ ldr w16,[x16] ++ tst w16,#ARMV8_SHA256 ++ b.ne .Lv8_entry ++ tst w16,#ARMV7_NEON ++ b.ne .Lneon_entry ++#endif ++ stp x29,x30,[sp,#-128]! ++ add x29,sp,#0 ++ ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ stp x25,x26,[sp,#64] ++ stp x27,x28,[sp,#80] ++ sub sp,sp,#4*4 ++ ++ ldp w20,w21,[x0] // load context ++ ldp w22,w23,[x0,#2*4] ++ ldp w24,w25,[x0,#4*4] ++ add x2,x1,x2,lsl#6 // end of input ++ ldp w26,w27,[x0,#6*4] ++ adr x30,.LK256 ++ stp x0,x2,[x29,#96] ++ ++.Loop: ++ ldp w3,w4,[x1],#2*4 ++ ldr w19,[x30],#4 // *K++ ++ eor w28,w21,w22 // magic seed ++ str x1,[x29,#112] ++#ifndef __AARCH64EB__ ++ rev w3,w3 // 0 ++#endif ++ ror w16,w24,#6 ++ add w27,w27,w19 // h+=K[i] ++ eor w6,w24,w24,ror#14 ++ and w17,w25,w24 ++ bic w19,w26,w24 ++ add w27,w27,w3 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w20,w21 // a^b, b^c in next round ++ eor w16,w16,w6,ror#11 // Sigma1(e) ++ ror w6,w20,#2 ++ add w27,w27,w17 // h+=Ch(e,f,g) ++ eor w17,w20,w20,ror#9 ++ add w27,w27,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w23,w23,w27 // d+=h ++ eor w28,w28,w21 // Maj(a,b,c) ++ eor w17,w6,w17,ror#13 // Sigma0(a) ++ add w27,w27,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w27,w27,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w4,w4 // 1 ++#endif ++ ldp w5,w6,[x1],#2*4 ++ add w27,w27,w17 // h+=Sigma0(a) ++ ror w16,w23,#6 ++ add w26,w26,w28 // h+=K[i] ++ eor w7,w23,w23,ror#14 ++ and w17,w24,w23 ++ bic w28,w25,w23 ++ add w26,w26,w4 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w27,w20 // a^b, b^c in next round ++ eor 
w16,w16,w7,ror#11 // Sigma1(e) ++ ror w7,w27,#2 ++ add w26,w26,w17 // h+=Ch(e,f,g) ++ eor w17,w27,w27,ror#9 ++ add w26,w26,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w22,w22,w26 // d+=h ++ eor w19,w19,w20 // Maj(a,b,c) ++ eor w17,w7,w17,ror#13 // Sigma0(a) ++ add w26,w26,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w26,w26,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w5,w5 // 2 ++#endif ++ add w26,w26,w17 // h+=Sigma0(a) ++ ror w16,w22,#6 ++ add w25,w25,w19 // h+=K[i] ++ eor w8,w22,w22,ror#14 ++ and w17,w23,w22 ++ bic w19,w24,w22 ++ add w25,w25,w5 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w26,w27 // a^b, b^c in next round ++ eor w16,w16,w8,ror#11 // Sigma1(e) ++ ror w8,w26,#2 ++ add w25,w25,w17 // h+=Ch(e,f,g) ++ eor w17,w26,w26,ror#9 ++ add w25,w25,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w21,w21,w25 // d+=h ++ eor w28,w28,w27 // Maj(a,b,c) ++ eor w17,w8,w17,ror#13 // Sigma0(a) ++ add w25,w25,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w25,w25,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w6,w6 // 3 ++#endif ++ ldp w7,w8,[x1],#2*4 ++ add w25,w25,w17 // h+=Sigma0(a) ++ ror w16,w21,#6 ++ add w24,w24,w28 // h+=K[i] ++ eor w9,w21,w21,ror#14 ++ and w17,w22,w21 ++ bic w28,w23,w21 ++ add w24,w24,w6 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w25,w26 // a^b, b^c in next round ++ eor w16,w16,w9,ror#11 // Sigma1(e) ++ ror w9,w25,#2 ++ add w24,w24,w17 // h+=Ch(e,f,g) ++ eor w17,w25,w25,ror#9 ++ add w24,w24,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w20,w20,w24 // d+=h ++ eor w19,w19,w26 // Maj(a,b,c) ++ eor w17,w9,w17,ror#13 // Sigma0(a) ++ add w24,w24,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w24,w24,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w7,w7 // 4 ++#endif ++ add w24,w24,w17 // h+=Sigma0(a) ++ ror w16,w20,#6 ++ add w23,w23,w19 // h+=K[i] ++ eor w10,w20,w20,ror#14 ++ and 
w17,w21,w20 ++ bic w19,w22,w20 ++ add w23,w23,w7 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w24,w25 // a^b, b^c in next round ++ eor w16,w16,w10,ror#11 // Sigma1(e) ++ ror w10,w24,#2 ++ add w23,w23,w17 // h+=Ch(e,f,g) ++ eor w17,w24,w24,ror#9 ++ add w23,w23,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w27,w27,w23 // d+=h ++ eor w28,w28,w25 // Maj(a,b,c) ++ eor w17,w10,w17,ror#13 // Sigma0(a) ++ add w23,w23,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w23,w23,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w8,w8 // 5 ++#endif ++ ldp w9,w10,[x1],#2*4 ++ add w23,w23,w17 // h+=Sigma0(a) ++ ror w16,w27,#6 ++ add w22,w22,w28 // h+=K[i] ++ eor w11,w27,w27,ror#14 ++ and w17,w20,w27 ++ bic w28,w21,w27 ++ add w22,w22,w8 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w23,w24 // a^b, b^c in next round ++ eor w16,w16,w11,ror#11 // Sigma1(e) ++ ror w11,w23,#2 ++ add w22,w22,w17 // h+=Ch(e,f,g) ++ eor w17,w23,w23,ror#9 ++ add w22,w22,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w26,w26,w22 // d+=h ++ eor w19,w19,w24 // Maj(a,b,c) ++ eor w17,w11,w17,ror#13 // Sigma0(a) ++ add w22,w22,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w22,w22,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w9,w9 // 6 ++#endif ++ add w22,w22,w17 // h+=Sigma0(a) ++ ror w16,w26,#6 ++ add w21,w21,w19 // h+=K[i] ++ eor w12,w26,w26,ror#14 ++ and w17,w27,w26 ++ bic w19,w20,w26 ++ add w21,w21,w9 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w22,w23 // a^b, b^c in next round ++ eor w16,w16,w12,ror#11 // Sigma1(e) ++ ror w12,w22,#2 ++ add w21,w21,w17 // h+=Ch(e,f,g) ++ eor w17,w22,w22,ror#9 ++ add w21,w21,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w25,w25,w21 // d+=h ++ eor w28,w28,w23 // Maj(a,b,c) ++ eor w17,w12,w17,ror#13 // Sigma0(a) ++ add w21,w21,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w21,w21,w17 // h+=Sigma0(a) ++#ifndef 
__AARCH64EB__ ++ rev w10,w10 // 7 ++#endif ++ ldp w11,w12,[x1],#2*4 ++ add w21,w21,w17 // h+=Sigma0(a) ++ ror w16,w25,#6 ++ add w20,w20,w28 // h+=K[i] ++ eor w13,w25,w25,ror#14 ++ and w17,w26,w25 ++ bic w28,w27,w25 ++ add w20,w20,w10 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w21,w22 // a^b, b^c in next round ++ eor w16,w16,w13,ror#11 // Sigma1(e) ++ ror w13,w21,#2 ++ add w20,w20,w17 // h+=Ch(e,f,g) ++ eor w17,w21,w21,ror#9 ++ add w20,w20,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w24,w24,w20 // d+=h ++ eor w19,w19,w22 // Maj(a,b,c) ++ eor w17,w13,w17,ror#13 // Sigma0(a) ++ add w20,w20,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w20,w20,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w11,w11 // 8 ++#endif ++ add w20,w20,w17 // h+=Sigma0(a) ++ ror w16,w24,#6 ++ add w27,w27,w19 // h+=K[i] ++ eor w14,w24,w24,ror#14 ++ and w17,w25,w24 ++ bic w19,w26,w24 ++ add w27,w27,w11 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w20,w21 // a^b, b^c in next round ++ eor w16,w16,w14,ror#11 // Sigma1(e) ++ ror w14,w20,#2 ++ add w27,w27,w17 // h+=Ch(e,f,g) ++ eor w17,w20,w20,ror#9 ++ add w27,w27,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w23,w23,w27 // d+=h ++ eor w28,w28,w21 // Maj(a,b,c) ++ eor w17,w14,w17,ror#13 // Sigma0(a) ++ add w27,w27,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w27,w27,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w12,w12 // 9 ++#endif ++ ldp w13,w14,[x1],#2*4 ++ add w27,w27,w17 // h+=Sigma0(a) ++ ror w16,w23,#6 ++ add w26,w26,w28 // h+=K[i] ++ eor w15,w23,w23,ror#14 ++ and w17,w24,w23 ++ bic w28,w25,w23 ++ add w26,w26,w12 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w27,w20 // a^b, b^c in next round ++ eor w16,w16,w15,ror#11 // Sigma1(e) ++ ror w15,w27,#2 ++ add w26,w26,w17 // h+=Ch(e,f,g) ++ eor w17,w27,w27,ror#9 ++ add w26,w26,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w22,w22,w26 // d+=h ++ eor 
w19,w19,w20 // Maj(a,b,c) ++ eor w17,w15,w17,ror#13 // Sigma0(a) ++ add w26,w26,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w26,w26,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w13,w13 // 10 ++#endif ++ add w26,w26,w17 // h+=Sigma0(a) ++ ror w16,w22,#6 ++ add w25,w25,w19 // h+=K[i] ++ eor w0,w22,w22,ror#14 ++ and w17,w23,w22 ++ bic w19,w24,w22 ++ add w25,w25,w13 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w26,w27 // a^b, b^c in next round ++ eor w16,w16,w0,ror#11 // Sigma1(e) ++ ror w0,w26,#2 ++ add w25,w25,w17 // h+=Ch(e,f,g) ++ eor w17,w26,w26,ror#9 ++ add w25,w25,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w21,w21,w25 // d+=h ++ eor w28,w28,w27 // Maj(a,b,c) ++ eor w17,w0,w17,ror#13 // Sigma0(a) ++ add w25,w25,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w25,w25,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w14,w14 // 11 ++#endif ++ ldp w15,w0,[x1],#2*4 ++ add w25,w25,w17 // h+=Sigma0(a) ++ str w6,[sp,#12] ++ ror w16,w21,#6 ++ add w24,w24,w28 // h+=K[i] ++ eor w6,w21,w21,ror#14 ++ and w17,w22,w21 ++ bic w28,w23,w21 ++ add w24,w24,w14 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w25,w26 // a^b, b^c in next round ++ eor w16,w16,w6,ror#11 // Sigma1(e) ++ ror w6,w25,#2 ++ add w24,w24,w17 // h+=Ch(e,f,g) ++ eor w17,w25,w25,ror#9 ++ add w24,w24,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w20,w20,w24 // d+=h ++ eor w19,w19,w26 // Maj(a,b,c) ++ eor w17,w6,w17,ror#13 // Sigma0(a) ++ add w24,w24,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w24,w24,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w15,w15 // 12 ++#endif ++ add w24,w24,w17 // h+=Sigma0(a) ++ str w7,[sp,#0] ++ ror w16,w20,#6 ++ add w23,w23,w19 // h+=K[i] ++ eor w7,w20,w20,ror#14 ++ and w17,w21,w20 ++ bic w19,w22,w20 ++ add w23,w23,w15 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w24,w25 // a^b, b^c in next round ++ eor w16,w16,w7,ror#11 
// Sigma1(e) ++ ror w7,w24,#2 ++ add w23,w23,w17 // h+=Ch(e,f,g) ++ eor w17,w24,w24,ror#9 ++ add w23,w23,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w27,w27,w23 // d+=h ++ eor w28,w28,w25 // Maj(a,b,c) ++ eor w17,w7,w17,ror#13 // Sigma0(a) ++ add w23,w23,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w23,w23,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w0,w0 // 13 ++#endif ++ ldp w1,w2,[x1] ++ add w23,w23,w17 // h+=Sigma0(a) ++ str w8,[sp,#4] ++ ror w16,w27,#6 ++ add w22,w22,w28 // h+=K[i] ++ eor w8,w27,w27,ror#14 ++ and w17,w20,w27 ++ bic w28,w21,w27 ++ add w22,w22,w0 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w23,w24 // a^b, b^c in next round ++ eor w16,w16,w8,ror#11 // Sigma1(e) ++ ror w8,w23,#2 ++ add w22,w22,w17 // h+=Ch(e,f,g) ++ eor w17,w23,w23,ror#9 ++ add w22,w22,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w26,w26,w22 // d+=h ++ eor w19,w19,w24 // Maj(a,b,c) ++ eor w17,w8,w17,ror#13 // Sigma0(a) ++ add w22,w22,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w22,w22,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w1,w1 // 14 ++#endif ++ ldr w6,[sp,#12] ++ add w22,w22,w17 // h+=Sigma0(a) ++ str w9,[sp,#8] ++ ror w16,w26,#6 ++ add w21,w21,w19 // h+=K[i] ++ eor w9,w26,w26,ror#14 ++ and w17,w27,w26 ++ bic w19,w20,w26 ++ add w21,w21,w1 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w22,w23 // a^b, b^c in next round ++ eor w16,w16,w9,ror#11 // Sigma1(e) ++ ror w9,w22,#2 ++ add w21,w21,w17 // h+=Ch(e,f,g) ++ eor w17,w22,w22,ror#9 ++ add w21,w21,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w25,w25,w21 // d+=h ++ eor w28,w28,w23 // Maj(a,b,c) ++ eor w17,w9,w17,ror#13 // Sigma0(a) ++ add w21,w21,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w21,w21,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w2,w2 // 15 ++#endif ++ ldr w7,[sp,#0] ++ add w21,w21,w17 // h+=Sigma0(a) ++ str w10,[sp,#12] ++ ror 
w16,w25,#6 ++ add w20,w20,w28 // h+=K[i] ++ ror w9,w4,#7 ++ and w17,w26,w25 ++ ror w8,w1,#17 ++ bic w28,w27,w25 ++ ror w10,w21,#2 ++ add w20,w20,w2 // h+=X[i] ++ eor w16,w16,w25,ror#11 ++ eor w9,w9,w4,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w21,w22 // a^b, b^c in next round ++ eor w16,w16,w25,ror#25 // Sigma1(e) ++ eor w10,w10,w21,ror#13 ++ add w20,w20,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w8,w8,w1,ror#19 ++ eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) ++ add w20,w20,w16 // h+=Sigma1(e) ++ eor w19,w19,w22 // Maj(a,b,c) ++ eor w17,w10,w21,ror#22 // Sigma0(a) ++ eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) ++ add w3,w3,w12 ++ add w24,w24,w20 // d+=h ++ add w20,w20,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w3,w3,w9 ++ add w20,w20,w17 // h+=Sigma0(a) ++ add w3,w3,w8 ++.Loop_16_xx: ++ ldr w8,[sp,#4] ++ str w11,[sp,#0] ++ ror w16,w24,#6 ++ add w27,w27,w19 // h+=K[i] ++ ror w10,w5,#7 ++ and w17,w25,w24 ++ ror w9,w2,#17 ++ bic w19,w26,w24 ++ ror w11,w20,#2 ++ add w27,w27,w3 // h+=X[i] ++ eor w16,w16,w24,ror#11 ++ eor w10,w10,w5,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w20,w21 // a^b, b^c in next round ++ eor w16,w16,w24,ror#25 // Sigma1(e) ++ eor w11,w11,w20,ror#13 ++ add w27,w27,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w9,w9,w2,ror#19 ++ eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) ++ add w27,w27,w16 // h+=Sigma1(e) ++ eor w28,w28,w21 // Maj(a,b,c) ++ eor w17,w11,w20,ror#22 // Sigma0(a) ++ eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) ++ add w4,w4,w13 ++ add w23,w23,w27 // d+=h ++ add w27,w27,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w4,w4,w10 ++ add w27,w27,w17 // h+=Sigma0(a) ++ add w4,w4,w9 ++ ldr w9,[sp,#8] ++ str w12,[sp,#4] ++ ror w16,w23,#6 ++ add w26,w26,w28 // h+=K[i] ++ ror w11,w6,#7 ++ and w17,w24,w23 ++ ror w10,w3,#17 ++ bic w28,w25,w23 ++ ror w12,w27,#2 ++ add w26,w26,w4 // h+=X[i] ++ eor w16,w16,w23,ror#11 ++ eor w11,w11,w6,ror#18 ++ orr w17,w17,w28 // 
Ch(e,f,g) ++ eor w28,w27,w20 // a^b, b^c in next round ++ eor w16,w16,w23,ror#25 // Sigma1(e) ++ eor w12,w12,w27,ror#13 ++ add w26,w26,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w10,w10,w3,ror#19 ++ eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) ++ add w26,w26,w16 // h+=Sigma1(e) ++ eor w19,w19,w20 // Maj(a,b,c) ++ eor w17,w12,w27,ror#22 // Sigma0(a) ++ eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) ++ add w5,w5,w14 ++ add w22,w22,w26 // d+=h ++ add w26,w26,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w5,w5,w11 ++ add w26,w26,w17 // h+=Sigma0(a) ++ add w5,w5,w10 ++ ldr w10,[sp,#12] ++ str w13,[sp,#8] ++ ror w16,w22,#6 ++ add w25,w25,w19 // h+=K[i] ++ ror w12,w7,#7 ++ and w17,w23,w22 ++ ror w11,w4,#17 ++ bic w19,w24,w22 ++ ror w13,w26,#2 ++ add w25,w25,w5 // h+=X[i] ++ eor w16,w16,w22,ror#11 ++ eor w12,w12,w7,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w26,w27 // a^b, b^c in next round ++ eor w16,w16,w22,ror#25 // Sigma1(e) ++ eor w13,w13,w26,ror#13 ++ add w25,w25,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w11,w11,w4,ror#19 ++ eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) ++ add w25,w25,w16 // h+=Sigma1(e) ++ eor w28,w28,w27 // Maj(a,b,c) ++ eor w17,w13,w26,ror#22 // Sigma0(a) ++ eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) ++ add w6,w6,w15 ++ add w21,w21,w25 // d+=h ++ add w25,w25,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w6,w6,w12 ++ add w25,w25,w17 // h+=Sigma0(a) ++ add w6,w6,w11 ++ ldr w11,[sp,#0] ++ str w14,[sp,#12] ++ ror w16,w21,#6 ++ add w24,w24,w28 // h+=K[i] ++ ror w13,w8,#7 ++ and w17,w22,w21 ++ ror w12,w5,#17 ++ bic w28,w23,w21 ++ ror w14,w25,#2 ++ add w24,w24,w6 // h+=X[i] ++ eor w16,w16,w21,ror#11 ++ eor w13,w13,w8,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w25,w26 // a^b, b^c in next round ++ eor w16,w16,w21,ror#25 // Sigma1(e) ++ eor w14,w14,w25,ror#13 ++ add w24,w24,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w12,w12,w5,ror#19 ++ eor 
w13,w13,w8,lsr#3 // sigma0(X[i+1]) ++ add w24,w24,w16 // h+=Sigma1(e) ++ eor w19,w19,w26 // Maj(a,b,c) ++ eor w17,w14,w25,ror#22 // Sigma0(a) ++ eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) ++ add w7,w7,w0 ++ add w20,w20,w24 // d+=h ++ add w24,w24,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w7,w7,w13 ++ add w24,w24,w17 // h+=Sigma0(a) ++ add w7,w7,w12 ++ ldr w12,[sp,#4] ++ str w15,[sp,#0] ++ ror w16,w20,#6 ++ add w23,w23,w19 // h+=K[i] ++ ror w14,w9,#7 ++ and w17,w21,w20 ++ ror w13,w6,#17 ++ bic w19,w22,w20 ++ ror w15,w24,#2 ++ add w23,w23,w7 // h+=X[i] ++ eor w16,w16,w20,ror#11 ++ eor w14,w14,w9,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w24,w25 // a^b, b^c in next round ++ eor w16,w16,w20,ror#25 // Sigma1(e) ++ eor w15,w15,w24,ror#13 ++ add w23,w23,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w13,w13,w6,ror#19 ++ eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) ++ add w23,w23,w16 // h+=Sigma1(e) ++ eor w28,w28,w25 // Maj(a,b,c) ++ eor w17,w15,w24,ror#22 // Sigma0(a) ++ eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) ++ add w8,w8,w1 ++ add w27,w27,w23 // d+=h ++ add w23,w23,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w8,w8,w14 ++ add w23,w23,w17 // h+=Sigma0(a) ++ add w8,w8,w13 ++ ldr w13,[sp,#8] ++ str w0,[sp,#4] ++ ror w16,w27,#6 ++ add w22,w22,w28 // h+=K[i] ++ ror w15,w10,#7 ++ and w17,w20,w27 ++ ror w14,w7,#17 ++ bic w28,w21,w27 ++ ror w0,w23,#2 ++ add w22,w22,w8 // h+=X[i] ++ eor w16,w16,w27,ror#11 ++ eor w15,w15,w10,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w23,w24 // a^b, b^c in next round ++ eor w16,w16,w27,ror#25 // Sigma1(e) ++ eor w0,w0,w23,ror#13 ++ add w22,w22,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w14,w14,w7,ror#19 ++ eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) ++ add w22,w22,w16 // h+=Sigma1(e) ++ eor w19,w19,w24 // Maj(a,b,c) ++ eor w17,w0,w23,ror#22 // Sigma0(a) ++ eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) ++ add w9,w9,w2 ++ add w26,w26,w22 // d+=h ++ add 
w22,w22,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w9,w9,w15 ++ add w22,w22,w17 // h+=Sigma0(a) ++ add w9,w9,w14 ++ ldr w14,[sp,#12] ++ str w1,[sp,#8] ++ ror w16,w26,#6 ++ add w21,w21,w19 // h+=K[i] ++ ror w0,w11,#7 ++ and w17,w27,w26 ++ ror w15,w8,#17 ++ bic w19,w20,w26 ++ ror w1,w22,#2 ++ add w21,w21,w9 // h+=X[i] ++ eor w16,w16,w26,ror#11 ++ eor w0,w0,w11,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w22,w23 // a^b, b^c in next round ++ eor w16,w16,w26,ror#25 // Sigma1(e) ++ eor w1,w1,w22,ror#13 ++ add w21,w21,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w15,w15,w8,ror#19 ++ eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) ++ add w21,w21,w16 // h+=Sigma1(e) ++ eor w28,w28,w23 // Maj(a,b,c) ++ eor w17,w1,w22,ror#22 // Sigma0(a) ++ eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) ++ add w10,w10,w3 ++ add w25,w25,w21 // d+=h ++ add w21,w21,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w10,w10,w0 ++ add w21,w21,w17 // h+=Sigma0(a) ++ add w10,w10,w15 ++ ldr w15,[sp,#0] ++ str w2,[sp,#12] ++ ror w16,w25,#6 ++ add w20,w20,w28 // h+=K[i] ++ ror w1,w12,#7 ++ and w17,w26,w25 ++ ror w0,w9,#17 ++ bic w28,w27,w25 ++ ror w2,w21,#2 ++ add w20,w20,w10 // h+=X[i] ++ eor w16,w16,w25,ror#11 ++ eor w1,w1,w12,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w21,w22 // a^b, b^c in next round ++ eor w16,w16,w25,ror#25 // Sigma1(e) ++ eor w2,w2,w21,ror#13 ++ add w20,w20,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w0,w0,w9,ror#19 ++ eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) ++ add w20,w20,w16 // h+=Sigma1(e) ++ eor w19,w19,w22 // Maj(a,b,c) ++ eor w17,w2,w21,ror#22 // Sigma0(a) ++ eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) ++ add w11,w11,w4 ++ add w24,w24,w20 // d+=h ++ add w20,w20,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w11,w11,w1 ++ add w20,w20,w17 // h+=Sigma0(a) ++ add w11,w11,w0 ++ ldr w0,[sp,#4] ++ str w3,[sp,#0] ++ ror w16,w24,#6 ++ add w27,w27,w19 // h+=K[i] ++ ror 
w2,w13,#7 ++ and w17,w25,w24 ++ ror w1,w10,#17 ++ bic w19,w26,w24 ++ ror w3,w20,#2 ++ add w27,w27,w11 // h+=X[i] ++ eor w16,w16,w24,ror#11 ++ eor w2,w2,w13,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w20,w21 // a^b, b^c in next round ++ eor w16,w16,w24,ror#25 // Sigma1(e) ++ eor w3,w3,w20,ror#13 ++ add w27,w27,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w1,w1,w10,ror#19 ++ eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) ++ add w27,w27,w16 // h+=Sigma1(e) ++ eor w28,w28,w21 // Maj(a,b,c) ++ eor w17,w3,w20,ror#22 // Sigma0(a) ++ eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) ++ add w12,w12,w5 ++ add w23,w23,w27 // d+=h ++ add w27,w27,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w12,w12,w2 ++ add w27,w27,w17 // h+=Sigma0(a) ++ add w12,w12,w1 ++ ldr w1,[sp,#8] ++ str w4,[sp,#4] ++ ror w16,w23,#6 ++ add w26,w26,w28 // h+=K[i] ++ ror w3,w14,#7 ++ and w17,w24,w23 ++ ror w2,w11,#17 ++ bic w28,w25,w23 ++ ror w4,w27,#2 ++ add w26,w26,w12 // h+=X[i] ++ eor w16,w16,w23,ror#11 ++ eor w3,w3,w14,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w27,w20 // a^b, b^c in next round ++ eor w16,w16,w23,ror#25 // Sigma1(e) ++ eor w4,w4,w27,ror#13 ++ add w26,w26,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w2,w2,w11,ror#19 ++ eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) ++ add w26,w26,w16 // h+=Sigma1(e) ++ eor w19,w19,w20 // Maj(a,b,c) ++ eor w17,w4,w27,ror#22 // Sigma0(a) ++ eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) ++ add w13,w13,w6 ++ add w22,w22,w26 // d+=h ++ add w26,w26,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w13,w13,w3 ++ add w26,w26,w17 // h+=Sigma0(a) ++ add w13,w13,w2 ++ ldr w2,[sp,#12] ++ str w5,[sp,#8] ++ ror w16,w22,#6 ++ add w25,w25,w19 // h+=K[i] ++ ror w4,w15,#7 ++ and w17,w23,w22 ++ ror w3,w12,#17 ++ bic w19,w24,w22 ++ ror w5,w26,#2 ++ add w25,w25,w13 // h+=X[i] ++ eor w16,w16,w22,ror#11 ++ eor w4,w4,w15,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w26,w27 // a^b, b^c in next round ++ 
eor w16,w16,w22,ror#25 // Sigma1(e) ++ eor w5,w5,w26,ror#13 ++ add w25,w25,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w3,w3,w12,ror#19 ++ eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) ++ add w25,w25,w16 // h+=Sigma1(e) ++ eor w28,w28,w27 // Maj(a,b,c) ++ eor w17,w5,w26,ror#22 // Sigma0(a) ++ eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) ++ add w14,w14,w7 ++ add w21,w21,w25 // d+=h ++ add w25,w25,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w14,w14,w4 ++ add w25,w25,w17 // h+=Sigma0(a) ++ add w14,w14,w3 ++ ldr w3,[sp,#0] ++ str w6,[sp,#12] ++ ror w16,w21,#6 ++ add w24,w24,w28 // h+=K[i] ++ ror w5,w0,#7 ++ and w17,w22,w21 ++ ror w4,w13,#17 ++ bic w28,w23,w21 ++ ror w6,w25,#2 ++ add w24,w24,w14 // h+=X[i] ++ eor w16,w16,w21,ror#11 ++ eor w5,w5,w0,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w25,w26 // a^b, b^c in next round ++ eor w16,w16,w21,ror#25 // Sigma1(e) ++ eor w6,w6,w25,ror#13 ++ add w24,w24,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w4,w4,w13,ror#19 ++ eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) ++ add w24,w24,w16 // h+=Sigma1(e) ++ eor w19,w19,w26 // Maj(a,b,c) ++ eor w17,w6,w25,ror#22 // Sigma0(a) ++ eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) ++ add w15,w15,w8 ++ add w20,w20,w24 // d+=h ++ add w24,w24,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w15,w15,w5 ++ add w24,w24,w17 // h+=Sigma0(a) ++ add w15,w15,w4 ++ ldr w4,[sp,#4] ++ str w7,[sp,#0] ++ ror w16,w20,#6 ++ add w23,w23,w19 // h+=K[i] ++ ror w6,w1,#7 ++ and w17,w21,w20 ++ ror w5,w14,#17 ++ bic w19,w22,w20 ++ ror w7,w24,#2 ++ add w23,w23,w15 // h+=X[i] ++ eor w16,w16,w20,ror#11 ++ eor w6,w6,w1,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w24,w25 // a^b, b^c in next round ++ eor w16,w16,w20,ror#25 // Sigma1(e) ++ eor w7,w7,w24,ror#13 ++ add w23,w23,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w5,w5,w14,ror#19 ++ eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) ++ add w23,w23,w16 // h+=Sigma1(e) ++ eor w28,w28,w25 
// Maj(a,b,c) ++ eor w17,w7,w24,ror#22 // Sigma0(a) ++ eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) ++ add w0,w0,w9 ++ add w27,w27,w23 // d+=h ++ add w23,w23,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w0,w0,w6 ++ add w23,w23,w17 // h+=Sigma0(a) ++ add w0,w0,w5 ++ ldr w5,[sp,#8] ++ str w8,[sp,#4] ++ ror w16,w27,#6 ++ add w22,w22,w28 // h+=K[i] ++ ror w7,w2,#7 ++ and w17,w20,w27 ++ ror w6,w15,#17 ++ bic w28,w21,w27 ++ ror w8,w23,#2 ++ add w22,w22,w0 // h+=X[i] ++ eor w16,w16,w27,ror#11 ++ eor w7,w7,w2,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w23,w24 // a^b, b^c in next round ++ eor w16,w16,w27,ror#25 // Sigma1(e) ++ eor w8,w8,w23,ror#13 ++ add w22,w22,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w6,w6,w15,ror#19 ++ eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) ++ add w22,w22,w16 // h+=Sigma1(e) ++ eor w19,w19,w24 // Maj(a,b,c) ++ eor w17,w8,w23,ror#22 // Sigma0(a) ++ eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) ++ add w1,w1,w10 ++ add w26,w26,w22 // d+=h ++ add w22,w22,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w1,w1,w7 ++ add w22,w22,w17 // h+=Sigma0(a) ++ add w1,w1,w6 ++ ldr w6,[sp,#12] ++ str w9,[sp,#8] ++ ror w16,w26,#6 ++ add w21,w21,w19 // h+=K[i] ++ ror w8,w3,#7 ++ and w17,w27,w26 ++ ror w7,w0,#17 ++ bic w19,w20,w26 ++ ror w9,w22,#2 ++ add w21,w21,w1 // h+=X[i] ++ eor w16,w16,w26,ror#11 ++ eor w8,w8,w3,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w22,w23 // a^b, b^c in next round ++ eor w16,w16,w26,ror#25 // Sigma1(e) ++ eor w9,w9,w22,ror#13 ++ add w21,w21,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w7,w7,w0,ror#19 ++ eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) ++ add w21,w21,w16 // h+=Sigma1(e) ++ eor w28,w28,w23 // Maj(a,b,c) ++ eor w17,w9,w22,ror#22 // Sigma0(a) ++ eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) ++ add w2,w2,w11 ++ add w25,w25,w21 // d+=h ++ add w21,w21,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w2,w2,w8 ++ add w21,w21,w17 // 
h+=Sigma0(a) ++ add w2,w2,w7 ++ ldr w7,[sp,#0] ++ str w10,[sp,#12] ++ ror w16,w25,#6 ++ add w20,w20,w28 // h+=K[i] ++ ror w9,w4,#7 ++ and w17,w26,w25 ++ ror w8,w1,#17 ++ bic w28,w27,w25 ++ ror w10,w21,#2 ++ add w20,w20,w2 // h+=X[i] ++ eor w16,w16,w25,ror#11 ++ eor w9,w9,w4,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w21,w22 // a^b, b^c in next round ++ eor w16,w16,w25,ror#25 // Sigma1(e) ++ eor w10,w10,w21,ror#13 ++ add w20,w20,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w8,w8,w1,ror#19 ++ eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) ++ add w20,w20,w16 // h+=Sigma1(e) ++ eor w19,w19,w22 // Maj(a,b,c) ++ eor w17,w10,w21,ror#22 // Sigma0(a) ++ eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) ++ add w3,w3,w12 ++ add w24,w24,w20 // d+=h ++ add w20,w20,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w3,w3,w9 ++ add w20,w20,w17 // h+=Sigma0(a) ++ add w3,w3,w8 ++ cbnz w19,.Loop_16_xx ++ ++ ldp x0,x2,[x29,#96] ++ ldr x1,[x29,#112] ++ sub x30,x30,#260 // rewind ++ ++ ldp w3,w4,[x0] ++ ldp w5,w6,[x0,#2*4] ++ add x1,x1,#14*4 // advance input pointer ++ ldp w7,w8,[x0,#4*4] ++ add w20,w20,w3 ++ ldp w9,w10,[x0,#6*4] ++ add w21,w21,w4 ++ add w22,w22,w5 ++ add w23,w23,w6 ++ stp w20,w21,[x0] ++ add w24,w24,w7 ++ add w25,w25,w8 ++ stp w22,w23,[x0,#2*4] ++ add w26,w26,w9 ++ add w27,w27,w10 ++ cmp x1,x2 ++ stp w24,w25,[x0,#4*4] ++ stp w26,w27,[x0,#6*4] ++ b.ne .Loop ++ ++ ldp x19,x20,[x29,#16] ++ add sp,sp,#4*4 ++ ldp x21,x22,[x29,#32] ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldp x29,x30,[sp],#128 ++ ret ++.size sha256_block_data_order,.-sha256_block_data_order ++ ++.align 6 ++.type .LK256,%object ++.LK256: ++ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 ++ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 ++ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 ++ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 ++ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc ++ .long 
0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da ++ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 ++ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 ++ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 ++ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 ++ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 ++ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 ++ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 ++ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 ++ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 ++ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 ++ .long 0 //terminator ++.size .LK256,.-.LK256 ++#ifndef __KERNEL__ ++.align 3 ++.LOPENSSL_armcap_P: ++# ifdef __ILP32__ ++ .long OPENSSL_armcap_P-. ++# else ++ .quad OPENSSL_armcap_P-. ++# endif ++#endif ++.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by " ++.align 2 ++#ifndef __KERNEL__ ++.type sha256_block_armv8,%function ++.align 6 ++sha256_block_armv8: ++.Lv8_entry: ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ ++ ld1 {v0.4s,v1.4s},[x0] ++ adr x3,.LK256 ++ ++.Loop_hw: ++ ld1 {v4.16b-v7.16b},[x1],#64 ++ sub x2,x2,#1 ++ ld1 {v16.4s},[x3],#16 ++ rev32 v4.16b,v4.16b ++ rev32 v5.16b,v5.16b ++ rev32 v6.16b,v6.16b ++ rev32 v7.16b,v7.16b ++ orr v18.16b,v0.16b,v0.16b // offload ++ orr v19.16b,v1.16b,v1.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v4.4s ++ .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++ .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v5.4s ++ .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++ .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v6.4s ++ .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 
0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++ .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v7.4s ++ .inst 0x5e282887 //sha256su0 v7.16b,v4.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++ .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v4.4s ++ .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++ .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v5.4s ++ .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++ .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v6.4s ++ .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++ .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v7.4s ++ .inst 0x5e282887 //sha256su0 v7.16b,v4.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++ .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v4.4s ++ .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++ .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v5.4s ++ .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e114020 //sha256h 
v0.16b,v1.16b,v17.4s ++ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++ .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v6.4s ++ .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++ .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v7.4s ++ .inst 0x5e282887 //sha256su0 v7.16b,v4.16b ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++ .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v4.4s ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++ ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v5.4s ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++ ++ ld1 {v17.4s},[x3] ++ add v16.4s,v16.4s,v6.4s ++ sub x3,x3,#64*4-16 // rewind ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++ .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++ ++ add v17.4s,v17.4s,v7.4s ++ orr v2.16b,v0.16b,v0.16b ++ .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++ .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++ ++ add v0.4s,v0.4s,v18.4s ++ add v1.4s,v1.4s,v19.4s ++ ++ cbnz x2,.Loop_hw ++ ++ st1 {v0.4s,v1.4s},[x0] ++ ++ ldr x29,[sp],#16 ++ ret ++.size sha256_block_armv8,.-sha256_block_armv8 ++#endif ++#ifdef __KERNEL__ ++.globl sha256_block_neon ++#endif ++.type sha256_block_neon,%function ++.align 4 ++sha256_block_neon: ++.Lneon_entry: ++ stp x29, x30, [sp, #-16]! 
++ mov x29, sp ++ sub sp,sp,#16*4 ++ ++ adr x16,.LK256 ++ add x2,x1,x2,lsl#6 // len to point at the end of inp ++ ++ ld1 {v0.16b},[x1], #16 ++ ld1 {v1.16b},[x1], #16 ++ ld1 {v2.16b},[x1], #16 ++ ld1 {v3.16b},[x1], #16 ++ ld1 {v4.4s},[x16], #16 ++ ld1 {v5.4s},[x16], #16 ++ ld1 {v6.4s},[x16], #16 ++ ld1 {v7.4s},[x16], #16 ++ rev32 v0.16b,v0.16b // yes, even on ++ rev32 v1.16b,v1.16b // big-endian ++ rev32 v2.16b,v2.16b ++ rev32 v3.16b,v3.16b ++ mov x17,sp ++ add v4.4s,v4.4s,v0.4s ++ add v5.4s,v5.4s,v1.4s ++ add v6.4s,v6.4s,v2.4s ++ st1 {v4.4s-v5.4s},[x17], #32 ++ add v7.4s,v7.4s,v3.4s ++ st1 {v6.4s-v7.4s},[x17] ++ sub x17,x17,#32 ++ ++ ldp w3,w4,[x0] ++ ldp w5,w6,[x0,#8] ++ ldp w7,w8,[x0,#16] ++ ldp w9,w10,[x0,#24] ++ ldr w12,[sp,#0] ++ mov w13,wzr ++ eor w14,w4,w5 ++ mov w15,wzr ++ b .L_00_48 ++ ++.align 4 ++.L_00_48: ++ ext v4.16b,v0.16b,v1.16b,#4 ++ add w10,w10,w12 ++ add w3,w3,w15 ++ and w12,w8,w7 ++ bic w15,w9,w7 ++ ext v7.16b,v2.16b,v3.16b,#4 ++ eor w11,w7,w7,ror#5 ++ add w3,w3,w13 ++ mov d19,v3.d[1] ++ orr w12,w12,w15 ++ eor w11,w11,w7,ror#19 ++ ushr v6.4s,v4.4s,#7 ++ eor w15,w3,w3,ror#11 ++ ushr v5.4s,v4.4s,#3 ++ add w10,w10,w12 ++ add v0.4s,v0.4s,v7.4s ++ ror w11,w11,#6 ++ sli v6.4s,v4.4s,#25 ++ eor w13,w3,w4 ++ eor w15,w15,w3,ror#20 ++ ushr v7.4s,v4.4s,#18 ++ add w10,w10,w11 ++ ldr w12,[sp,#4] ++ and w14,w14,w13 ++ eor v5.16b,v5.16b,v6.16b ++ ror w15,w15,#2 ++ add w6,w6,w10 ++ sli v7.4s,v4.4s,#14 ++ eor w14,w14,w4 ++ ushr v16.4s,v19.4s,#17 ++ add w9,w9,w12 ++ add w10,w10,w15 ++ and w12,w7,w6 ++ eor v5.16b,v5.16b,v7.16b ++ bic w15,w8,w6 ++ eor w11,w6,w6,ror#5 ++ sli v16.4s,v19.4s,#15 ++ add w10,w10,w14 ++ orr w12,w12,w15 ++ ushr v17.4s,v19.4s,#10 ++ eor w11,w11,w6,ror#19 ++ eor w15,w10,w10,ror#11 ++ ushr v7.4s,v19.4s,#19 ++ add w9,w9,w12 ++ ror w11,w11,#6 ++ add v0.4s,v0.4s,v5.4s ++ eor w14,w10,w3 ++ eor w15,w15,w10,ror#20 ++ sli v7.4s,v19.4s,#13 ++ add w9,w9,w11 ++ ldr w12,[sp,#8] ++ and w13,w13,w14 ++ eor v17.16b,v17.16b,v16.16b ++ ror w15,w15,#2 ++ add 
w5,w5,w9 ++ eor w13,w13,w3 ++ eor v17.16b,v17.16b,v7.16b ++ add w8,w8,w12 ++ add w9,w9,w15 ++ and w12,w6,w5 ++ add v0.4s,v0.4s,v17.4s ++ bic w15,w7,w5 ++ eor w11,w5,w5,ror#5 ++ add w9,w9,w13 ++ ushr v18.4s,v0.4s,#17 ++ orr w12,w12,w15 ++ ushr v19.4s,v0.4s,#10 ++ eor w11,w11,w5,ror#19 ++ eor w15,w9,w9,ror#11 ++ sli v18.4s,v0.4s,#15 ++ add w8,w8,w12 ++ ushr v17.4s,v0.4s,#19 ++ ror w11,w11,#6 ++ eor w13,w9,w10 ++ eor v19.16b,v19.16b,v18.16b ++ eor w15,w15,w9,ror#20 ++ add w8,w8,w11 ++ sli v17.4s,v0.4s,#13 ++ ldr w12,[sp,#12] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ ld1 {v4.4s},[x16], #16 ++ add w4,w4,w8 ++ eor v19.16b,v19.16b,v17.16b ++ eor w14,w14,w10 ++ eor v17.16b,v17.16b,v17.16b ++ add w7,w7,w12 ++ add w8,w8,w15 ++ and w12,w5,w4 ++ mov v17.d[1],v19.d[0] ++ bic w15,w6,w4 ++ eor w11,w4,w4,ror#5 ++ add w8,w8,w14 ++ add v0.4s,v0.4s,v17.4s ++ orr w12,w12,w15 ++ eor w11,w11,w4,ror#19 ++ eor w15,w8,w8,ror#11 ++ add v4.4s,v4.4s,v0.4s ++ add w7,w7,w12 ++ ror w11,w11,#6 ++ eor w14,w8,w9 ++ eor w15,w15,w8,ror#20 ++ add w7,w7,w11 ++ ldr w12,[sp,#16] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w3,w3,w7 ++ eor w13,w13,w9 ++ st1 {v4.4s},[x17], #16 ++ ext v4.16b,v1.16b,v2.16b,#4 ++ add w6,w6,w12 ++ add w7,w7,w15 ++ and w12,w4,w3 ++ bic w15,w5,w3 ++ ext v7.16b,v3.16b,v0.16b,#4 ++ eor w11,w3,w3,ror#5 ++ add w7,w7,w13 ++ mov d19,v0.d[1] ++ orr w12,w12,w15 ++ eor w11,w11,w3,ror#19 ++ ushr v6.4s,v4.4s,#7 ++ eor w15,w7,w7,ror#11 ++ ushr v5.4s,v4.4s,#3 ++ add w6,w6,w12 ++ add v1.4s,v1.4s,v7.4s ++ ror w11,w11,#6 ++ sli v6.4s,v4.4s,#25 ++ eor w13,w7,w8 ++ eor w15,w15,w7,ror#20 ++ ushr v7.4s,v4.4s,#18 ++ add w6,w6,w11 ++ ldr w12,[sp,#20] ++ and w14,w14,w13 ++ eor v5.16b,v5.16b,v6.16b ++ ror w15,w15,#2 ++ add w10,w10,w6 ++ sli v7.4s,v4.4s,#14 ++ eor w14,w14,w8 ++ ushr v16.4s,v19.4s,#17 ++ add w5,w5,w12 ++ add w6,w6,w15 ++ and w12,w3,w10 ++ eor v5.16b,v5.16b,v7.16b ++ bic w15,w4,w10 ++ eor w11,w10,w10,ror#5 ++ sli v16.4s,v19.4s,#15 ++ add w6,w6,w14 ++ orr w12,w12,w15 ++ ushr v17.4s,v19.4s,#10 
++ eor w11,w11,w10,ror#19 ++ eor w15,w6,w6,ror#11 ++ ushr v7.4s,v19.4s,#19 ++ add w5,w5,w12 ++ ror w11,w11,#6 ++ add v1.4s,v1.4s,v5.4s ++ eor w14,w6,w7 ++ eor w15,w15,w6,ror#20 ++ sli v7.4s,v19.4s,#13 ++ add w5,w5,w11 ++ ldr w12,[sp,#24] ++ and w13,w13,w14 ++ eor v17.16b,v17.16b,v16.16b ++ ror w15,w15,#2 ++ add w9,w9,w5 ++ eor w13,w13,w7 ++ eor v17.16b,v17.16b,v7.16b ++ add w4,w4,w12 ++ add w5,w5,w15 ++ and w12,w10,w9 ++ add v1.4s,v1.4s,v17.4s ++ bic w15,w3,w9 ++ eor w11,w9,w9,ror#5 ++ add w5,w5,w13 ++ ushr v18.4s,v1.4s,#17 ++ orr w12,w12,w15 ++ ushr v19.4s,v1.4s,#10 ++ eor w11,w11,w9,ror#19 ++ eor w15,w5,w5,ror#11 ++ sli v18.4s,v1.4s,#15 ++ add w4,w4,w12 ++ ushr v17.4s,v1.4s,#19 ++ ror w11,w11,#6 ++ eor w13,w5,w6 ++ eor v19.16b,v19.16b,v18.16b ++ eor w15,w15,w5,ror#20 ++ add w4,w4,w11 ++ sli v17.4s,v1.4s,#13 ++ ldr w12,[sp,#28] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ ld1 {v4.4s},[x16], #16 ++ add w8,w8,w4 ++ eor v19.16b,v19.16b,v17.16b ++ eor w14,w14,w6 ++ eor v17.16b,v17.16b,v17.16b ++ add w3,w3,w12 ++ add w4,w4,w15 ++ and w12,w9,w8 ++ mov v17.d[1],v19.d[0] ++ bic w15,w10,w8 ++ eor w11,w8,w8,ror#5 ++ add w4,w4,w14 ++ add v1.4s,v1.4s,v17.4s ++ orr w12,w12,w15 ++ eor w11,w11,w8,ror#19 ++ eor w15,w4,w4,ror#11 ++ add v4.4s,v4.4s,v1.4s ++ add w3,w3,w12 ++ ror w11,w11,#6 ++ eor w14,w4,w5 ++ eor w15,w15,w4,ror#20 ++ add w3,w3,w11 ++ ldr w12,[sp,#32] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w7,w7,w3 ++ eor w13,w13,w5 ++ st1 {v4.4s},[x17], #16 ++ ext v4.16b,v2.16b,v3.16b,#4 ++ add w10,w10,w12 ++ add w3,w3,w15 ++ and w12,w8,w7 ++ bic w15,w9,w7 ++ ext v7.16b,v0.16b,v1.16b,#4 ++ eor w11,w7,w7,ror#5 ++ add w3,w3,w13 ++ mov d19,v1.d[1] ++ orr w12,w12,w15 ++ eor w11,w11,w7,ror#19 ++ ushr v6.4s,v4.4s,#7 ++ eor w15,w3,w3,ror#11 ++ ushr v5.4s,v4.4s,#3 ++ add w10,w10,w12 ++ add v2.4s,v2.4s,v7.4s ++ ror w11,w11,#6 ++ sli v6.4s,v4.4s,#25 ++ eor w13,w3,w4 ++ eor w15,w15,w3,ror#20 ++ ushr v7.4s,v4.4s,#18 ++ add w10,w10,w11 ++ ldr w12,[sp,#36] ++ and w14,w14,w13 ++ eor 
v5.16b,v5.16b,v6.16b ++ ror w15,w15,#2 ++ add w6,w6,w10 ++ sli v7.4s,v4.4s,#14 ++ eor w14,w14,w4 ++ ushr v16.4s,v19.4s,#17 ++ add w9,w9,w12 ++ add w10,w10,w15 ++ and w12,w7,w6 ++ eor v5.16b,v5.16b,v7.16b ++ bic w15,w8,w6 ++ eor w11,w6,w6,ror#5 ++ sli v16.4s,v19.4s,#15 ++ add w10,w10,w14 ++ orr w12,w12,w15 ++ ushr v17.4s,v19.4s,#10 ++ eor w11,w11,w6,ror#19 ++ eor w15,w10,w10,ror#11 ++ ushr v7.4s,v19.4s,#19 ++ add w9,w9,w12 ++ ror w11,w11,#6 ++ add v2.4s,v2.4s,v5.4s ++ eor w14,w10,w3 ++ eor w15,w15,w10,ror#20 ++ sli v7.4s,v19.4s,#13 ++ add w9,w9,w11 ++ ldr w12,[sp,#40] ++ and w13,w13,w14 ++ eor v17.16b,v17.16b,v16.16b ++ ror w15,w15,#2 ++ add w5,w5,w9 ++ eor w13,w13,w3 ++ eor v17.16b,v17.16b,v7.16b ++ add w8,w8,w12 ++ add w9,w9,w15 ++ and w12,w6,w5 ++ add v2.4s,v2.4s,v17.4s ++ bic w15,w7,w5 ++ eor w11,w5,w5,ror#5 ++ add w9,w9,w13 ++ ushr v18.4s,v2.4s,#17 ++ orr w12,w12,w15 ++ ushr v19.4s,v2.4s,#10 ++ eor w11,w11,w5,ror#19 ++ eor w15,w9,w9,ror#11 ++ sli v18.4s,v2.4s,#15 ++ add w8,w8,w12 ++ ushr v17.4s,v2.4s,#19 ++ ror w11,w11,#6 ++ eor w13,w9,w10 ++ eor v19.16b,v19.16b,v18.16b ++ eor w15,w15,w9,ror#20 ++ add w8,w8,w11 ++ sli v17.4s,v2.4s,#13 ++ ldr w12,[sp,#44] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ ld1 {v4.4s},[x16], #16 ++ add w4,w4,w8 ++ eor v19.16b,v19.16b,v17.16b ++ eor w14,w14,w10 ++ eor v17.16b,v17.16b,v17.16b ++ add w7,w7,w12 ++ add w8,w8,w15 ++ and w12,w5,w4 ++ mov v17.d[1],v19.d[0] ++ bic w15,w6,w4 ++ eor w11,w4,w4,ror#5 ++ add w8,w8,w14 ++ add v2.4s,v2.4s,v17.4s ++ orr w12,w12,w15 ++ eor w11,w11,w4,ror#19 ++ eor w15,w8,w8,ror#11 ++ add v4.4s,v4.4s,v2.4s ++ add w7,w7,w12 ++ ror w11,w11,#6 ++ eor w14,w8,w9 ++ eor w15,w15,w8,ror#20 ++ add w7,w7,w11 ++ ldr w12,[sp,#48] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w3,w3,w7 ++ eor w13,w13,w9 ++ st1 {v4.4s},[x17], #16 ++ ext v4.16b,v3.16b,v0.16b,#4 ++ add w6,w6,w12 ++ add w7,w7,w15 ++ and w12,w4,w3 ++ bic w15,w5,w3 ++ ext v7.16b,v1.16b,v2.16b,#4 ++ eor w11,w3,w3,ror#5 ++ add w7,w7,w13 ++ mov d19,v2.d[1] ++ orr 
w12,w12,w15 ++ eor w11,w11,w3,ror#19 ++ ushr v6.4s,v4.4s,#7 ++ eor w15,w7,w7,ror#11 ++ ushr v5.4s,v4.4s,#3 ++ add w6,w6,w12 ++ add v3.4s,v3.4s,v7.4s ++ ror w11,w11,#6 ++ sli v6.4s,v4.4s,#25 ++ eor w13,w7,w8 ++ eor w15,w15,w7,ror#20 ++ ushr v7.4s,v4.4s,#18 ++ add w6,w6,w11 ++ ldr w12,[sp,#52] ++ and w14,w14,w13 ++ eor v5.16b,v5.16b,v6.16b ++ ror w15,w15,#2 ++ add w10,w10,w6 ++ sli v7.4s,v4.4s,#14 ++ eor w14,w14,w8 ++ ushr v16.4s,v19.4s,#17 ++ add w5,w5,w12 ++ add w6,w6,w15 ++ and w12,w3,w10 ++ eor v5.16b,v5.16b,v7.16b ++ bic w15,w4,w10 ++ eor w11,w10,w10,ror#5 ++ sli v16.4s,v19.4s,#15 ++ add w6,w6,w14 ++ orr w12,w12,w15 ++ ushr v17.4s,v19.4s,#10 ++ eor w11,w11,w10,ror#19 ++ eor w15,w6,w6,ror#11 ++ ushr v7.4s,v19.4s,#19 ++ add w5,w5,w12 ++ ror w11,w11,#6 ++ add v3.4s,v3.4s,v5.4s ++ eor w14,w6,w7 ++ eor w15,w15,w6,ror#20 ++ sli v7.4s,v19.4s,#13 ++ add w5,w5,w11 ++ ldr w12,[sp,#56] ++ and w13,w13,w14 ++ eor v17.16b,v17.16b,v16.16b ++ ror w15,w15,#2 ++ add w9,w9,w5 ++ eor w13,w13,w7 ++ eor v17.16b,v17.16b,v7.16b ++ add w4,w4,w12 ++ add w5,w5,w15 ++ and w12,w10,w9 ++ add v3.4s,v3.4s,v17.4s ++ bic w15,w3,w9 ++ eor w11,w9,w9,ror#5 ++ add w5,w5,w13 ++ ushr v18.4s,v3.4s,#17 ++ orr w12,w12,w15 ++ ushr v19.4s,v3.4s,#10 ++ eor w11,w11,w9,ror#19 ++ eor w15,w5,w5,ror#11 ++ sli v18.4s,v3.4s,#15 ++ add w4,w4,w12 ++ ushr v17.4s,v3.4s,#19 ++ ror w11,w11,#6 ++ eor w13,w5,w6 ++ eor v19.16b,v19.16b,v18.16b ++ eor w15,w15,w5,ror#20 ++ add w4,w4,w11 ++ sli v17.4s,v3.4s,#13 ++ ldr w12,[sp,#60] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ ld1 {v4.4s},[x16], #16 ++ add w8,w8,w4 ++ eor v19.16b,v19.16b,v17.16b ++ eor w14,w14,w6 ++ eor v17.16b,v17.16b,v17.16b ++ add w3,w3,w12 ++ add w4,w4,w15 ++ and w12,w9,w8 ++ mov v17.d[1],v19.d[0] ++ bic w15,w10,w8 ++ eor w11,w8,w8,ror#5 ++ add w4,w4,w14 ++ add v3.4s,v3.4s,v17.4s ++ orr w12,w12,w15 ++ eor w11,w11,w8,ror#19 ++ eor w15,w4,w4,ror#11 ++ add v4.4s,v4.4s,v3.4s ++ add w3,w3,w12 ++ ror w11,w11,#6 ++ eor w14,w4,w5 ++ eor w15,w15,w4,ror#20 ++ add w3,w3,w11 
++ ldr w12,[x16] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w7,w7,w3 ++ eor w13,w13,w5 ++ st1 {v4.4s},[x17], #16 ++ cmp w12,#0 // check for K256 terminator ++ ldr w12,[sp,#0] ++ sub x17,x17,#64 ++ bne .L_00_48 ++ ++ sub x16,x16,#256 // rewind x16 ++ cmp x1,x2 ++ mov x17, #64 ++ csel x17, x17, xzr, eq ++ sub x1,x1,x17 // avoid SEGV ++ mov x17,sp ++ add w10,w10,w12 ++ add w3,w3,w15 ++ and w12,w8,w7 ++ ld1 {v0.16b},[x1],#16 ++ bic w15,w9,w7 ++ eor w11,w7,w7,ror#5 ++ ld1 {v4.4s},[x16],#16 ++ add w3,w3,w13 ++ orr w12,w12,w15 ++ eor w11,w11,w7,ror#19 ++ eor w15,w3,w3,ror#11 ++ rev32 v0.16b,v0.16b ++ add w10,w10,w12 ++ ror w11,w11,#6 ++ eor w13,w3,w4 ++ eor w15,w15,w3,ror#20 ++ add v4.4s,v4.4s,v0.4s ++ add w10,w10,w11 ++ ldr w12,[sp,#4] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ add w6,w6,w10 ++ eor w14,w14,w4 ++ add w9,w9,w12 ++ add w10,w10,w15 ++ and w12,w7,w6 ++ bic w15,w8,w6 ++ eor w11,w6,w6,ror#5 ++ add w10,w10,w14 ++ orr w12,w12,w15 ++ eor w11,w11,w6,ror#19 ++ eor w15,w10,w10,ror#11 ++ add w9,w9,w12 ++ ror w11,w11,#6 ++ eor w14,w10,w3 ++ eor w15,w15,w10,ror#20 ++ add w9,w9,w11 ++ ldr w12,[sp,#8] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w5,w5,w9 ++ eor w13,w13,w3 ++ add w8,w8,w12 ++ add w9,w9,w15 ++ and w12,w6,w5 ++ bic w15,w7,w5 ++ eor w11,w5,w5,ror#5 ++ add w9,w9,w13 ++ orr w12,w12,w15 ++ eor w11,w11,w5,ror#19 ++ eor w15,w9,w9,ror#11 ++ add w8,w8,w12 ++ ror w11,w11,#6 ++ eor w13,w9,w10 ++ eor w15,w15,w9,ror#20 ++ add w8,w8,w11 ++ ldr w12,[sp,#12] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ add w4,w4,w8 ++ eor w14,w14,w10 ++ add w7,w7,w12 ++ add w8,w8,w15 ++ and w12,w5,w4 ++ bic w15,w6,w4 ++ eor w11,w4,w4,ror#5 ++ add w8,w8,w14 ++ orr w12,w12,w15 ++ eor w11,w11,w4,ror#19 ++ eor w15,w8,w8,ror#11 ++ add w7,w7,w12 ++ ror w11,w11,#6 ++ eor w14,w8,w9 ++ eor w15,w15,w8,ror#20 ++ add w7,w7,w11 ++ ldr w12,[sp,#16] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w3,w3,w7 ++ eor w13,w13,w9 ++ st1 {v4.4s},[x17], #16 ++ add w6,w6,w12 ++ add w7,w7,w15 ++ and w12,w4,w3 ++ ld1 
{v1.16b},[x1],#16 ++ bic w15,w5,w3 ++ eor w11,w3,w3,ror#5 ++ ld1 {v4.4s},[x16],#16 ++ add w7,w7,w13 ++ orr w12,w12,w15 ++ eor w11,w11,w3,ror#19 ++ eor w15,w7,w7,ror#11 ++ rev32 v1.16b,v1.16b ++ add w6,w6,w12 ++ ror w11,w11,#6 ++ eor w13,w7,w8 ++ eor w15,w15,w7,ror#20 ++ add v4.4s,v4.4s,v1.4s ++ add w6,w6,w11 ++ ldr w12,[sp,#20] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ add w10,w10,w6 ++ eor w14,w14,w8 ++ add w5,w5,w12 ++ add w6,w6,w15 ++ and w12,w3,w10 ++ bic w15,w4,w10 ++ eor w11,w10,w10,ror#5 ++ add w6,w6,w14 ++ orr w12,w12,w15 ++ eor w11,w11,w10,ror#19 ++ eor w15,w6,w6,ror#11 ++ add w5,w5,w12 ++ ror w11,w11,#6 ++ eor w14,w6,w7 ++ eor w15,w15,w6,ror#20 ++ add w5,w5,w11 ++ ldr w12,[sp,#24] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w9,w9,w5 ++ eor w13,w13,w7 ++ add w4,w4,w12 ++ add w5,w5,w15 ++ and w12,w10,w9 ++ bic w15,w3,w9 ++ eor w11,w9,w9,ror#5 ++ add w5,w5,w13 ++ orr w12,w12,w15 ++ eor w11,w11,w9,ror#19 ++ eor w15,w5,w5,ror#11 ++ add w4,w4,w12 ++ ror w11,w11,#6 ++ eor w13,w5,w6 ++ eor w15,w15,w5,ror#20 ++ add w4,w4,w11 ++ ldr w12,[sp,#28] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ add w8,w8,w4 ++ eor w14,w14,w6 ++ add w3,w3,w12 ++ add w4,w4,w15 ++ and w12,w9,w8 ++ bic w15,w10,w8 ++ eor w11,w8,w8,ror#5 ++ add w4,w4,w14 ++ orr w12,w12,w15 ++ eor w11,w11,w8,ror#19 ++ eor w15,w4,w4,ror#11 ++ add w3,w3,w12 ++ ror w11,w11,#6 ++ eor w14,w4,w5 ++ eor w15,w15,w4,ror#20 ++ add w3,w3,w11 ++ ldr w12,[sp,#32] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w7,w7,w3 ++ eor w13,w13,w5 ++ st1 {v4.4s},[x17], #16 ++ add w10,w10,w12 ++ add w3,w3,w15 ++ and w12,w8,w7 ++ ld1 {v2.16b},[x1],#16 ++ bic w15,w9,w7 ++ eor w11,w7,w7,ror#5 ++ ld1 {v4.4s},[x16],#16 ++ add w3,w3,w13 ++ orr w12,w12,w15 ++ eor w11,w11,w7,ror#19 ++ eor w15,w3,w3,ror#11 ++ rev32 v2.16b,v2.16b ++ add w10,w10,w12 ++ ror w11,w11,#6 ++ eor w13,w3,w4 ++ eor w15,w15,w3,ror#20 ++ add v4.4s,v4.4s,v2.4s ++ add w10,w10,w11 ++ ldr w12,[sp,#36] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ add w6,w6,w10 ++ eor w14,w14,w4 ++ add 
w9,w9,w12 ++ add w10,w10,w15 ++ and w12,w7,w6 ++ bic w15,w8,w6 ++ eor w11,w6,w6,ror#5 ++ add w10,w10,w14 ++ orr w12,w12,w15 ++ eor w11,w11,w6,ror#19 ++ eor w15,w10,w10,ror#11 ++ add w9,w9,w12 ++ ror w11,w11,#6 ++ eor w14,w10,w3 ++ eor w15,w15,w10,ror#20 ++ add w9,w9,w11 ++ ldr w12,[sp,#40] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w5,w5,w9 ++ eor w13,w13,w3 ++ add w8,w8,w12 ++ add w9,w9,w15 ++ and w12,w6,w5 ++ bic w15,w7,w5 ++ eor w11,w5,w5,ror#5 ++ add w9,w9,w13 ++ orr w12,w12,w15 ++ eor w11,w11,w5,ror#19 ++ eor w15,w9,w9,ror#11 ++ add w8,w8,w12 ++ ror w11,w11,#6 ++ eor w13,w9,w10 ++ eor w15,w15,w9,ror#20 ++ add w8,w8,w11 ++ ldr w12,[sp,#44] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ add w4,w4,w8 ++ eor w14,w14,w10 ++ add w7,w7,w12 ++ add w8,w8,w15 ++ and w12,w5,w4 ++ bic w15,w6,w4 ++ eor w11,w4,w4,ror#5 ++ add w8,w8,w14 ++ orr w12,w12,w15 ++ eor w11,w11,w4,ror#19 ++ eor w15,w8,w8,ror#11 ++ add w7,w7,w12 ++ ror w11,w11,#6 ++ eor w14,w8,w9 ++ eor w15,w15,w8,ror#20 ++ add w7,w7,w11 ++ ldr w12,[sp,#48] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w3,w3,w7 ++ eor w13,w13,w9 ++ st1 {v4.4s},[x17], #16 ++ add w6,w6,w12 ++ add w7,w7,w15 ++ and w12,w4,w3 ++ ld1 {v3.16b},[x1],#16 ++ bic w15,w5,w3 ++ eor w11,w3,w3,ror#5 ++ ld1 {v4.4s},[x16],#16 ++ add w7,w7,w13 ++ orr w12,w12,w15 ++ eor w11,w11,w3,ror#19 ++ eor w15,w7,w7,ror#11 ++ rev32 v3.16b,v3.16b ++ add w6,w6,w12 ++ ror w11,w11,#6 ++ eor w13,w7,w8 ++ eor w15,w15,w7,ror#20 ++ add v4.4s,v4.4s,v3.4s ++ add w6,w6,w11 ++ ldr w12,[sp,#52] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ add w10,w10,w6 ++ eor w14,w14,w8 ++ add w5,w5,w12 ++ add w6,w6,w15 ++ and w12,w3,w10 ++ bic w15,w4,w10 ++ eor w11,w10,w10,ror#5 ++ add w6,w6,w14 ++ orr w12,w12,w15 ++ eor w11,w11,w10,ror#19 ++ eor w15,w6,w6,ror#11 ++ add w5,w5,w12 ++ ror w11,w11,#6 ++ eor w14,w6,w7 ++ eor w15,w15,w6,ror#20 ++ add w5,w5,w11 ++ ldr w12,[sp,#56] ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w9,w9,w5 ++ eor w13,w13,w7 ++ add w4,w4,w12 ++ add w5,w5,w15 ++ and w12,w10,w9 ++ bic 
w15,w3,w9 ++ eor w11,w9,w9,ror#5 ++ add w5,w5,w13 ++ orr w12,w12,w15 ++ eor w11,w11,w9,ror#19 ++ eor w15,w5,w5,ror#11 ++ add w4,w4,w12 ++ ror w11,w11,#6 ++ eor w13,w5,w6 ++ eor w15,w15,w5,ror#20 ++ add w4,w4,w11 ++ ldr w12,[sp,#60] ++ and w14,w14,w13 ++ ror w15,w15,#2 ++ add w8,w8,w4 ++ eor w14,w14,w6 ++ add w3,w3,w12 ++ add w4,w4,w15 ++ and w12,w9,w8 ++ bic w15,w10,w8 ++ eor w11,w8,w8,ror#5 ++ add w4,w4,w14 ++ orr w12,w12,w15 ++ eor w11,w11,w8,ror#19 ++ eor w15,w4,w4,ror#11 ++ add w3,w3,w12 ++ ror w11,w11,#6 ++ eor w14,w4,w5 ++ eor w15,w15,w4,ror#20 ++ add w3,w3,w11 ++ and w13,w13,w14 ++ ror w15,w15,#2 ++ add w7,w7,w3 ++ eor w13,w13,w5 ++ st1 {v4.4s},[x17], #16 ++ add w3,w3,w15 // h+=Sigma0(a) from the past ++ ldp w11,w12,[x0,#0] ++ add w3,w3,w13 // h+=Maj(a,b,c) from the past ++ ldp w13,w14,[x0,#8] ++ add w3,w3,w11 // accumulate ++ add w4,w4,w12 ++ ldp w11,w12,[x0,#16] ++ add w5,w5,w13 ++ add w6,w6,w14 ++ ldp w13,w14,[x0,#24] ++ add w7,w7,w11 ++ add w8,w8,w12 ++ ldr w12,[sp,#0] ++ stp w3,w4,[x0,#0] ++ add w9,w9,w13 ++ mov w13,wzr ++ stp w5,w6,[x0,#8] ++ add w10,w10,w14 ++ stp w7,w8,[x0,#16] ++ eor w14,w4,w5 ++ stp w9,w10,[x0,#24] ++ mov w15,wzr ++ mov x17,sp ++ b.ne .L_00_48 ++ ++ ldr x29,[x29] ++ add sp,sp,#16*4+16 ++ ret ++.size sha256_block_neon,.-sha256_block_neon ++#ifndef __KERNEL__ ++.comm OPENSSL_armcap_P,4,4 ++#endif +--- /dev/null ++++ b/arch/arm64/crypto/sha512-core.S +@@ -0,0 +1,1085 @@ ++// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. ++// ++// Licensed under the OpenSSL license (the "License"). You may not use ++// this file except in compliance with the License. You can obtain a copy ++// in the file LICENSE in the source distribution or at ++// https://www.openssl.org/source/license.html ++ ++// ==================================================================== ++// Written by Andy Polyakov for the OpenSSL ++// project. 
The module is, however, dual licensed under OpenSSL and ++// CRYPTOGAMS licenses depending on where you obtain it. For further ++// details see http://www.openssl.org/~appro/cryptogams/. ++// ++// Permission to use under GPLv2 terms is granted. ++// ==================================================================== ++// ++// SHA256/512 for ARMv8. ++// ++// Performance in cycles per processed byte and improvement coefficient ++// over code generated with "default" compiler: ++// ++// SHA256-hw SHA256(*) SHA512 ++// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) ++// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) ++// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) ++// Denver 2.01 10.5 (+26%) 6.70 (+8%) ++// X-Gene 20.0 (+100%) 12.8 (+300%(***)) ++// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) ++// ++// (*) Software SHA256 results are of lesser relevance, presented ++// mostly for informational purposes. ++// (**) The result is a trade-off: it's possible to improve it by ++// 10% (or by 1 cycle per round), but at the cost of 20% loss ++// on Cortex-A53 (or by 4 cycles per round). ++// (***) Super-impressive coefficients over gcc-generated code are ++// indication of some compiler "pathology", most notably code ++// generated with -mgeneral-regs-only is significantly faster ++// and the gap is only 40-90%. ++// ++// October 2016. ++// ++// Originally it was reckoned that it makes no sense to implement NEON ++// version of SHA256 for 64-bit processors. This is because performance ++// improvement on most wide-spread Cortex-A5x processors was observed ++// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was ++// observed that 32-bit NEON SHA256 performs significantly better than ++// 64-bit scalar version on *some* of the more recent processors. As ++// result 64-bit NEON version of SHA256 was added to provide best ++// all-round performance. For example it executes ~30% faster on X-Gene ++// and Mongoose.
[For reference, NEON version of SHA512 is bound to ++// deliver much less improvement, likely *negative* on Cortex-A5x. ++// Which is why NEON support is limited to SHA256.] ++ ++#ifndef __KERNEL__ ++# include "arm_arch.h" ++#endif ++ ++.text ++ ++.extern OPENSSL_armcap_P ++.globl sha512_block_data_order ++.type sha512_block_data_order,%function ++.align 6 ++sha512_block_data_order: ++ stp x29,x30,[sp,#-128]! ++ add x29,sp,#0 ++ ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ stp x25,x26,[sp,#64] ++ stp x27,x28,[sp,#80] ++ sub sp,sp,#4*8 ++ ++ ldp x20,x21,[x0] // load context ++ ldp x22,x23,[x0,#2*8] ++ ldp x24,x25,[x0,#4*8] ++ add x2,x1,x2,lsl#7 // end of input ++ ldp x26,x27,[x0,#6*8] ++ adr x30,.LK512 ++ stp x0,x2,[x29,#96] ++ ++.Loop: ++ ldp x3,x4,[x1],#2*8 ++ ldr x19,[x30],#8 // *K++ ++ eor x28,x21,x22 // magic seed ++ str x1,[x29,#112] ++#ifndef __AARCH64EB__ ++ rev x3,x3 // 0 ++#endif ++ ror x16,x24,#14 ++ add x27,x27,x19 // h+=K[i] ++ eor x6,x24,x24,ror#23 ++ and x17,x25,x24 ++ bic x19,x26,x24 ++ add x27,x27,x3 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x20,x21 // a^b, b^c in next round ++ eor x16,x16,x6,ror#18 // Sigma1(e) ++ ror x6,x20,#28 ++ add x27,x27,x17 // h+=Ch(e,f,g) ++ eor x17,x20,x20,ror#5 ++ add x27,x27,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x23,x23,x27 // d+=h ++ eor x28,x28,x21 // Maj(a,b,c) ++ eor x17,x6,x17,ror#34 // Sigma0(a) ++ add x27,x27,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x27,x27,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x4,x4 // 1 ++#endif ++ ldp x5,x6,[x1],#2*8 ++ add x27,x27,x17 // h+=Sigma0(a) ++ ror x16,x23,#14 ++ add x26,x26,x28 // h+=K[i] ++ eor x7,x23,x23,ror#23 ++ and x17,x24,x23 ++ bic x28,x25,x23 ++ add x26,x26,x4 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x27,x20 // a^b, b^c in next round ++ eor x16,x16,x7,ror#18 // Sigma1(e) ++ ror x7,x27,#28 ++ add x26,x26,x17 // h+=Ch(e,f,g) ++ eor x17,x27,x27,ror#5 ++ 
add x26,x26,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x22,x22,x26 // d+=h ++ eor x19,x19,x20 // Maj(a,b,c) ++ eor x17,x7,x17,ror#34 // Sigma0(a) ++ add x26,x26,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x26,x26,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x5,x5 // 2 ++#endif ++ add x26,x26,x17 // h+=Sigma0(a) ++ ror x16,x22,#14 ++ add x25,x25,x19 // h+=K[i] ++ eor x8,x22,x22,ror#23 ++ and x17,x23,x22 ++ bic x19,x24,x22 ++ add x25,x25,x5 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x26,x27 // a^b, b^c in next round ++ eor x16,x16,x8,ror#18 // Sigma1(e) ++ ror x8,x26,#28 ++ add x25,x25,x17 // h+=Ch(e,f,g) ++ eor x17,x26,x26,ror#5 ++ add x25,x25,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x21,x21,x25 // d+=h ++ eor x28,x28,x27 // Maj(a,b,c) ++ eor x17,x8,x17,ror#34 // Sigma0(a) ++ add x25,x25,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x25,x25,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x6,x6 // 3 ++#endif ++ ldp x7,x8,[x1],#2*8 ++ add x25,x25,x17 // h+=Sigma0(a) ++ ror x16,x21,#14 ++ add x24,x24,x28 // h+=K[i] ++ eor x9,x21,x21,ror#23 ++ and x17,x22,x21 ++ bic x28,x23,x21 ++ add x24,x24,x6 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x25,x26 // a^b, b^c in next round ++ eor x16,x16,x9,ror#18 // Sigma1(e) ++ ror x9,x25,#28 ++ add x24,x24,x17 // h+=Ch(e,f,g) ++ eor x17,x25,x25,ror#5 ++ add x24,x24,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x20,x20,x24 // d+=h ++ eor x19,x19,x26 // Maj(a,b,c) ++ eor x17,x9,x17,ror#34 // Sigma0(a) ++ add x24,x24,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x24,x24,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x7,x7 // 4 ++#endif ++ add x24,x24,x17 // h+=Sigma0(a) ++ ror x16,x20,#14 ++ add x23,x23,x19 // h+=K[i] ++ eor x10,x20,x20,ror#23 ++ and x17,x21,x20 ++ bic x19,x22,x20 ++ add x23,x23,x7 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor 
x19,x24,x25 // a^b, b^c in next round ++ eor x16,x16,x10,ror#18 // Sigma1(e) ++ ror x10,x24,#28 ++ add x23,x23,x17 // h+=Ch(e,f,g) ++ eor x17,x24,x24,ror#5 ++ add x23,x23,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x27,x27,x23 // d+=h ++ eor x28,x28,x25 // Maj(a,b,c) ++ eor x17,x10,x17,ror#34 // Sigma0(a) ++ add x23,x23,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x23,x23,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x8,x8 // 5 ++#endif ++ ldp x9,x10,[x1],#2*8 ++ add x23,x23,x17 // h+=Sigma0(a) ++ ror x16,x27,#14 ++ add x22,x22,x28 // h+=K[i] ++ eor x11,x27,x27,ror#23 ++ and x17,x20,x27 ++ bic x28,x21,x27 ++ add x22,x22,x8 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x23,x24 // a^b, b^c in next round ++ eor x16,x16,x11,ror#18 // Sigma1(e) ++ ror x11,x23,#28 ++ add x22,x22,x17 // h+=Ch(e,f,g) ++ eor x17,x23,x23,ror#5 ++ add x22,x22,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x26,x26,x22 // d+=h ++ eor x19,x19,x24 // Maj(a,b,c) ++ eor x17,x11,x17,ror#34 // Sigma0(a) ++ add x22,x22,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x22,x22,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x9,x9 // 6 ++#endif ++ add x22,x22,x17 // h+=Sigma0(a) ++ ror x16,x26,#14 ++ add x21,x21,x19 // h+=K[i] ++ eor x12,x26,x26,ror#23 ++ and x17,x27,x26 ++ bic x19,x20,x26 ++ add x21,x21,x9 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x22,x23 // a^b, b^c in next round ++ eor x16,x16,x12,ror#18 // Sigma1(e) ++ ror x12,x22,#28 ++ add x21,x21,x17 // h+=Ch(e,f,g) ++ eor x17,x22,x22,ror#5 ++ add x21,x21,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x25,x25,x21 // d+=h ++ eor x28,x28,x23 // Maj(a,b,c) ++ eor x17,x12,x17,ror#34 // Sigma0(a) ++ add x21,x21,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x21,x21,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x10,x10 // 7 ++#endif ++ ldp x11,x12,[x1],#2*8 ++ add x21,x21,x17 // 
h+=Sigma0(a) ++ ror x16,x25,#14 ++ add x20,x20,x28 // h+=K[i] ++ eor x13,x25,x25,ror#23 ++ and x17,x26,x25 ++ bic x28,x27,x25 ++ add x20,x20,x10 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x21,x22 // a^b, b^c in next round ++ eor x16,x16,x13,ror#18 // Sigma1(e) ++ ror x13,x21,#28 ++ add x20,x20,x17 // h+=Ch(e,f,g) ++ eor x17,x21,x21,ror#5 ++ add x20,x20,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x24,x24,x20 // d+=h ++ eor x19,x19,x22 // Maj(a,b,c) ++ eor x17,x13,x17,ror#34 // Sigma0(a) ++ add x20,x20,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x20,x20,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x11,x11 // 8 ++#endif ++ add x20,x20,x17 // h+=Sigma0(a) ++ ror x16,x24,#14 ++ add x27,x27,x19 // h+=K[i] ++ eor x14,x24,x24,ror#23 ++ and x17,x25,x24 ++ bic x19,x26,x24 ++ add x27,x27,x11 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x20,x21 // a^b, b^c in next round ++ eor x16,x16,x14,ror#18 // Sigma1(e) ++ ror x14,x20,#28 ++ add x27,x27,x17 // h+=Ch(e,f,g) ++ eor x17,x20,x20,ror#5 ++ add x27,x27,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x23,x23,x27 // d+=h ++ eor x28,x28,x21 // Maj(a,b,c) ++ eor x17,x14,x17,ror#34 // Sigma0(a) ++ add x27,x27,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x27,x27,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x12,x12 // 9 ++#endif ++ ldp x13,x14,[x1],#2*8 ++ add x27,x27,x17 // h+=Sigma0(a) ++ ror x16,x23,#14 ++ add x26,x26,x28 // h+=K[i] ++ eor x15,x23,x23,ror#23 ++ and x17,x24,x23 ++ bic x28,x25,x23 ++ add x26,x26,x12 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x27,x20 // a^b, b^c in next round ++ eor x16,x16,x15,ror#18 // Sigma1(e) ++ ror x15,x27,#28 ++ add x26,x26,x17 // h+=Ch(e,f,g) ++ eor x17,x27,x27,ror#5 ++ add x26,x26,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x22,x22,x26 // d+=h ++ eor x19,x19,x20 // Maj(a,b,c) ++ eor x17,x15,x17,ror#34 // Sigma0(a) ++ add x26,x26,x19 // 
h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x26,x26,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x13,x13 // 10 ++#endif ++ add x26,x26,x17 // h+=Sigma0(a) ++ ror x16,x22,#14 ++ add x25,x25,x19 // h+=K[i] ++ eor x0,x22,x22,ror#23 ++ and x17,x23,x22 ++ bic x19,x24,x22 ++ add x25,x25,x13 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x26,x27 // a^b, b^c in next round ++ eor x16,x16,x0,ror#18 // Sigma1(e) ++ ror x0,x26,#28 ++ add x25,x25,x17 // h+=Ch(e,f,g) ++ eor x17,x26,x26,ror#5 ++ add x25,x25,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x21,x21,x25 // d+=h ++ eor x28,x28,x27 // Maj(a,b,c) ++ eor x17,x0,x17,ror#34 // Sigma0(a) ++ add x25,x25,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x25,x25,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x14,x14 // 11 ++#endif ++ ldp x15,x0,[x1],#2*8 ++ add x25,x25,x17 // h+=Sigma0(a) ++ str x6,[sp,#24] ++ ror x16,x21,#14 ++ add x24,x24,x28 // h+=K[i] ++ eor x6,x21,x21,ror#23 ++ and x17,x22,x21 ++ bic x28,x23,x21 ++ add x24,x24,x14 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x25,x26 // a^b, b^c in next round ++ eor x16,x16,x6,ror#18 // Sigma1(e) ++ ror x6,x25,#28 ++ add x24,x24,x17 // h+=Ch(e,f,g) ++ eor x17,x25,x25,ror#5 ++ add x24,x24,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x20,x20,x24 // d+=h ++ eor x19,x19,x26 // Maj(a,b,c) ++ eor x17,x6,x17,ror#34 // Sigma0(a) ++ add x24,x24,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x24,x24,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x15,x15 // 12 ++#endif ++ add x24,x24,x17 // h+=Sigma0(a) ++ str x7,[sp,#0] ++ ror x16,x20,#14 ++ add x23,x23,x19 // h+=K[i] ++ eor x7,x20,x20,ror#23 ++ and x17,x21,x20 ++ bic x19,x22,x20 ++ add x23,x23,x15 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x24,x25 // a^b, b^c in next round ++ eor x16,x16,x7,ror#18 // Sigma1(e) ++ ror x7,x24,#28 ++ add x23,x23,x17 // h+=Ch(e,f,g) ++ eor 
x17,x24,x24,ror#5 ++ add x23,x23,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x27,x27,x23 // d+=h ++ eor x28,x28,x25 // Maj(a,b,c) ++ eor x17,x7,x17,ror#34 // Sigma0(a) ++ add x23,x23,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x23,x23,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x0,x0 // 13 ++#endif ++ ldp x1,x2,[x1] ++ add x23,x23,x17 // h+=Sigma0(a) ++ str x8,[sp,#8] ++ ror x16,x27,#14 ++ add x22,x22,x28 // h+=K[i] ++ eor x8,x27,x27,ror#23 ++ and x17,x20,x27 ++ bic x28,x21,x27 ++ add x22,x22,x0 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x23,x24 // a^b, b^c in next round ++ eor x16,x16,x8,ror#18 // Sigma1(e) ++ ror x8,x23,#28 ++ add x22,x22,x17 // h+=Ch(e,f,g) ++ eor x17,x23,x23,ror#5 ++ add x22,x22,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x26,x26,x22 // d+=h ++ eor x19,x19,x24 // Maj(a,b,c) ++ eor x17,x8,x17,ror#34 // Sigma0(a) ++ add x22,x22,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x22,x22,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x1,x1 // 14 ++#endif ++ ldr x6,[sp,#24] ++ add x22,x22,x17 // h+=Sigma0(a) ++ str x9,[sp,#16] ++ ror x16,x26,#14 ++ add x21,x21,x19 // h+=K[i] ++ eor x9,x26,x26,ror#23 ++ and x17,x27,x26 ++ bic x19,x20,x26 ++ add x21,x21,x1 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x22,x23 // a^b, b^c in next round ++ eor x16,x16,x9,ror#18 // Sigma1(e) ++ ror x9,x22,#28 ++ add x21,x21,x17 // h+=Ch(e,f,g) ++ eor x17,x22,x22,ror#5 ++ add x21,x21,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x25,x25,x21 // d+=h ++ eor x28,x28,x23 // Maj(a,b,c) ++ eor x17,x9,x17,ror#34 // Sigma0(a) ++ add x21,x21,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x21,x21,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x2,x2 // 15 ++#endif ++ ldr x7,[sp,#0] ++ add x21,x21,x17 // h+=Sigma0(a) ++ str x10,[sp,#24] ++ ror x16,x25,#14 ++ add x20,x20,x28 // h+=K[i] ++ ror x9,x4,#1 ++ and 
x17,x26,x25 ++ ror x8,x1,#19 ++ bic x28,x27,x25 ++ ror x10,x21,#28 ++ add x20,x20,x2 // h+=X[i] ++ eor x16,x16,x25,ror#18 ++ eor x9,x9,x4,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x21,x22 // a^b, b^c in next round ++ eor x16,x16,x25,ror#41 // Sigma1(e) ++ eor x10,x10,x21,ror#34 ++ add x20,x20,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x8,x8,x1,ror#61 ++ eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) ++ add x20,x20,x16 // h+=Sigma1(e) ++ eor x19,x19,x22 // Maj(a,b,c) ++ eor x17,x10,x21,ror#39 // Sigma0(a) ++ eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) ++ add x3,x3,x12 ++ add x24,x24,x20 // d+=h ++ add x20,x20,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x3,x3,x9 ++ add x20,x20,x17 // h+=Sigma0(a) ++ add x3,x3,x8 ++.Loop_16_xx: ++ ldr x8,[sp,#8] ++ str x11,[sp,#0] ++ ror x16,x24,#14 ++ add x27,x27,x19 // h+=K[i] ++ ror x10,x5,#1 ++ and x17,x25,x24 ++ ror x9,x2,#19 ++ bic x19,x26,x24 ++ ror x11,x20,#28 ++ add x27,x27,x3 // h+=X[i] ++ eor x16,x16,x24,ror#18 ++ eor x10,x10,x5,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x20,x21 // a^b, b^c in next round ++ eor x16,x16,x24,ror#41 // Sigma1(e) ++ eor x11,x11,x20,ror#34 ++ add x27,x27,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x9,x9,x2,ror#61 ++ eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) ++ add x27,x27,x16 // h+=Sigma1(e) ++ eor x28,x28,x21 // Maj(a,b,c) ++ eor x17,x11,x20,ror#39 // Sigma0(a) ++ eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) ++ add x4,x4,x13 ++ add x23,x23,x27 // d+=h ++ add x27,x27,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x4,x4,x10 ++ add x27,x27,x17 // h+=Sigma0(a) ++ add x4,x4,x9 ++ ldr x9,[sp,#16] ++ str x12,[sp,#8] ++ ror x16,x23,#14 ++ add x26,x26,x28 // h+=K[i] ++ ror x11,x6,#1 ++ and x17,x24,x23 ++ ror x10,x3,#19 ++ bic x28,x25,x23 ++ ror x12,x27,#28 ++ add x26,x26,x4 // h+=X[i] ++ eor x16,x16,x23,ror#18 ++ eor x11,x11,x6,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x27,x20 // a^b, b^c in next round ++ eor 
x16,x16,x23,ror#41 // Sigma1(e) ++ eor x12,x12,x27,ror#34 ++ add x26,x26,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x10,x10,x3,ror#61 ++ eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) ++ add x26,x26,x16 // h+=Sigma1(e) ++ eor x19,x19,x20 // Maj(a,b,c) ++ eor x17,x12,x27,ror#39 // Sigma0(a) ++ eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) ++ add x5,x5,x14 ++ add x22,x22,x26 // d+=h ++ add x26,x26,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x5,x5,x11 ++ add x26,x26,x17 // h+=Sigma0(a) ++ add x5,x5,x10 ++ ldr x10,[sp,#24] ++ str x13,[sp,#16] ++ ror x16,x22,#14 ++ add x25,x25,x19 // h+=K[i] ++ ror x12,x7,#1 ++ and x17,x23,x22 ++ ror x11,x4,#19 ++ bic x19,x24,x22 ++ ror x13,x26,#28 ++ add x25,x25,x5 // h+=X[i] ++ eor x16,x16,x22,ror#18 ++ eor x12,x12,x7,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x26,x27 // a^b, b^c in next round ++ eor x16,x16,x22,ror#41 // Sigma1(e) ++ eor x13,x13,x26,ror#34 ++ add x25,x25,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x11,x11,x4,ror#61 ++ eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) ++ add x25,x25,x16 // h+=Sigma1(e) ++ eor x28,x28,x27 // Maj(a,b,c) ++ eor x17,x13,x26,ror#39 // Sigma0(a) ++ eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) ++ add x6,x6,x15 ++ add x21,x21,x25 // d+=h ++ add x25,x25,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x6,x6,x12 ++ add x25,x25,x17 // h+=Sigma0(a) ++ add x6,x6,x11 ++ ldr x11,[sp,#0] ++ str x14,[sp,#24] ++ ror x16,x21,#14 ++ add x24,x24,x28 // h+=K[i] ++ ror x13,x8,#1 ++ and x17,x22,x21 ++ ror x12,x5,#19 ++ bic x28,x23,x21 ++ ror x14,x25,#28 ++ add x24,x24,x6 // h+=X[i] ++ eor x16,x16,x21,ror#18 ++ eor x13,x13,x8,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x25,x26 // a^b, b^c in next round ++ eor x16,x16,x21,ror#41 // Sigma1(e) ++ eor x14,x14,x25,ror#34 ++ add x24,x24,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x12,x12,x5,ror#61 ++ eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) ++ add x24,x24,x16 // h+=Sigma1(e) 
++ eor x19,x19,x26 // Maj(a,b,c) ++ eor x17,x14,x25,ror#39 // Sigma0(a) ++ eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) ++ add x7,x7,x0 ++ add x20,x20,x24 // d+=h ++ add x24,x24,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x7,x7,x13 ++ add x24,x24,x17 // h+=Sigma0(a) ++ add x7,x7,x12 ++ ldr x12,[sp,#8] ++ str x15,[sp,#0] ++ ror x16,x20,#14 ++ add x23,x23,x19 // h+=K[i] ++ ror x14,x9,#1 ++ and x17,x21,x20 ++ ror x13,x6,#19 ++ bic x19,x22,x20 ++ ror x15,x24,#28 ++ add x23,x23,x7 // h+=X[i] ++ eor x16,x16,x20,ror#18 ++ eor x14,x14,x9,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x24,x25 // a^b, b^c in next round ++ eor x16,x16,x20,ror#41 // Sigma1(e) ++ eor x15,x15,x24,ror#34 ++ add x23,x23,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x13,x13,x6,ror#61 ++ eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) ++ add x23,x23,x16 // h+=Sigma1(e) ++ eor x28,x28,x25 // Maj(a,b,c) ++ eor x17,x15,x24,ror#39 // Sigma0(a) ++ eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) ++ add x8,x8,x1 ++ add x27,x27,x23 // d+=h ++ add x23,x23,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x8,x8,x14 ++ add x23,x23,x17 // h+=Sigma0(a) ++ add x8,x8,x13 ++ ldr x13,[sp,#16] ++ str x0,[sp,#8] ++ ror x16,x27,#14 ++ add x22,x22,x28 // h+=K[i] ++ ror x15,x10,#1 ++ and x17,x20,x27 ++ ror x14,x7,#19 ++ bic x28,x21,x27 ++ ror x0,x23,#28 ++ add x22,x22,x8 // h+=X[i] ++ eor x16,x16,x27,ror#18 ++ eor x15,x15,x10,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x23,x24 // a^b, b^c in next round ++ eor x16,x16,x27,ror#41 // Sigma1(e) ++ eor x0,x0,x23,ror#34 ++ add x22,x22,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x14,x14,x7,ror#61 ++ eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) ++ add x22,x22,x16 // h+=Sigma1(e) ++ eor x19,x19,x24 // Maj(a,b,c) ++ eor x17,x0,x23,ror#39 // Sigma0(a) ++ eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) ++ add x9,x9,x2 ++ add x26,x26,x22 // d+=h ++ add x22,x22,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next 
round ++ add x9,x9,x15 ++ add x22,x22,x17 // h+=Sigma0(a) ++ add x9,x9,x14 ++ ldr x14,[sp,#24] ++ str x1,[sp,#16] ++ ror x16,x26,#14 ++ add x21,x21,x19 // h+=K[i] ++ ror x0,x11,#1 ++ and x17,x27,x26 ++ ror x15,x8,#19 ++ bic x19,x20,x26 ++ ror x1,x22,#28 ++ add x21,x21,x9 // h+=X[i] ++ eor x16,x16,x26,ror#18 ++ eor x0,x0,x11,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x22,x23 // a^b, b^c in next round ++ eor x16,x16,x26,ror#41 // Sigma1(e) ++ eor x1,x1,x22,ror#34 ++ add x21,x21,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x15,x15,x8,ror#61 ++ eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) ++ add x21,x21,x16 // h+=Sigma1(e) ++ eor x28,x28,x23 // Maj(a,b,c) ++ eor x17,x1,x22,ror#39 // Sigma0(a) ++ eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) ++ add x10,x10,x3 ++ add x25,x25,x21 // d+=h ++ add x21,x21,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x10,x10,x0 ++ add x21,x21,x17 // h+=Sigma0(a) ++ add x10,x10,x15 ++ ldr x15,[sp,#0] ++ str x2,[sp,#24] ++ ror x16,x25,#14 ++ add x20,x20,x28 // h+=K[i] ++ ror x1,x12,#1 ++ and x17,x26,x25 ++ ror x0,x9,#19 ++ bic x28,x27,x25 ++ ror x2,x21,#28 ++ add x20,x20,x10 // h+=X[i] ++ eor x16,x16,x25,ror#18 ++ eor x1,x1,x12,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x21,x22 // a^b, b^c in next round ++ eor x16,x16,x25,ror#41 // Sigma1(e) ++ eor x2,x2,x21,ror#34 ++ add x20,x20,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x0,x0,x9,ror#61 ++ eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) ++ add x20,x20,x16 // h+=Sigma1(e) ++ eor x19,x19,x22 // Maj(a,b,c) ++ eor x17,x2,x21,ror#39 // Sigma0(a) ++ eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) ++ add x11,x11,x4 ++ add x24,x24,x20 // d+=h ++ add x20,x20,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x11,x11,x1 ++ add x20,x20,x17 // h+=Sigma0(a) ++ add x11,x11,x0 ++ ldr x0,[sp,#8] ++ str x3,[sp,#0] ++ ror x16,x24,#14 ++ add x27,x27,x19 // h+=K[i] ++ ror x2,x13,#1 ++ and x17,x25,x24 ++ ror x1,x10,#19 ++ bic x19,x26,x24 ++ ror 
x3,x20,#28 ++ add x27,x27,x11 // h+=X[i] ++ eor x16,x16,x24,ror#18 ++ eor x2,x2,x13,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x20,x21 // a^b, b^c in next round ++ eor x16,x16,x24,ror#41 // Sigma1(e) ++ eor x3,x3,x20,ror#34 ++ add x27,x27,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x1,x1,x10,ror#61 ++ eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) ++ add x27,x27,x16 // h+=Sigma1(e) ++ eor x28,x28,x21 // Maj(a,b,c) ++ eor x17,x3,x20,ror#39 // Sigma0(a) ++ eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) ++ add x12,x12,x5 ++ add x23,x23,x27 // d+=h ++ add x27,x27,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x12,x12,x2 ++ add x27,x27,x17 // h+=Sigma0(a) ++ add x12,x12,x1 ++ ldr x1,[sp,#16] ++ str x4,[sp,#8] ++ ror x16,x23,#14 ++ add x26,x26,x28 // h+=K[i] ++ ror x3,x14,#1 ++ and x17,x24,x23 ++ ror x2,x11,#19 ++ bic x28,x25,x23 ++ ror x4,x27,#28 ++ add x26,x26,x12 // h+=X[i] ++ eor x16,x16,x23,ror#18 ++ eor x3,x3,x14,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x27,x20 // a^b, b^c in next round ++ eor x16,x16,x23,ror#41 // Sigma1(e) ++ eor x4,x4,x27,ror#34 ++ add x26,x26,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x2,x2,x11,ror#61 ++ eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) ++ add x26,x26,x16 // h+=Sigma1(e) ++ eor x19,x19,x20 // Maj(a,b,c) ++ eor x17,x4,x27,ror#39 // Sigma0(a) ++ eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) ++ add x13,x13,x6 ++ add x22,x22,x26 // d+=h ++ add x26,x26,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x13,x13,x3 ++ add x26,x26,x17 // h+=Sigma0(a) ++ add x13,x13,x2 ++ ldr x2,[sp,#24] ++ str x5,[sp,#16] ++ ror x16,x22,#14 ++ add x25,x25,x19 // h+=K[i] ++ ror x4,x15,#1 ++ and x17,x23,x22 ++ ror x3,x12,#19 ++ bic x19,x24,x22 ++ ror x5,x26,#28 ++ add x25,x25,x13 // h+=X[i] ++ eor x16,x16,x22,ror#18 ++ eor x4,x4,x15,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x26,x27 // a^b, b^c in next round ++ eor x16,x16,x22,ror#41 // Sigma1(e) ++ eor x5,x5,x26,ror#34 ++ add 
x25,x25,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x3,x3,x12,ror#61 ++ eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) ++ add x25,x25,x16 // h+=Sigma1(e) ++ eor x28,x28,x27 // Maj(a,b,c) ++ eor x17,x5,x26,ror#39 // Sigma0(a) ++ eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) ++ add x14,x14,x7 ++ add x21,x21,x25 // d+=h ++ add x25,x25,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x14,x14,x4 ++ add x25,x25,x17 // h+=Sigma0(a) ++ add x14,x14,x3 ++ ldr x3,[sp,#0] ++ str x6,[sp,#24] ++ ror x16,x21,#14 ++ add x24,x24,x28 // h+=K[i] ++ ror x5,x0,#1 ++ and x17,x22,x21 ++ ror x4,x13,#19 ++ bic x28,x23,x21 ++ ror x6,x25,#28 ++ add x24,x24,x14 // h+=X[i] ++ eor x16,x16,x21,ror#18 ++ eor x5,x5,x0,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x25,x26 // a^b, b^c in next round ++ eor x16,x16,x21,ror#41 // Sigma1(e) ++ eor x6,x6,x25,ror#34 ++ add x24,x24,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x4,x4,x13,ror#61 ++ eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) ++ add x24,x24,x16 // h+=Sigma1(e) ++ eor x19,x19,x26 // Maj(a,b,c) ++ eor x17,x6,x25,ror#39 // Sigma0(a) ++ eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) ++ add x15,x15,x8 ++ add x20,x20,x24 // d+=h ++ add x24,x24,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x15,x15,x5 ++ add x24,x24,x17 // h+=Sigma0(a) ++ add x15,x15,x4 ++ ldr x4,[sp,#8] ++ str x7,[sp,#0] ++ ror x16,x20,#14 ++ add x23,x23,x19 // h+=K[i] ++ ror x6,x1,#1 ++ and x17,x21,x20 ++ ror x5,x14,#19 ++ bic x19,x22,x20 ++ ror x7,x24,#28 ++ add x23,x23,x15 // h+=X[i] ++ eor x16,x16,x20,ror#18 ++ eor x6,x6,x1,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x24,x25 // a^b, b^c in next round ++ eor x16,x16,x20,ror#41 // Sigma1(e) ++ eor x7,x7,x24,ror#34 ++ add x23,x23,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x5,x5,x14,ror#61 ++ eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) ++ add x23,x23,x16 // h+=Sigma1(e) ++ eor x28,x28,x25 // Maj(a,b,c) ++ eor x17,x7,x24,ror#39 // Sigma0(a) ++ eor 
x5,x5,x14,lsr#6 // sigma1(X[i+14]) ++ add x0,x0,x9 ++ add x27,x27,x23 // d+=h ++ add x23,x23,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x0,x0,x6 ++ add x23,x23,x17 // h+=Sigma0(a) ++ add x0,x0,x5 ++ ldr x5,[sp,#16] ++ str x8,[sp,#8] ++ ror x16,x27,#14 ++ add x22,x22,x28 // h+=K[i] ++ ror x7,x2,#1 ++ and x17,x20,x27 ++ ror x6,x15,#19 ++ bic x28,x21,x27 ++ ror x8,x23,#28 ++ add x22,x22,x0 // h+=X[i] ++ eor x16,x16,x27,ror#18 ++ eor x7,x7,x2,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x23,x24 // a^b, b^c in next round ++ eor x16,x16,x27,ror#41 // Sigma1(e) ++ eor x8,x8,x23,ror#34 ++ add x22,x22,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x6,x6,x15,ror#61 ++ eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) ++ add x22,x22,x16 // h+=Sigma1(e) ++ eor x19,x19,x24 // Maj(a,b,c) ++ eor x17,x8,x23,ror#39 // Sigma0(a) ++ eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) ++ add x1,x1,x10 ++ add x26,x26,x22 // d+=h ++ add x22,x22,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x1,x1,x7 ++ add x22,x22,x17 // h+=Sigma0(a) ++ add x1,x1,x6 ++ ldr x6,[sp,#24] ++ str x9,[sp,#16] ++ ror x16,x26,#14 ++ add x21,x21,x19 // h+=K[i] ++ ror x8,x3,#1 ++ and x17,x27,x26 ++ ror x7,x0,#19 ++ bic x19,x20,x26 ++ ror x9,x22,#28 ++ add x21,x21,x1 // h+=X[i] ++ eor x16,x16,x26,ror#18 ++ eor x8,x8,x3,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x22,x23 // a^b, b^c in next round ++ eor x16,x16,x26,ror#41 // Sigma1(e) ++ eor x9,x9,x22,ror#34 ++ add x21,x21,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x7,x7,x0,ror#61 ++ eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) ++ add x21,x21,x16 // h+=Sigma1(e) ++ eor x28,x28,x23 // Maj(a,b,c) ++ eor x17,x9,x22,ror#39 // Sigma0(a) ++ eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) ++ add x2,x2,x11 ++ add x25,x25,x21 // d+=h ++ add x21,x21,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x2,x2,x8 ++ add x21,x21,x17 // h+=Sigma0(a) ++ add x2,x2,x7 ++ ldr x7,[sp,#0] ++ str 
x10,[sp,#24] ++ ror x16,x25,#14 ++ add x20,x20,x28 // h+=K[i] ++ ror x9,x4,#1 ++ and x17,x26,x25 ++ ror x8,x1,#19 ++ bic x28,x27,x25 ++ ror x10,x21,#28 ++ add x20,x20,x2 // h+=X[i] ++ eor x16,x16,x25,ror#18 ++ eor x9,x9,x4,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x21,x22 // a^b, b^c in next round ++ eor x16,x16,x25,ror#41 // Sigma1(e) ++ eor x10,x10,x21,ror#34 ++ add x20,x20,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x8,x8,x1,ror#61 ++ eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) ++ add x20,x20,x16 // h+=Sigma1(e) ++ eor x19,x19,x22 // Maj(a,b,c) ++ eor x17,x10,x21,ror#39 // Sigma0(a) ++ eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) ++ add x3,x3,x12 ++ add x24,x24,x20 // d+=h ++ add x20,x20,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x3,x3,x9 ++ add x20,x20,x17 // h+=Sigma0(a) ++ add x3,x3,x8 ++ cbnz x19,.Loop_16_xx ++ ++ ldp x0,x2,[x29,#96] ++ ldr x1,[x29,#112] ++ sub x30,x30,#648 // rewind ++ ++ ldp x3,x4,[x0] ++ ldp x5,x6,[x0,#2*8] ++ add x1,x1,#14*8 // advance input pointer ++ ldp x7,x8,[x0,#4*8] ++ add x20,x20,x3 ++ ldp x9,x10,[x0,#6*8] ++ add x21,x21,x4 ++ add x22,x22,x5 ++ add x23,x23,x6 ++ stp x20,x21,[x0] ++ add x24,x24,x7 ++ add x25,x25,x8 ++ stp x22,x23,[x0,#2*8] ++ add x26,x26,x9 ++ add x27,x27,x10 ++ cmp x1,x2 ++ stp x24,x25,[x0,#4*8] ++ stp x26,x27,[x0,#6*8] ++ b.ne .Loop ++ ++ ldp x19,x20,[x29,#16] ++ add sp,sp,#4*8 ++ ldp x21,x22,[x29,#32] ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldp x29,x30,[sp],#128 ++ ret ++.size sha512_block_data_order,.-sha512_block_data_order ++ ++.align 6 ++.type .LK512,%object ++.LK512: ++ .quad 0x428a2f98d728ae22,0x7137449123ef65cd ++ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc ++ .quad 0x3956c25bf348b538,0x59f111f1b605d019 ++ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 ++ .quad 0xd807aa98a3030242,0x12835b0145706fbe ++ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 ++ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 ++ .quad 
0x9bdc06a725c71235,0xc19bf174cf692694 ++ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 ++ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 ++ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 ++ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 ++ .quad 0x983e5152ee66dfab,0xa831c66d2db43210 ++ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 ++ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 ++ .quad 0x06ca6351e003826f,0x142929670a0e6e70 ++ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 ++ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df ++ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 ++ .quad 0x81c2c92e47edaee6,0x92722c851482353b ++ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 ++ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 ++ .quad 0xd192e819d6ef5218,0xd69906245565a910 ++ .quad 0xf40e35855771202a,0x106aa07032bbd1b8 ++ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 ++ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 ++ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb ++ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 ++ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 ++ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec ++ .quad 0x90befffa23631e28,0xa4506cebde82bde9 ++ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b ++ .quad 0xca273eceea26619c,0xd186b8c721c0c207 ++ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 ++ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 ++ .quad 0x113f9804bef90dae,0x1b710b35131c471b ++ .quad 0x28db77f523047d84,0x32caab7b40c72493 ++ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c ++ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a ++ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 ++ .quad 0 // terminator ++.size .LK512,.-.LK512 ++#ifndef __KERNEL__ ++.align 3 ++.LOPENSSL_armcap_P: ++# ifdef __ILP32__ ++ .long OPENSSL_armcap_P-. ++# else ++ .quad OPENSSL_armcap_P-. 
++# endif ++#endif ++.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by " ++.align 2 ++#ifndef __KERNEL__ ++.comm OPENSSL_armcap_P,4,4 ++#endif +--- a/arch/arm64/kernel/bpi.S ++++ b/arch/arm64/kernel/bpi.S +@@ -17,6 +17,7 @@ + */ + + #include ++#include + + .macro ventry target + .rept 31 +@@ -77,3 +78,22 @@ ENTRY(__psci_hyp_bp_inval_start) + ldp x0, x1, [sp, #(16 * 8)] + add sp, sp, #(8 * 18) + ENTRY(__psci_hyp_bp_inval_end) ++ ++.macro smccc_workaround_1 inst ++ sub sp, sp, #(8 * 4) ++ stp x2, x3, [sp, #(8 * 0)] ++ stp x0, x1, [sp, #(8 * 2)] ++ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 ++ \inst #0 ++ ldp x2, x3, [sp, #(8 * 0)] ++ ldp x0, x1, [sp, #(8 * 2)] ++ add sp, sp, #(8 * 4) ++.endm ++ ++ENTRY(__smccc_workaround_1_smc_start) ++ smccc_workaround_1 smc ++ENTRY(__smccc_workaround_1_smc_end) ++ ++ENTRY(__smccc_workaround_1_hvc_start) ++ smccc_workaround_1 hvc ++ENTRY(__smccc_workaround_1_hvc_end) +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -54,6 +54,10 @@ DEFINE_PER_CPU_READ_MOSTLY(struct bp_har + + #ifdef CONFIG_KVM + extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; ++extern char __smccc_workaround_1_smc_start[]; ++extern char __smccc_workaround_1_smc_end[]; ++extern char __smccc_workaround_1_hvc_start[]; ++extern char __smccc_workaround_1_hvc_end[]; + + static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, + const char *hyp_vecs_end) +@@ -96,8 +100,12 @@ static void __install_bp_hardening_cb(bp + spin_unlock(&bp_lock); + } + #else +-#define __psci_hyp_bp_inval_start NULL +-#define __psci_hyp_bp_inval_end NULL ++#define __psci_hyp_bp_inval_start NULL ++#define __psci_hyp_bp_inval_end NULL ++#define __smccc_workaround_1_smc_start NULL ++#define __smccc_workaround_1_smc_end NULL ++#define __smccc_workaround_1_hvc_start NULL ++#define __smccc_workaround_1_hvc_end NULL + + static void __install_bp_hardening_cb(bp_hardening_cb_t fn, + const char *hyp_vecs_start, +@@ -124,17 +132,75 @@ static void 
install_bp_hardening_cb(con + __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); + } + ++#include ++#include + #include + ++static void call_smc_arch_workaround_1(void) ++{ ++ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); ++} ++ ++static void call_hvc_arch_workaround_1(void) ++{ ++ arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); ++} ++ ++static bool check_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) ++{ ++ bp_hardening_cb_t cb; ++ void *smccc_start, *smccc_end; ++ struct arm_smccc_res res; ++ ++ if (!entry->matches(entry, SCOPE_LOCAL_CPU)) ++ return false; ++ ++ if (psci_ops.smccc_version == SMCCC_VERSION_1_0) ++ return false; ++ ++ switch (psci_ops.conduit) { ++ case PSCI_CONDUIT_HVC: ++ arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ++ ARM_SMCCC_ARCH_WORKAROUND_1, &res); ++ if (res.a0) ++ return false; ++ cb = call_hvc_arch_workaround_1; ++ smccc_start = __smccc_workaround_1_hvc_start; ++ smccc_end = __smccc_workaround_1_hvc_end; ++ break; ++ ++ case PSCI_CONDUIT_SMC: ++ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ++ ARM_SMCCC_ARCH_WORKAROUND_1, &res); ++ if (res.a0) ++ return false; ++ cb = call_smc_arch_workaround_1; ++ smccc_start = __smccc_workaround_1_smc_start; ++ smccc_end = __smccc_workaround_1_smc_end; ++ break; ++ ++ default: ++ return false; ++ } ++ ++ install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); ++ ++ return true; ++} ++ + static int enable_psci_bp_hardening(void *data) + { + const struct arm64_cpu_capabilities *entry = data; + +- if (psci_ops.get_version) ++ if (psci_ops.get_version) { ++ if (check_smccc_arch_workaround_1(entry)) ++ return 0; ++ + install_bp_hardening_cb(entry, + (bp_hardening_cb_t)psci_ops.get_version, + __psci_hyp_bp_inval_start, + __psci_hyp_bp_inval_end); ++ } + + return 0; + } diff --git a/queue-4.9/arm64-add-skeleton-to-harden-the-branch-predictor-against-aliasing-attacks.patch 
b/queue-4.9/arm64-add-skeleton-to-harden-the-branch-predictor-against-aliasing-attacks.patch new file mode 100644 index 00000000000..f090d34734b --- /dev/null +++ b/queue-4.9/arm64-add-skeleton-to-harden-the-branch-predictor-against-aliasing-attacks.patch @@ -0,0 +1,373 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland <mark.rutland@arm.com> +Date: Thu, 12 Apr 2018 12:11:13 +0100 +Subject: [PATCH v4.9.y 17/42] arm64: Add skeleton to harden the branch predictor against aliasing attacks +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-18-mark.rutland@arm.com> + +From: Mark Rutland <mark.rutland@arm.com> + + +From: Will Deacon <will.deacon@arm.com> + +commit 0f15adbb2861ce6f75ccfc5a92b19eae0ef327d0 upstream. + +Aliasing attacks against CPU branch predictors can allow an attacker to +redirect speculative control flow on some CPUs and potentially divulge +information from one context to another. + +This patch adds initial skeleton code behind a new Kconfig option to +enable implementation-specific mitigations against these attacks for +CPUs that are affected.
+ +Co-developed-by: Marc Zyngier <marc.zyngier@arm.com> +Signed-off-by: Will Deacon <will.deacon@arm.com> +Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> +[v4.9: copy bp hardening cb via text mapping] +Signed-off-by: Mark Rutland <mark.rutland@arm.com> [v4.9 backport] +Tested-by: Greg Hackmann <ghackmann@google.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/arm64/Kconfig | 17 ++++++++ + arch/arm64/include/asm/cpucaps.h | 3 + + arch/arm64/include/asm/mmu.h | 39 ++++++++++++++++++++ + arch/arm64/include/asm/sysreg.h | 2 + + arch/arm64/kernel/Makefile | 4 ++ + arch/arm64/kernel/bpi.S | 55 ++++++++++++++++++++++++++++ + arch/arm64/kernel/cpu_errata.c | 74 +++++++++++++++++++++++++++++++++++++++ + arch/arm64/kernel/cpufeature.c | 3 + + arch/arm64/kernel/entry.S | 8 ++-- + arch/arm64/mm/context.c | 2 + + arch/arm64/mm/fault.c | 17 ++++++++ + 11 files changed, 219 insertions(+), 5 deletions(-) + create mode 100644 arch/arm64/kernel/bpi.S + +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -745,6 +745,23 @@ config UNMAP_KERNEL_AT_EL0 + + If unsure, say Y. + ++config HARDEN_BRANCH_PREDICTOR ++ bool "Harden the branch predictor against aliasing attacks" if EXPERT ++ default y ++ help ++ Speculation attacks against some high-performance processors rely on ++ being able to manipulate the branch predictor for a victim context by ++ executing aliasing branches in the attacker context. Such attacks ++ can be partially mitigated against by clearing internal branch ++ predictor state and limiting the prediction logic in some situations. ++ ++ This config option will take CPU-specific actions to harden the ++ branch predictor against aliasing attacks and may rely on specific ++ instruction sequences or control bits being set by the system ++ firmware. ++ ++ If unsure, say Y.
++ + menuconfig ARMV8_DEPRECATED + bool "Emulate deprecated/obsolete ARMv8 instructions" + depends on COMPAT +--- a/arch/arm64/include/asm/cpucaps.h ++++ b/arch/arm64/include/asm/cpucaps.h +@@ -35,7 +35,8 @@ + #define ARM64_HYP_OFFSET_LOW 14 + #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 + #define ARM64_UNMAP_KERNEL_AT_EL0 16 ++#define ARM64_HARDEN_BRANCH_PREDICTOR 17 + +-#define ARM64_NCAPS 17 ++#define ARM64_NCAPS 18 + + #endif /* __ASM_CPUCAPS_H */ +--- a/arch/arm64/include/asm/mmu.h ++++ b/arch/arm64/include/asm/mmu.h +@@ -20,6 +20,8 @@ + + #ifndef __ASSEMBLY__ + ++#include <linux/percpu.h> ++ + typedef struct { + atomic64_t id; + void *vdso; +@@ -38,6 +40,43 @@ static inline bool arm64_kernel_unmapped + cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0); + } + ++typedef void (*bp_hardening_cb_t)(void); ++ ++struct bp_hardening_data { ++ int hyp_vectors_slot; ++ bp_hardening_cb_t fn; ++}; ++ ++#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR ++extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; ++ ++DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); ++ ++static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) ++{ ++ return this_cpu_ptr(&bp_hardening_data); ++} ++ ++static inline void arm64_apply_bp_hardening(void) ++{ ++ struct bp_hardening_data *d; ++ ++ if (!cpus_have_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) ++ return; ++ ++ d = arm64_get_bp_hardening_data(); ++ if (d->fn) ++ d->fn(); ++} ++#else ++static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) ++{ ++ return NULL; ++} ++ ++static inline void arm64_apply_bp_hardening(void) { } ++#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ ++ + extern void paging_init(void); + extern void bootmem_init(void); + extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); +--- a/arch/arm64/include/asm/sysreg.h ++++ b/arch/arm64/include/asm/sysreg.h +@@ -118,6 +118,8 @@ + + /* id_aa64pfr0 */ + #define ID_AA64PFR0_CSV3_SHIFT 60 ++#define ID_AA64PFR0_CSV2_SHIFT 56 ++#define 
ID_AA64PFR0_SVE_SHIFT 32 + #define ID_AA64PFR0_GIC_SHIFT 24 + #define ID_AA64PFR0_ASIMD_SHIFT 20 + #define ID_AA64PFR0_FP_SHIFT 16 +--- a/arch/arm64/kernel/Makefile ++++ b/arch/arm64/kernel/Makefile +@@ -51,6 +51,10 @@ arm64-obj-$(CONFIG_HIBERNATION) += hibe + arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ + cpu-reset.o + ++ifeq ($(CONFIG_KVM),y) ++arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o ++endif ++ + obj-y += $(arm64-obj-y) vdso/ probes/ + obj-m += $(arm64-obj-m) + head-y := head.o +--- /dev/null ++++ b/arch/arm64/kernel/bpi.S +@@ -0,0 +1,55 @@ ++/* ++ * Contains CPU specific branch predictor invalidation sequences ++ * ++ * Copyright (C) 2018 ARM Ltd. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
++ */ ++ ++#include <linux/linkage.h> ++ ++.macro ventry target ++ .rept 31 ++ nop ++ .endr ++ b \target ++.endm ++ ++.macro vectors target ++ ventry \target + 0x000 ++ ventry \target + 0x080 ++ ventry \target + 0x100 ++ ventry \target + 0x180 ++ ++ ventry \target + 0x200 ++ ventry \target + 0x280 ++ ventry \target + 0x300 ++ ventry \target + 0x380 ++ ++ ventry \target + 0x400 ++ ventry \target + 0x480 ++ ventry \target + 0x500 ++ ventry \target + 0x580 ++ ++ ventry \target + 0x600 ++ ventry \target + 0x680 ++ ventry \target + 0x700 ++ ventry \target + 0x780 ++.endm ++ ++ .align 11 ++ENTRY(__bp_harden_hyp_vecs_start) ++ .rept 4 ++ vectors __kvm_hyp_vector ++ .endr ++ENTRY(__bp_harden_hyp_vecs_end) +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -46,6 +46,80 @@ static int cpu_enable_trap_ctr_access(vo + return 0; + } + ++#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR ++#include <asm/mmu_context.h> ++#include <asm/cacheflush.h> ++ ++DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); ++ ++#ifdef CONFIG_KVM ++static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, ++ const char *hyp_vecs_end) ++{ ++ void *dst = __bp_harden_hyp_vecs_start + slot * SZ_2K; ++ int i; ++ ++ for (i = 0; i < SZ_2K; i += 0x80) ++ memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); ++ ++ flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); ++} ++ ++static void __install_bp_hardening_cb(bp_hardening_cb_t fn, ++ const char *hyp_vecs_start, ++ const char *hyp_vecs_end) ++{ ++ static int last_slot = -1; ++ static DEFINE_SPINLOCK(bp_lock); ++ int cpu, slot = -1; ++ ++ spin_lock(&bp_lock); ++ for_each_possible_cpu(cpu) { ++ if (per_cpu(bp_hardening_data.fn, cpu) == fn) { ++ slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); ++ break; ++ } ++ } ++ ++ if (slot == -1) { ++ last_slot++; ++ BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start) ++ / SZ_2K) <= last_slot); ++ slot = last_slot; ++ __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); ++ } ++ ++ 
__this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); ++ __this_cpu_write(bp_hardening_data.fn, fn); ++ spin_unlock(&bp_lock); ++} ++#else ++static void __install_bp_hardening_cb(bp_hardening_cb_t fn, ++ const char *hyp_vecs_start, ++ const char *hyp_vecs_end) ++{ ++ __this_cpu_write(bp_hardening_data.fn, fn); ++} ++#endif /* CONFIG_KVM */ ++ ++static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, ++ bp_hardening_cb_t fn, ++ const char *hyp_vecs_start, ++ const char *hyp_vecs_end) ++{ ++ u64 pfr0; ++ ++ if (!entry->matches(entry, SCOPE_LOCAL_CPU)) ++ return; ++ ++ pfr0 = read_cpuid(ID_AA64PFR0_EL1); ++ if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) ++ return; ++ ++ __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); ++} ++#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ ++ + #define MIDR_RANGE(model, min, max) \ + .def_scope = SCOPE_LOCAL_CPU, \ + .matches = is_affected_midr_range, \ +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -94,7 +94,8 @@ static const struct arm64_ftr_bits ftr_i + + static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), +- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 28, 0), ++ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), ++ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 24, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), +--- a/arch/arm64/kernel/entry.S ++++ b/arch/arm64/kernel/entry.S +@@ -589,13 +589,15 @@ el0_ia: + * Instruction abort handling + */ + mrs x26, far_el1 +- // enable interrupts before calling the main handler +- enable_dbg_and_irq ++ msr daifclr, #(8 | 4 | 1) ++#ifdef CONFIG_TRACE_IRQFLAGS ++ bl trace_hardirqs_off ++#endif + ct_user_exit + mov x0, x26 + mov 
x1, x25 + mov x2, sp +- bl do_mem_abort ++ bl do_el0_ia_bp_hardening + b ret_to_user + el0_fpsimd_acc: + /* +--- a/arch/arm64/mm/context.c ++++ b/arch/arm64/mm/context.c +@@ -240,6 +240,8 @@ asmlinkage void post_ttbr_update_workaro + "ic iallu; dsb nsh; isb", + ARM64_WORKAROUND_CAVIUM_27456, + CONFIG_CAVIUM_ERRATUM_27456)); ++ ++ arm64_apply_bp_hardening(); + } + + static int asids_init(void) +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -590,6 +590,23 @@ asmlinkage void __exception do_mem_abort + arm64_notify_die("", regs, &info, esr); + } + ++asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, ++ unsigned int esr, ++ struct pt_regs *regs) ++{ ++ /* ++ * We've taken an instruction abort from userspace and not yet ++ * re-enabled IRQs. If the address is a kernel address, apply ++ * BP hardening prior to enabling IRQs and pre-emption. ++ */ ++ if (addr > TASK_SIZE) ++ arm64_apply_bp_hardening(); ++ ++ local_irq_enable(); ++ do_mem_abort(addr, esr, regs); ++} ++ ++ + /* + * Handle stack alignment exceptions. + */ diff --git a/queue-4.9/arm64-barrier-add-csdb-macros-to-control-data-value-prediction.patch b/queue-4.9/arm64-barrier-add-csdb-macros-to-control-data-value-prediction.patch new file mode 100644 index 00000000000..3ffc0a23c03 --- /dev/null +++ b/queue-4.9/arm64-barrier-add-csdb-macros-to-control-data-value-prediction.patch @@ -0,0 +1,57 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:10:57 +0100 +Subject: [PATCH v4.9.y 01/42] arm64: barrier: Add CSDB macros to control data-value prediction +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-2-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit 669474e772b952b14f4de4845a1558fd4c0414a4 upstream. 
+ +For CPUs capable of data value prediction, CSDB waits for any outstanding +predictions to architecturally resolve before allowing speculative execution +to continue. Provide macros to expose it to the arch code. + +Reviewed-by: Mark Rutland +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/assembler.h | 7 +++++++ + arch/arm64/include/asm/barrier.h | 2 ++ + 2 files changed, 9 insertions(+) + +--- a/arch/arm64/include/asm/assembler.h ++++ b/arch/arm64/include/asm/assembler.h +@@ -87,6 +87,13 @@ + .endm + + /* ++ * Value prediction barrier ++ */ ++ .macro csdb ++ hint #20 ++ .endm ++ ++/* + * NOP sequence + */ + .macro nops, num +--- a/arch/arm64/include/asm/barrier.h ++++ b/arch/arm64/include/asm/barrier.h +@@ -31,6 +31,8 @@ + #define dmb(opt) asm volatile("dmb " #opt : : : "memory") + #define dsb(opt) asm volatile("dsb " #opt : : : "memory") + ++#define csdb() asm volatile("hint #20" : : : "memory") ++ + #define mb() dsb(sy) + #define rmb() dsb(ld) + #define wmb() dsb(st) diff --git a/queue-4.9/arm64-branch-predictor-hardening-for-cavium-thunderx2.patch b/queue-4.9/arm64-branch-predictor-hardening-for-cavium-thunderx2.patch new file mode 100644 index 00000000000..2d0d590ac36 --- /dev/null +++ b/queue-4.9/arm64-branch-predictor-hardening-for-cavium-thunderx2.patch @@ -0,0 +1,49 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:22 +0100 +Subject: [PATCH v4.9.y 26/42] arm64: Branch predictor hardening for Cavium ThunderX2 +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-27-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Jayachandran C + +commit f3d795d9b360523beca6d13ba64c2c532f601149 upstream. 
+ +Use PSCI based mitigation for speculative execution attacks targeting +the branch predictor. We use the same mechanism as the one used for +Cortex-A CPUs, we expect the PSCI version call to have a side effect +of clearing the BTBs. + +Acked-by: Will Deacon +Signed-off-by: Jayachandran C +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/cpu_errata.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -252,6 +252,16 @@ const struct arm64_cpu_capabilities arm6 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + .enable = enable_psci_bp_hardening, + }, ++ { ++ .capability = ARM64_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), ++ .enable = enable_psci_bp_hardening, ++ }, ++ { ++ .capability = ARM64_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), ++ .enable = enable_psci_bp_hardening, ++ }, + #endif + { + } diff --git a/queue-4.9/arm64-cpu_errata-allow-an-erratum-to-be-match-for-all-revisions-of-a-core.patch b/queue-4.9/arm64-cpu_errata-allow-an-erratum-to-be-match-for-all-revisions-of-a-core.patch new file mode 100644 index 00000000000..cb77822a0b1 --- /dev/null +++ b/queue-4.9/arm64-cpu_errata-allow-an-erratum-to-be-match-for-all-revisions-of-a-core.patch @@ -0,0 +1,50 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:20 +0100 +Subject: [PATCH v4.9.y 24/42] arm64: cpu_errata: Allow an erratum to be match for all revisions of a core +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-25-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit 06f1494f837da8997d670a1ba87add7963b08922 upstream. 
+ +Some minor erratum may not be fixed in further revisions of a core, +leading to a situation where the workaround needs to be updated each +time an updated core is released. + +Introduce a MIDR_ALL_VERSIONS match helper that will work for all +versions of that MIDR, once and for all. + +Acked-by: Thomas Gleixner +Acked-by: Mark Rutland +Acked-by: Daniel Lezcano +Reviewed-by: Suzuki K Poulose +Signed-off-by: Marc Zyngier +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/cpu_errata.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -127,6 +127,13 @@ static void install_bp_hardening_cb(con + .midr_range_min = min, \ + .midr_range_max = max + ++#define MIDR_ALL_VERSIONS(model) \ ++ .def_scope = SCOPE_LOCAL_CPU, \ ++ .matches = is_affected_midr_range, \ ++ .midr_model = model, \ ++ .midr_range_min = 0, \ ++ .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) ++ + const struct arm64_cpu_capabilities arm64_errata[] = { + #if defined(CONFIG_ARM64_ERRATUM_826319) || \ + defined(CONFIG_ARM64_ERRATUM_827319) || \ diff --git a/queue-4.9/arm64-cpufeature-__this_cpu_has_cap-shouldn-t-stop-early.patch b/queue-4.9/arm64-cpufeature-__this_cpu_has_cap-shouldn-t-stop-early.patch new file mode 100644 index 00000000000..62414fa6525 --- /dev/null +++ b/queue-4.9/arm64-cpufeature-__this_cpu_has_cap-shouldn-t-stop-early.patch @@ -0,0 +1,50 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:07 +0100 +Subject: [PATCH v4.9.y 11/42] arm64: cpufeature: __this_cpu_has_cap() shouldn't stop early +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-12-mark.rutland@arm.com> + +From: Mark Rutland + + +From: 
James Morse + +commit edf298cfce47ab7279d03b5203ae2ef3a58e49db upstream. + +this_cpu_has_cap() tests caps->desc not caps->matches, so it stops +walking the list when it finds a 'silent' feature, instead of +walking to the end of the list. + +Prior to v4.6's 644c2ae198412 ("arm64: cpufeature: Test 'matches' pointer +to find the end of the list") we always tested desc to find the end of +a capability list. This was changed for dubious things like PAN_NOT_UAO. +v4.7's e3661b128e53e ("arm64: Allow a capability to be checked on +single CPU") added this_cpu_has_cap() using the old desc style test. + +CC: Suzuki K Poulose +Reviewed-by: Suzuki K Poulose +Acked-by: Marc Zyngier +Signed-off-by: James Morse +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/cpufeature.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -1024,9 +1024,8 @@ static bool __this_cpu_has_cap(const str + if (WARN_ON(preemptible())) + return false; + +- for (caps = cap_array; caps->desc; caps++) ++ for (caps = cap_array; caps->matches; caps++) + if (caps->capability == cap && +- caps->matches && + caps->matches(caps, SCOPE_LOCAL_CPU)) + return true; + return false; diff --git a/queue-4.9/arm64-cpufeature-pass-capability-structure-to-enable-callback.patch b/queue-4.9/arm64-cpufeature-pass-capability-structure-to-enable-callback.patch new file mode 100644 index 00000000000..9f370e96147 --- /dev/null +++ b/queue-4.9/arm64-cpufeature-pass-capability-structure-to-enable-callback.patch @@ -0,0 +1,52 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:09 +0100 +Subject: [PATCH v4.9.y 13/42] arm64: cpufeature: Pass capability structure to ->enable callback +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, 
will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-14-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit 0a0d111d40fd1dc588cc590fab6b55d86ddc71d3 upstream. + +In order to invoke the CPU capability ->matches callback from the ->enable +callback for applying local-CPU workarounds, we need a handle on the +capability structure. + +This patch passes a pointer to the capability structure to the ->enable +callback. + +Reviewed-by: Suzuki K Poulose +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/cpufeature.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -1058,7 +1058,7 @@ void __init enable_cpu_capabilities(cons + * uses an IPI, giving us a PSTATE that disappears when + * we return. 
+ */ +- stop_machine(caps->enable, NULL, cpu_online_mask); ++ stop_machine(caps->enable, (void *)caps, cpu_online_mask); + } + + /* +@@ -1115,7 +1115,7 @@ verify_local_cpu_features(const struct a + cpu_die_early(); + } + if (caps->enable) +- caps->enable(NULL); ++ caps->enable((void *)caps); + } + } + diff --git a/queue-4.9/arm64-cputype-add-missing-midr-values-for-cortex-a72-and-cortex-a75.patch b/queue-4.9/arm64-cputype-add-missing-midr-values-for-cortex-a72-and-cortex-a75.patch new file mode 100644 index 00000000000..5550d96ce35 --- /dev/null +++ b/queue-4.9/arm64-cputype-add-missing-midr-values-for-cortex-a72-and-cortex-a75.patch @@ -0,0 +1,50 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:19 +0100 +Subject: [PATCH v4.9.y 23/42] arm64: cputype: Add missing MIDR values for Cortex-A72 and Cortex-A75 +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-24-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit a65d219fe5dc7887fd5ca04c2ac3e9a34feb8dfc upstream. + +Hook up MIDR values for the Cortex-A72 and Cortex-A75 CPUs, since they +will soon need MIDR matches for hardening the branch predictor. 
+ +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/cputype.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/arch/arm64/include/asm/cputype.h ++++ b/arch/arm64/include/asm/cputype.h +@@ -75,7 +75,10 @@ + #define ARM_CPU_PART_AEM_V8 0xD0F + #define ARM_CPU_PART_FOUNDATION 0xD00 + #define ARM_CPU_PART_CORTEX_A57 0xD07 ++#define ARM_CPU_PART_CORTEX_A72 0xD08 + #define ARM_CPU_PART_CORTEX_A53 0xD03 ++#define ARM_CPU_PART_CORTEX_A73 0xD09 ++#define ARM_CPU_PART_CORTEX_A75 0xD0A + + #define APM_CPU_PART_POTENZA 0x000 + +@@ -87,6 +90,9 @@ + + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) + #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) ++#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) ++#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) ++#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) + #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) + #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) + #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) diff --git a/queue-4.9/arm64-entry-apply-bp-hardening-for-high-priority-synchronous-exceptions.patch b/queue-4.9/arm64-entry-apply-bp-hardening-for-high-priority-synchronous-exceptions.patch new file mode 100644 index 00000000000..0d0d91d5c6b --- /dev/null +++ b/queue-4.9/arm64-entry-apply-bp-hardening-for-high-priority-synchronous-exceptions.patch @@ -0,0 +1,71 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:17 +0100 +Subject: [PATCH v4.9.y 21/42] arm64: entry: Apply BP hardening for high-priority synchronous exceptions +To: stable@vger.kernel.org +Cc: 
mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-22-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit 5dfc6ed27710c42cbc15db5c0d4475699991da0a upstream. + +Software-step and PC alignment fault exceptions have higher priority than +instruction abort exceptions, so apply the BP hardening hooks there too +if the user PC appears to reside in kernel space. + +Reported-by: Dan Hettena +Reviewed-by: Marc Zyngier +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/entry.S | 6 ++++-- + arch/arm64/mm/fault.c | 9 +++++++++ + 2 files changed, 13 insertions(+), 2 deletions(-) + +--- a/arch/arm64/kernel/entry.S ++++ b/arch/arm64/kernel/entry.S +@@ -624,8 +624,10 @@ el0_sp_pc: + * Stack or PC alignment exception handling + */ + mrs x26, far_el1 +- // enable interrupts before calling the main handler +- enable_dbg_and_irq ++ enable_dbg ++#ifdef CONFIG_TRACE_IRQFLAGS ++ bl trace_hardirqs_off ++#endif + ct_user_exit + mov x0, x26 + mov x1, x25 +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -617,6 +617,12 @@ asmlinkage void __exception do_sp_pc_abo + struct siginfo info; + struct task_struct *tsk = current; + ++ if (user_mode(regs)) { ++ if (instruction_pointer(regs) > TASK_SIZE) ++ arm64_apply_bp_hardening(); ++ local_irq_enable(); ++ } ++ + if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS)) + pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n", + tsk->comm, task_pid_nr(tsk), +@@ -676,6 +682,9 @@ asmlinkage int __exception do_debug_exce + if (interrupts_enabled(regs)) + trace_hardirqs_off(); + ++ if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE) ++ arm64_apply_bp_hardening(); ++ + if (!inf->fn(addr, esr, regs)) { + rv = 1; + } 
else { diff --git a/queue-4.9/arm64-entry-apply-bp-hardening-for-suspicious-interrupts-from-el0.patch b/queue-4.9/arm64-entry-apply-bp-hardening-for-suspicious-interrupts-from-el0.patch new file mode 100644 index 00000000000..4e6b65fa1fd --- /dev/null +++ b/queue-4.9/arm64-entry-apply-bp-hardening-for-suspicious-interrupts-from-el0.patch @@ -0,0 +1,63 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:18 +0100 +Subject: [PATCH v4.9.y 22/42] arm64: entry: Apply BP hardening for suspicious interrupts from EL0 +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-23-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit 30d88c0e3ace625a92eead9ca0ad94093a8f59fe upstream. + +It is possible to take an IRQ from EL0 following a branch to a kernel +address in such a way that the IRQ is prioritised over the instruction +abort. Whilst an attacker would need to get the stars to align here, +it might be sufficient with enough calibration so perform BP hardening +in the rare case that we see a kernel address in the ELR when handling +an IRQ from EL0. 
+ +Reported-by: Dan Hettena +Reviewed-by: Marc Zyngier +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/entry.S | 5 +++++ + arch/arm64/mm/fault.c | 6 ++++++ + 2 files changed, 11 insertions(+) + +--- a/arch/arm64/kernel/entry.S ++++ b/arch/arm64/kernel/entry.S +@@ -686,6 +686,11 @@ el0_irq_naked: + #endif + + ct_user_exit ++#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR ++ tbz x22, #55, 1f ++ bl do_el0_irq_bp_hardening ++1: ++#endif + irq_handler + + #ifdef CONFIG_TRACE_IRQFLAGS +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -590,6 +590,12 @@ asmlinkage void __exception do_mem_abort + arm64_notify_die("", regs, &info, esr); + } + ++asmlinkage void __exception do_el0_irq_bp_hardening(void) ++{ ++ /* PC has already been checked in entry.S */ ++ arm64_apply_bp_hardening(); ++} ++ + asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) diff --git a/queue-4.9/arm64-entry-ensure-branch-through-syscall-table-is-bounded-under-speculation.patch b/queue-4.9/arm64-entry-ensure-branch-through-syscall-table-is-bounded-under-speculation.patch new file mode 100644 index 00000000000..6a46f75f028 --- /dev/null +++ b/queue-4.9/arm64-entry-ensure-branch-through-syscall-table-is-bounded-under-speculation.patch @@ -0,0 +1,63 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:02 +0100 +Subject: [PATCH v4.9.y 06/42] arm64: entry: Ensure branch through syscall table is bounded under speculation +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-7-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit 
6314d90e64936c584f300a52ef173603fb2461b5 upstream. + +In a similar manner to array_index_mask_nospec, this patch introduces an +assembly macro (mask_nospec64) which can be used to bound a value under +speculation. This macro is then used to ensure that the indirect branch +through the syscall table is bounded under speculation, with out-of-range +addresses speculating as calls to sys_io_setup (0). + +Reviewed-by: Mark Rutland +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +[v4.9: use existing scno & sc_nr definitions] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/assembler.h | 11 +++++++++++ + arch/arm64/kernel/entry.S | 1 + + 2 files changed, 12 insertions(+) + +--- a/arch/arm64/include/asm/assembler.h ++++ b/arch/arm64/include/asm/assembler.h +@@ -94,6 +94,17 @@ + .endm + + /* ++ * Sanitise a 64-bit bounded index wrt speculation, returning zero if out ++ * of bounds. ++ */ ++ .macro mask_nospec64, idx, limit, tmp ++ sub \tmp, \idx, \limit ++ bic \tmp, \tmp, \idx ++ and \idx, \idx, \tmp, asr #63 ++ csdb ++ .endm ++ ++/* + * NOP sequence + */ + .macro nops, num +--- a/arch/arm64/kernel/entry.S ++++ b/arch/arm64/kernel/entry.S +@@ -795,6 +795,7 @@ el0_svc_naked: // compat entry point + b.ne __sys_trace + cmp scno, sc_nr // check upper syscall limit + b.hs ni_sys ++ mask_nospec64 scno, sc_nr, x19 // enforce bounds for syscall number + ldr x16, [stbl, scno, lsl #3] // address in the syscall table + blr x16 // call sys_* routine + b ret_fast_syscall diff --git a/queue-4.9/arm64-factor-out-ttbr0_el1-post-update-workaround-into-a-specific-asm-macro.patch b/queue-4.9/arm64-factor-out-ttbr0_el1-post-update-workaround-into-a-specific-asm-macro.patch new file mode 100644 index 00000000000..2b35473fe02 --- /dev/null +++ b/queue-4.9/arm64-factor-out-ttbr0_el1-post-update-workaround-into-a-specific-asm-macro.patch @@ -0,0 +1,68 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 
+From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:11 +0100 +Subject: [PATCH v4.9.y 15/42] arm64: Factor out TTBR0_EL1 post-update workaround into a specific asm macro +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-16-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Catalin Marinas + +commit f33bcf03e6079668da6bf4eec4a7dcf9289131d0 upstream. + +This patch takes the errata workaround code out of cpu_do_switch_mm into +a dedicated post_ttbr0_update_workaround macro which will be reused in a +subsequent patch. + +Cc: Will Deacon +Cc: James Morse +Cc: Kees Cook +Reviewed-by: Mark Rutland +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/assembler.h | 14 ++++++++++++++ + arch/arm64/mm/proc.S | 6 +----- + 2 files changed, 15 insertions(+), 5 deletions(-) + +--- a/arch/arm64/include/asm/assembler.h ++++ b/arch/arm64/include/asm/assembler.h +@@ -434,4 +434,18 @@ alternative_endif + .macro pte_to_phys, phys, pte + and \phys, \pte, #(((1 << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) + .endm ++ ++/* ++ * Errata workaround post TTBR0_EL1 update. 
++ */ ++ .macro post_ttbr0_update_workaround ++#ifdef CONFIG_CAVIUM_ERRATUM_27456 ++alternative_if ARM64_WORKAROUND_CAVIUM_27456 ++ ic iallu ++ dsb nsh ++ isb ++alternative_else_nop_endif ++#endif ++ .endm ++ + #endif /* __ASM_ASSEMBLER_H */ +--- a/arch/arm64/mm/proc.S ++++ b/arch/arm64/mm/proc.S +@@ -139,11 +139,7 @@ ENTRY(cpu_do_switch_mm) + isb + msr ttbr0_el1, x0 // now update TTBR0 + isb +-alternative_if ARM64_WORKAROUND_CAVIUM_27456 +- ic iallu +- dsb nsh +- isb +-alternative_else_nop_endif ++ post_ttbr0_update_workaround + ret + ENDPROC(cpu_do_switch_mm) + diff --git a/queue-4.9/arm64-implement-array_index_mask_nospec.patch b/queue-4.9/arm64-implement-array_index_mask_nospec.patch new file mode 100644 index 00000000000..c594224c973 --- /dev/null +++ b/queue-4.9/arm64-implement-array_index_mask_nospec.patch @@ -0,0 +1,64 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:10:58 +0100 +Subject: [PATCH v4.9.y 02/42] arm64: Implement array_index_mask_nospec() +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-3-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Robin Murphy + +commit 022620eed3d0bc4bf2027326f599f5ad71c2ea3f upstream. + +Provide an optimised, assembly implementation of array_index_mask_nospec() +for arm64 so that the compiler is not in a position to transform the code +in ways which affect its ability to inhibit speculation (e.g. by introducing +conditional branches). + +This is similar to the sequence used by x86, modulo architectural differences +in the carry/borrow flags. 
+ +Reviewed-by: Mark Rutland +Signed-off-by: Robin Murphy +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/barrier.h | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +--- a/arch/arm64/include/asm/barrier.h ++++ b/arch/arm64/include/asm/barrier.h +@@ -40,6 +40,27 @@ + #define dma_rmb() dmb(oshld) + #define dma_wmb() dmb(oshst) + ++/* ++ * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz ++ * and 0 otherwise. ++ */ ++#define array_index_mask_nospec array_index_mask_nospec ++static inline unsigned long array_index_mask_nospec(unsigned long idx, ++ unsigned long sz) ++{ ++ unsigned long mask; ++ ++ asm volatile( ++ " cmp %1, %2\n" ++ " sbc %0, xzr, xzr\n" ++ : "=r" (mask) ++ : "r" (idx), "Ir" (sz) ++ : "cc"); ++ ++ csdb(); ++ return mask; ++} ++ + #define __smp_mb() dmb(ish) + #define __smp_rmb() dmb(ishld) + #define __smp_wmb() dmb(ishst) diff --git a/queue-4.9/arm64-implement-branch-predictor-hardening-for-affected-cortex-a-cpus.patch b/queue-4.9/arm64-implement-branch-predictor-hardening-for-affected-cortex-a-cpus.patch new file mode 100644 index 00000000000..a9a4a527df7 --- /dev/null +++ b/queue-4.9/arm64-implement-branch-predictor-hardening-for-affected-cortex-a-cpus.patch @@ -0,0 +1,135 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:21 +0100 +Subject: [PATCH v4.9.y 25/42] arm64: Implement branch predictor hardening for affected Cortex-A CPUs +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-26-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit aa6acde65e03186b5add8151e1ffe36c3c62639b upstream. 
+ +Cortex-A57, A72, A73 and A75 are susceptible to branch predictor aliasing +and can theoretically be attacked by malicious code. + +This patch implements a PSCI-based mitigation for these CPUs when available. +The call into firmware will invalidate the branch predictor state, preventing +any malicious entries from affecting other victim contexts. + +Co-developed-by: Marc Zyngier +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/bpi.S | 24 +++++++++++++++++++++++ + arch/arm64/kernel/cpu_errata.c | 42 +++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 66 insertions(+) + +--- a/arch/arm64/kernel/bpi.S ++++ b/arch/arm64/kernel/bpi.S +@@ -53,3 +53,27 @@ ENTRY(__bp_harden_hyp_vecs_start) + vectors __kvm_hyp_vector + .endr + ENTRY(__bp_harden_hyp_vecs_end) ++ENTRY(__psci_hyp_bp_inval_start) ++ sub sp, sp, #(8 * 18) ++ stp x16, x17, [sp, #(16 * 0)] ++ stp x14, x15, [sp, #(16 * 1)] ++ stp x12, x13, [sp, #(16 * 2)] ++ stp x10, x11, [sp, #(16 * 3)] ++ stp x8, x9, [sp, #(16 * 4)] ++ stp x6, x7, [sp, #(16 * 5)] ++ stp x4, x5, [sp, #(16 * 6)] ++ stp x2, x3, [sp, #(16 * 7)] ++ stp x0, x1, [sp, #(16 * 8)] ++ mov x0, #0x84000000 ++ smc #0 ++ ldp x16, x17, [sp, #(16 * 0)] ++ ldp x14, x15, [sp, #(16 * 1)] ++ ldp x12, x13, [sp, #(16 * 2)] ++ ldp x10, x11, [sp, #(16 * 3)] ++ ldp x8, x9, [sp, #(16 * 4)] ++ ldp x6, x7, [sp, #(16 * 5)] ++ ldp x4, x5, [sp, #(16 * 6)] ++ ldp x2, x3, [sp, #(16 * 7)] ++ ldp x0, x1, [sp, #(16 * 8)] ++ add sp, sp, #(8 * 18) ++ENTRY(__psci_hyp_bp_inval_end) +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -53,6 +53,8 @@ static int cpu_enable_trap_ctr_access(vo + DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + + #ifdef CONFIG_KVM ++extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; ++ + static void __copy_hyp_vect_bpi(int slot, const char 
*hyp_vecs_start, + const char *hyp_vecs_end) + { +@@ -94,6 +96,9 @@ static void __install_bp_hardening_cb(bp + spin_unlock(&bp_lock); + } + #else ++#define __psci_hyp_bp_inval_start NULL ++#define __psci_hyp_bp_inval_end NULL ++ + static void __install_bp_hardening_cb(bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) +@@ -118,6 +123,21 @@ static void install_bp_hardening_cb(con + + __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); + } ++ ++#include <linux/psci.h> ++ ++static int enable_psci_bp_hardening(void *data) ++{ ++ const struct arm64_cpu_capabilities *entry = data; ++ ++ if (psci_ops.get_version) ++ install_bp_hardening_cb(entry, ++ (bp_hardening_cb_t)psci_ops.get_version, ++ __psci_hyp_bp_inval_start, ++ __psci_hyp_bp_inval_end); ++ ++ return 0; ++} + #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ + + #define MIDR_RANGE(model, min, max) \ +@@ -211,6 +231,28 @@ const struct arm64_cpu_capabilities arm6 + .def_scope = SCOPE_LOCAL_CPU, + .enable = cpu_enable_trap_ctr_access, + }, ++#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR ++ { ++ .capability = ARM64_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), ++ .enable = enable_psci_bp_hardening, ++ }, ++ { ++ .capability = ARM64_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), ++ .enable = enable_psci_bp_hardening, ++ }, ++ { ++ .capability = ARM64_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), ++ .enable = enable_psci_bp_hardening, ++ }, ++ { ++ .capability = ARM64_HARDEN_BRANCH_PREDICTOR, ++ MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), ++ .enable = enable_psci_bp_hardening, ++ }, ++#endif + { + } + }; diff --git a/queue-4.9/arm64-kill-psci_get_version-as-a-variant-2-workaround.patch b/queue-4.9/arm64-kill-psci_get_version-as-a-variant-2-workaround.patch new file mode 100644 index 00000000000..40124d86f38 --- /dev/null +++ b/queue-4.9/arm64-kill-psci_get_version-as-a-variant-2-workaround.patch @@ -0,0 +1,214 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 
+From:
Mark Rutland +Date: Thu, 12 Apr 2018 12:11:38 +0100 +Subject: [PATCH v4.9.y 42/42] arm64: Kill PSCI_GET_VERSION as a variant-2 workaround +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-43-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit 3a0a397ff5ff8b56ca9f7908b75dee6bf0b5fabb upstream. + +Now that we've standardised on SMCCC v1.1 to perform the branch +prediction invalidation, let's drop the previous band-aid. +If vendors haven't updated their firmware to do SMCCC 1.1, they +haven't updated PSCI either, so we don't lose anything. + +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/bpi.S | 24 --------------------- + arch/arm64/kernel/cpu_errata.c | 45 +++++++++++------------------------------ + arch/arm64/kvm/hyp/switch.c | 14 ------------ + 3 files changed, 13 insertions(+), 70 deletions(-) + +--- a/arch/arm64/kernel/bpi.S ++++ b/arch/arm64/kernel/bpi.S +@@ -54,30 +54,6 @@ ENTRY(__bp_harden_hyp_vecs_start) + vectors __kvm_hyp_vector + .endr + ENTRY(__bp_harden_hyp_vecs_end) +-ENTRY(__psci_hyp_bp_inval_start) +- sub sp, sp, #(8 * 18) +- stp x16, x17, [sp, #(16 * 0)] +- stp x14, x15, [sp, #(16 * 1)] +- stp x12, x13, [sp, #(16 * 2)] +- stp x10, x11, [sp, #(16 * 3)] +- stp x8, x9, [sp, #(16 * 4)] +- stp x6, x7, [sp, #(16 * 5)] +- stp x4, x5, [sp, #(16 * 6)] +- stp x2, x3, [sp, #(16 * 7)] +- stp x0, x1, [sp, #(16 * 8)] +- mov x0, #0x84000000 +- smc #0 +- ldp x16, x17, [sp, #(16 * 0)] +- ldp x14, x15, [sp, #(16 * 1)] +- ldp x12, x13, [sp, #(16 * 2)] +- ldp x10, x11, [sp, #(16 * 3)] +- ldp x8, x9, [sp, #(16 * 4)] +- ldp x6, x7, [sp, #(16 * 5)] +- ldp x4, x5, [sp, #(16 * 6)] +- ldp x2, x3, [sp,
#(16 * 7)] +- ldp x0, x1, [sp, #(16 * 8)] +- add sp, sp, #(8 * 18) +-ENTRY(__psci_hyp_bp_inval_end) + + .macro smccc_workaround_1 inst + sub sp, sp, #(8 * 4) +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -53,7 +53,6 @@ static int cpu_enable_trap_ctr_access(vo + DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + + #ifdef CONFIG_KVM +-extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[]; + extern char __smccc_workaround_1_smc_start[]; + extern char __smccc_workaround_1_smc_end[]; + extern char __smccc_workaround_1_hvc_start[]; +@@ -100,8 +99,6 @@ static void __install_bp_hardening_cb(bp + spin_unlock(&bp_lock); + } + #else +-#define __psci_hyp_bp_inval_start NULL +-#define __psci_hyp_bp_inval_end NULL + #define __smccc_workaround_1_smc_start NULL + #define __smccc_workaround_1_smc_end NULL + #define __smccc_workaround_1_hvc_start NULL +@@ -146,24 +143,25 @@ static void call_hvc_arch_workaround_1(v + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); + } + +-static bool check_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) ++static int enable_smccc_arch_workaround_1(void *data) + { ++ const struct arm64_cpu_capabilities *entry = data; + bp_hardening_cb_t cb; + void *smccc_start, *smccc_end; + struct arm_smccc_res res; + + if (!entry->matches(entry, SCOPE_LOCAL_CPU)) +- return false; ++ return 0; + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) +- return false; ++ return 0; + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if (res.a0) +- return false; ++ return 0; + cb = call_hvc_arch_workaround_1; + smccc_start = __smccc_workaround_1_hvc_start; + smccc_end = __smccc_workaround_1_hvc_end; +@@ -173,35 +171,18 @@ static bool check_smccc_arch_workaround_ + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if (res.a0) +- return false; ++ 
return 0; + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; + break; + + default: +- return false; ++ return 0; + } + + install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); + +- return true; +-} +- +-static int enable_psci_bp_hardening(void *data) +-{ +- const struct arm64_cpu_capabilities *entry = data; +- +- if (psci_ops.get_version) { +- if (check_smccc_arch_workaround_1(entry)) +- return 0; +- +- install_bp_hardening_cb(entry, +- (bp_hardening_cb_t)psci_ops.get_version, +- __psci_hyp_bp_inval_start, +- __psci_hyp_bp_inval_end); +- } +- + return 0; + } + #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ +@@ -301,32 +282,32 @@ const struct arm64_cpu_capabilities arm6 + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), +- .enable = enable_psci_bp_hardening, ++ .enable = enable_smccc_arch_workaround_1, + }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), +- .enable = enable_psci_bp_hardening, ++ .enable = enable_smccc_arch_workaround_1, + }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), +- .enable = enable_psci_bp_hardening, ++ .enable = enable_smccc_arch_workaround_1, + }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), +- .enable = enable_psci_bp_hardening, ++ .enable = enable_smccc_arch_workaround_1, + }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), +- .enable = enable_psci_bp_hardening, ++ .enable = enable_smccc_arch_workaround_1, + }, + { + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), +- .enable = enable_psci_bp_hardening, ++ .enable = enable_smccc_arch_workaround_1, + }, + #endif + { +--- a/arch/arm64/kvm/hyp/switch.c ++++ b/arch/arm64/kvm/hyp/switch.c +@@ -311,20 +311,6 @@ again: + if (exit_code == ARM_EXCEPTION_TRAP && 
!__populate_fault_info(vcpu)) + goto again; + +- if (exit_code == ARM_EXCEPTION_TRAP && +- (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 || +- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32)) { +- u32 val = vcpu_get_reg(vcpu, 0); +- +- if (val == PSCI_0_2_FN_PSCI_VERSION) { +- val = kvm_psci_version(vcpu, kern_hyp_va(vcpu->kvm)); +- if (unlikely(val == KVM_ARM_PSCI_0_1)) +- val = PSCI_RET_NOT_SUPPORTED; +- vcpu_set_reg(vcpu, 0, val); +- goto again; +- } +- } +- + if (static_branch_unlikely(&vgic_v2_cpuif_trap) && + exit_code == ARM_EXCEPTION_TRAP) { + bool valid; diff --git a/queue-4.9/arm64-kvm-add-smccc_arch_workaround_1-fast-handling.patch b/queue-4.9/arm64-kvm-add-smccc_arch_workaround_1-fast-handling.patch new file mode 100644 index 00000000000..064f7361d1b --- /dev/null +++ b/queue-4.9/arm64-kvm-add-smccc_arch_workaround_1-fast-handling.patch @@ -0,0 +1,76 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:32 +0100 +Subject: [PATCH v4.9.y 36/42] arm64: KVM: Add SMCCC_ARCH_WORKAROUND_1 fast handling +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-37-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit f72af90c3783d924337624659b43e2d36f1b36b4 upstream. + +We want SMCCC_ARCH_WORKAROUND_1 to be fast. As fast as possible. +So let's intercept it as early as we can by testing for the +function call number as soon as we've identified a HVC call +coming from the guest. 
+ +Tested-by: Ard Biesheuvel +Reviewed-by: Christoffer Dall +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/hyp/hyp-entry.S | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/arch/arm64/kvm/hyp/hyp-entry.S ++++ b/arch/arm64/kvm/hyp/hyp-entry.S +@@ -15,6 +15,7 @@ + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + ++#include <linux/arm-smccc.h> + #include <linux/linkage.h> + + #include <asm/alternative.h> +@@ -79,10 +80,11 @@ alternative_endif + lsr x0, x1, #ESR_ELx_EC_SHIFT + + cmp x0, #ESR_ELx_EC_HVC64 ++ ccmp x0, #ESR_ELx_EC_HVC32, #4, ne + b.ne el1_trap + +- mrs x1, vttbr_el2 // If vttbr is valid, the 64bit guest +- cbnz x1, el1_trap // called HVC ++ mrs x1, vttbr_el2 // If vttbr is valid, the guest ++ cbnz x1, el1_hvc_guest // called HVC + + /* Here, we're pretty sure the host called HVC. */ + ldp x0, x1, [sp], #16 +@@ -101,6 +103,20 @@ alternative_endif + + 2: eret + ++el1_hvc_guest: ++ /* ++ * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1. ++ * The workaround has already been applied on the host, ++ * so let's quickly get back to the guest. We don't bother ++ * restoring x1, as it can be clobbered anyway. 
++ */ ++ ldr x1, [sp] // Guest's x0 ++ eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1 ++ cbnz w1, el1_trap ++ mov x0, x1 ++ add sp, sp, #16 ++ eret ++ + el1_trap: + /* + * x0: ESR_EC diff --git a/queue-4.9/arm64-kvm-increment-pc-after-handling-an-smc-trap.patch b/queue-4.9/arm64-kvm-increment-pc-after-handling-an-smc-trap.patch new file mode 100644 index 00000000000..c3149c60d0c --- /dev/null +++ b/queue-4.9/arm64-kvm-increment-pc-after-handling-an-smc-trap.patch @@ -0,0 +1,54 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:23 +0100 +Subject: [PATCH v4.9.y 27/42] arm64: KVM: Increment PC after handling an SMC trap +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-28-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit f5115e8869e1dfafac0e414b4f1664f3a84a4683 upstream. + +When handling an SMC trap, the "preferred return address" is set +to that of the SMC, and not the next PC (which is a departure from +the behaviour of an SMC that isn't trapped). + +Increment PC in the handler, as the guest is otherwise forever +stuck... 
+ +Cc: stable@vger.kernel.org +Fixes: acfb3b883f6d ("arm64: KVM: Fix SMCCC handling of unimplemented SMC/HVC calls") +Reviewed-by: Christoffer Dall +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/handle_exit.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/arch/arm64/kvm/handle_exit.c ++++ b/arch/arm64/kvm/handle_exit.c +@@ -53,7 +53,16 @@ static int handle_hvc(struct kvm_vcpu *v + + static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) + { ++ /* ++ * "If an SMC instruction executed at Non-secure EL1 is ++ * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a ++ * Trap exception, not a Secure Monitor Call exception [...]" ++ * ++ * We need to advance the PC after the trap, as it would ++ * otherwise return to the same address... ++ */ + vcpu_set_reg(vcpu, 0, ~0UL); ++ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; + } + diff --git a/queue-4.9/arm64-kvm-make-psci_version-a-fast-path.patch b/queue-4.9/arm64-kvm-make-psci_version-a-fast-path.patch new file mode 100644 index 00000000000..46bd64a4ef8 --- /dev/null +++ b/queue-4.9/arm64-kvm-make-psci_version-a-fast-path.patch @@ -0,0 +1,58 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:29 +0100 +Subject: [PATCH v4.9.y 33/42] arm64: KVM: Make PSCI_VERSION a fast path +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-34-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit 90348689d500410ca7a55624c667f956771dce7f upstream. 
+ +For those CPUs that require PSCI to perform a BP invalidation, +going all the way to the PSCI code for not much is a waste of +precious cycles. Let's terminate that call as early as possible. + +Signed-off-by: Marc Zyngier +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/hyp/switch.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/arch/arm64/kvm/hyp/switch.c ++++ b/arch/arm64/kvm/hyp/switch.c +@@ -17,6 +17,7 @@ + + #include <linux/types.h> + #include <linux/jump_label.h> ++#include <uapi/linux/psci.h> + + #include <asm/kvm_asm.h> + #include <asm/kvm_emulate.h> +@@ -308,6 +309,18 @@ again: + if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) + goto again; + ++ if (exit_code == ARM_EXCEPTION_TRAP && ++ (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 || ++ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32) && ++ vcpu_get_reg(vcpu, 0) == PSCI_0_2_FN_PSCI_VERSION) { ++ u64 val = PSCI_RET_NOT_SUPPORTED; ++ if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) ++ val = 2; ++ ++ vcpu_set_reg(vcpu, 0, val); ++ goto again; ++ } ++ + if (static_branch_unlikely(&vgic_v2_cpuif_trap) && + exit_code == ARM_EXCEPTION_TRAP) { + bool valid; diff --git a/queue-4.9/arm64-kvm-report-smccc_arch_workaround_1-bp-hardening-support.patch b/queue-4.9/arm64-kvm-report-smccc_arch_workaround_1-bp-hardening-support.patch new file mode 100644 index 00000000000..822c40bd2af --- /dev/null +++ b/queue-4.9/arm64-kvm-report-smccc_arch_workaround_1-bp-hardening-support.patch @@ -0,0 +1,101 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:31 +0100 +Subject: [PATCH v4.9.y 35/42] arm64: KVM: Report SMCCC_ARCH_WORKAROUND_1 BP hardening support +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, 
catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID:
<20180412111138.40990-36-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit 6167ec5c9145cdf493722dfd80a5d48bafc4a18a upstream. + +A new feature of SMCCC 1.1 is that it offers firmware-based CPU +workarounds. In particular, SMCCC_ARCH_WORKAROUND_1 provides +BP hardening for CVE-2017-5715. + +If the host has some mitigation for this issue, report that +we deal with it using SMCCC_ARCH_WORKAROUND_1, as we apply the +host workaround on every guest exit. + +Tested-by: Ard Biesheuvel +Reviewed-by: Christoffer Dall +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +[v4.9: account for files moved to virt/ upstream] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/include/asm/kvm_host.h | 6 ++++++ + arch/arm/kvm/psci.c | 9 ++++++++- + arch/arm64/include/asm/kvm_host.h | 5 +++++ + include/linux/arm-smccc.h | 5 +++++ + 4 files changed, 24 insertions(+), 1 deletion(-) + +--- a/arch/arm/include/asm/kvm_host.h ++++ b/arch/arm/include/asm/kvm_host.h +@@ -318,4 +318,10 @@ static inline int kvm_arm_vcpu_arch_has_ + return -ENXIO; + } + ++static inline bool kvm_arm_harden_branch_predictor(void) ++{ ++ /* No way to detect it yet, pretend it is not there. 
*/ ++ return false; ++} ++ + #endif /* __ARM_KVM_HOST_H__ */ +--- a/arch/arm/kvm/psci.c ++++ b/arch/arm/kvm/psci.c +@@ -403,13 +403,20 @@ int kvm_hvc_call_handler(struct kvm_vcpu + { + u32 func_id = smccc_get_function(vcpu); + u32 val = PSCI_RET_NOT_SUPPORTED; ++ u32 feature; + + switch (func_id) { + case ARM_SMCCC_VERSION_FUNC_ID: + val = ARM_SMCCC_VERSION_1_1; + break; + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: +- /* Nothing supported yet */ ++ feature = smccc_get_arg1(vcpu); ++ switch(feature) { ++ case ARM_SMCCC_ARCH_WORKAROUND_1: ++ if (kvm_arm_harden_branch_predictor()) ++ val = 0; ++ break; ++ } + break; + default: + return kvm_psci_call(vcpu); +--- a/arch/arm64/include/asm/kvm_host.h ++++ b/arch/arm64/include/asm/kvm_host.h +@@ -393,4 +393,9 @@ static inline void __cpu_init_stage2(voi + "PARange is %d bits, unsupported configuration!", parange); + } + ++static inline bool kvm_arm_harden_branch_predictor(void) ++{ ++ return cpus_have_cap(ARM64_HARDEN_BRANCH_PREDICTOR); ++} ++ + #endif /* __ARM64_KVM_HOST_H__ */ +--- a/include/linux/arm-smccc.h ++++ b/include/linux/arm-smccc.h +@@ -73,6 +73,11 @@ + ARM_SMCCC_SMC_32, \ + 0, 1) + ++#define ARM_SMCCC_ARCH_WORKAROUND_1 \ ++ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ++ ARM_SMCCC_SMC_32, \ ++ 0, 0x8000) ++ + #ifndef __ASSEMBLY__ + + #include diff --git a/queue-4.9/arm64-kvm-use-per-cpu-vector-when-bp-hardening-is-enabled.patch b/queue-4.9/arm64-kvm-use-per-cpu-vector-when-bp-hardening-is-enabled.patch new file mode 100644 index 00000000000..dec31025acb --- /dev/null +++ b/queue-4.9/arm64-kvm-use-per-cpu-vector-when-bp-hardening-is-enabled.patch @@ -0,0 +1,132 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:16 +0100 +Subject: [PATCH v4.9.y 20/42] arm64: KVM: Use per-CPU vector when BP hardening is enabled +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, 
ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-21-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit 6840bdd73d07216ab4bc46f5a8768c37ea519038 upstream. + +Now that we have per-CPU vectors, let's plug them in the KVM/arm64 code. + +Signed-off-by: Marc Zyngier +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +[v4.9: account for files moved to virt/ upstream, use cpus_have_cap()] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/include/asm/kvm_mmu.h | 10 ++++++++++ + arch/arm/kvm/arm.c | 9 ++++++++- + arch/arm64/include/asm/kvm_mmu.h | 38 ++++++++++++++++++++++++++++++++++++++ + arch/arm64/kvm/hyp/switch.c | 2 +- + 4 files changed, 57 insertions(+), 2 deletions(-) + +--- a/arch/arm/include/asm/kvm_mmu.h ++++ b/arch/arm/include/asm/kvm_mmu.h +@@ -223,6 +223,16 @@ static inline unsigned int kvm_get_vmid_ + return 8; + } + ++static inline void *kvm_get_hyp_vector(void) ++{ ++ return kvm_ksym_ref(__kvm_hyp_vector); ++} ++ ++static inline int kvm_map_vectors(void) ++{ ++ return 0; ++} ++ + #endif /* !__ASSEMBLY__ */ + + #endif /* __ARM_KVM_MMU_H__ */ +--- a/arch/arm/kvm/arm.c ++++ b/arch/arm/kvm/arm.c +@@ -1088,7 +1088,7 @@ static void cpu_init_hyp_mode(void *dumm + pgd_ptr = kvm_mmu_get_httbr(); + stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); + hyp_stack_ptr = stack_page + PAGE_SIZE; +- vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); ++ vector_ptr = (unsigned long)kvm_get_hyp_vector(); + + __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); + __cpu_init_stage2(); +@@ -1345,6 +1345,13 @@ static int init_hyp_mode(void) + goto out_err; + } + ++ ++ err = kvm_map_vectors(); ++ if (err) { ++ kvm_err("Cannot map vectors\n"); ++ goto out_err; ++ } ++ + /* + * Map the Hyp stack pages + */ +--- a/arch/arm64/include/asm/kvm_mmu.h ++++ b/arch/arm64/include/asm/kvm_mmu.h +@@ -313,5 +313,43 @@ static inline unsigned
int kvm_get_vmid_ + return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; + } + ++#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR ++#include <asm/mmu.h> ++ ++static inline void *kvm_get_hyp_vector(void) ++{ ++ struct bp_hardening_data *data = arm64_get_bp_hardening_data(); ++ void *vect = kvm_ksym_ref(__kvm_hyp_vector); ++ ++ if (data->fn) { ++ vect = __bp_harden_hyp_vecs_start + ++ data->hyp_vectors_slot * SZ_2K; ++ ++ if (!cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) ++ vect = lm_alias(vect); ++ } ++ ++ return vect; ++} ++ ++static inline int kvm_map_vectors(void) ++{ ++ return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start), ++ kvm_ksym_ref(__bp_harden_hyp_vecs_end), ++ PAGE_HYP_EXEC); ++} ++ ++#else ++static inline void *kvm_get_hyp_vector(void) ++{ ++ return kvm_ksym_ref(__kvm_hyp_vector); ++} ++ ++static inline int kvm_map_vectors(void) ++{ ++ return 0; ++} ++#endif ++ + #endif /* __ASSEMBLY__ */ + #endif /* __ARM64_KVM_MMU_H__ */ +--- a/arch/arm64/kvm/hyp/switch.c ++++ b/arch/arm64/kvm/hyp/switch.c +@@ -50,7 +50,7 @@ static void __hyp_text __activate_traps_ + val &= ~CPACR_EL1_FPEN; + write_sysreg(val, cpacr_el1); + +- write_sysreg(__kvm_hyp_vector, vbar_el1); ++ write_sysreg(kvm_get_hyp_vector(), vbar_el1); + } + + static void __hyp_text __activate_traps_nvhe(void) diff --git a/queue-4.9/arm64-make-user_ds-an-inclusive-limit.patch b/queue-4.9/arm64-make-user_ds-an-inclusive-limit.patch new file mode 100644 index 00000000000..3a8f88dcb72 --- /dev/null +++ b/queue-4.9/arm64-make-user_ds-an-inclusive-limit.patch @@ -0,0 +1,157 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:00 +0100 +Subject: [PATCH v4.9.y 04/42] arm64: Make USER_DS an inclusive limit +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID:
<20180412111138.40990-5-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Robin Murphy + +commit 51369e398d0d33e8f524314e672b07e8cf870e79 upstream. + +Currently, USER_DS represents an exclusive limit while KERNEL_DS is +inclusive. In order to do some clever trickery for speculation-safe +masking, we need them both to behave equivalently - there aren't enough +bits to make KERNEL_DS exclusive, so we have precisely one option. This +also happens to correct a longstanding false negative for a range +ending on the very top byte of kernel memory. + +Mark Rutland points out that we've actually got the semantics of +addresses vs. segments muddled up in most of the places we need to +amend, so shuffle the {USER,KERNEL}_DS definitions around such that we +can correct those properly instead of just pasting "-1"s everywhere. + +Signed-off-by: Robin Murphy +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +[v4.9: avoid dependence on TTBR0 SW PAN and THREAD_INFO_IN_TASK_STRUCT] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/processor.h | 3 ++ + arch/arm64/include/asm/uaccess.h | 46 +++++++++++++++++++++---------------- + arch/arm64/kernel/entry.S | 4 +-- + arch/arm64/mm/fault.c | 2 - + 4 files changed, 33 insertions(+), 22 deletions(-) + +--- a/arch/arm64/include/asm/processor.h ++++ b/arch/arm64/include/asm/processor.h +@@ -21,6 +21,9 @@ + + #define TASK_SIZE_64 (UL(1) << VA_BITS) + ++#define KERNEL_DS UL(-1) ++#define USER_DS (TASK_SIZE_64 - 1) ++ + #ifndef __ASSEMBLY__ + + /* +--- a/arch/arm64/include/asm/uaccess.h ++++ b/arch/arm64/include/asm/uaccess.h +@@ -28,6 +28,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -59,10 +60,7 @@ struct exception_table_entry + + extern int fixup_exception(struct pt_regs *regs); + +-#define KERNEL_DS (-1UL) + #define get_ds() (KERNEL_DS) +- +-#define USER_DS TASK_SIZE_64 + #define get_fs() 
(current_thread_info()->addr_limit) + + static inline void set_fs(mm_segment_t fs) +@@ -87,22 +85,32 @@ static inline void set_fs(mm_segment_t f + * Returns 1 if the range is valid, 0 otherwise. + * + * This is equivalent to the following test: +- * (u65)addr + (u65)size <= current->addr_limit +- * +- * This needs 65-bit arithmetic. ++ * (u65)addr + (u65)size <= (u65)current->addr_limit + 1 + */ +-#define __range_ok(addr, size) \ +-({ \ +- unsigned long __addr = (unsigned long __force)(addr); \ +- unsigned long flag, roksum; \ +- __chk_user_ptr(addr); \ +- asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ +- : "=&r" (flag), "=&r" (roksum) \ +- : "1" (__addr), "Ir" (size), \ +- "r" (current_thread_info()->addr_limit) \ +- : "cc"); \ +- flag; \ +-}) ++static inline unsigned long __range_ok(unsigned long addr, unsigned long size) ++{ ++ unsigned long limit = current_thread_info()->addr_limit; ++ ++ __chk_user_ptr(addr); ++ asm volatile( ++ // A + B <= C + 1 for all A,B,C, in four easy steps: ++ // 1: X = A + B; X' = X % 2^64 ++ " adds %0, %0, %2\n" ++ // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 ++ " csel %1, xzr, %1, hi\n" ++ // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' ++ // to compensate for the carry flag being set in step 4. For ++ // X > 2^64, X' merely has to remain nonzero, which it does. ++ " csinv %0, %0, xzr, cc\n" ++ // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1 ++ // comes from the carry in being clear. Otherwise, we are ++ // testing X' - C == 0, subject to the previous adjustments. 
++ " sbcs xzr, %0, %1\n" ++ " cset %0, ls\n" ++ : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc"); ++ ++ return addr; ++} + + /* + * When dealing with data aborts, watchpoints, or instruction traps we may end +@@ -111,7 +119,7 @@ static inline void set_fs(mm_segment_t f + */ + #define untagged_addr(addr) sign_extend64(addr, 55) + +-#define access_ok(type, addr, size) __range_ok(addr, size) ++#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size) + #define user_addr_max get_fs + + #define _ASM_EXTABLE(from, to) \ +--- a/arch/arm64/kernel/entry.S ++++ b/arch/arm64/kernel/entry.S +@@ -126,10 +126,10 @@ alternative_else_nop_endif + .else + add x21, sp, #S_FRAME_SIZE + get_thread_info tsk +- /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ ++ /* Save the task's original addr_limit and set USER_DS */ + ldr x20, [tsk, #TI_ADDR_LIMIT] + str x20, [sp, #S_ORIG_ADDR_LIMIT] +- mov x20, #TASK_SIZE_64 ++ mov x20, #USER_DS + str x20, [tsk, #TI_ADDR_LIMIT] + /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ + .endif /* \el == 0 */ +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -332,7 +332,7 @@ static int __kprobes do_page_fault(unsig + mm_flags |= FAULT_FLAG_WRITE; + } + +- if (is_permission_fault(esr) && (addr < USER_DS)) { ++ if (is_permission_fault(esr) && (addr < TASK_SIZE)) { + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) + die("Accessing user space memory with fs=KERNEL_DS", regs, esr); diff --git a/queue-4.9/arm64-move-bp-hardening-to-check_and_switch_context.patch b/queue-4.9/arm64-move-bp-hardening-to-check_and_switch_context.patch new file mode 100644 index 00000000000..cb53b854c86 --- /dev/null +++ b/queue-4.9/arm64-move-bp-hardening-to-check_and_switch_context.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:14 +0100 +Subject: [PATCH v4.9.y 18/42] arm64: Move BP 
hardening to check_and_switch_context +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-19-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit a8e4c0a919ae310944ed2c9ace11cf3ccd8a609b upstream. + +We call arm64_apply_bp_hardening() from post_ttbr_update_workaround, +which has the unexpected consequence of being triggered on every +exception return to userspace when ARM64_SW_TTBR0_PAN is selected, +even if no context switch actually occurred. + +This is a bit suboptimal, and it would be more logical to only +invalidate the branch predictor when we actually switch to +a different mm. + +In order to solve this, move the call to arm64_apply_bp_hardening() +into check_and_switch_context(), where we're guaranteed to pick +a different mm context. + +Acked-by: Will Deacon +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/mm/context.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/arch/arm64/mm/context.c ++++ b/arch/arm64/mm/context.c +@@ -230,6 +230,9 @@ void check_and_switch_context(struct mm_ + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + + switch_mm_fastpath: ++ ++ arm64_apply_bp_hardening(); ++ + cpu_switch_mm(mm->pgd, mm); + } + +@@ -240,8 +243,6 @@ asmlinkage void post_ttbr_update_workaro + "ic iallu; dsb nsh; isb", + ARM64_WORKAROUND_CAVIUM_27456, + CONFIG_CAVIUM_ERRATUM_27456)); +- +- arm64_apply_bp_hardening(); + } + + static int asids_init(void) diff --git a/queue-4.9/arm64-move-post_ttbr_update_workaround-to-c-code.patch b/queue-4.9/arm64-move-post_ttbr_update_workaround-to-c-code.patch new file mode 100644 index 00000000000..835ad8800ce --- /dev/null +++
b/queue-4.9/arm64-move-post_ttbr_update_workaround-to-c-code.patch @@ -0,0 +1,81 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:12 +0100 +Subject: [PATCH v4.9.y 16/42] arm64: Move post_ttbr_update_workaround to C code +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-17-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit 95e3de3590e3f2358bb13f013911bc1bfa5d3f53 upstream. + +We will soon need to invoke a CPU-specific function pointer after changing +page tables, so move post_ttbr_update_workaround out into C code to make +this possible. + +Signed-off-by: Marc Zyngier +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/assembler.h | 13 ------------- + arch/arm64/mm/context.c | 9 +++++++++ + arch/arm64/mm/proc.S | 3 +-- + 3 files changed, 10 insertions(+), 15 deletions(-) + +--- a/arch/arm64/include/asm/assembler.h ++++ b/arch/arm64/include/asm/assembler.h +@@ -435,17 +435,4 @@ alternative_endif + and \phys, \pte, #(((1 << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) + .endm + +-/* +- * Errata workaround post TTBR0_EL1 update. +- */ +- .macro post_ttbr0_update_workaround +-#ifdef CONFIG_CAVIUM_ERRATUM_27456 +-alternative_if ARM64_WORKAROUND_CAVIUM_27456 +- ic iallu +- dsb nsh +- isb +-alternative_else_nop_endif +-#endif +- .endm +- + #endif /* __ASM_ASSEMBLER_H */ +--- a/arch/arm64/mm/context.c ++++ b/arch/arm64/mm/context.c +@@ -233,6 +233,15 @@ switch_mm_fastpath: + cpu_switch_mm(mm->pgd, mm); + } + ++/* Errata workaround post TTBRx_EL1 update. 
*/ ++asmlinkage void post_ttbr_update_workaround(void) ++{ ++ asm(ALTERNATIVE("nop; nop; nop", ++ "ic iallu; dsb nsh; isb", ++ ARM64_WORKAROUND_CAVIUM_27456, ++ CONFIG_CAVIUM_ERRATUM_27456)); ++} ++ + static int asids_init(void) + { + asid_bits = get_cpu_asid_bits(); +--- a/arch/arm64/mm/proc.S ++++ b/arch/arm64/mm/proc.S +@@ -139,8 +139,7 @@ ENTRY(cpu_do_switch_mm) + isb + msr ttbr0_el1, x0 // now update TTBR0 + isb +- post_ttbr0_update_workaround +- ret ++ b post_ttbr_update_workaround // Back to C code... + ENDPROC(cpu_do_switch_mm) + + .pushsection ".idmap.text", "awx" diff --git a/queue-4.9/arm64-move-task_-definitions-to-asm-processor.h.patch b/queue-4.9/arm64-move-task_-definitions-to-asm-processor.h.patch new file mode 100644 index 00000000000..f313be3189b --- /dev/null +++ b/queue-4.9/arm64-move-task_-definitions-to-asm-processor.h.patch @@ -0,0 +1,128 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:10:59 +0100 +Subject: [PATCH v4.9.y 03/42] arm64: move TASK_* definitions to <asm/processor.h> +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-4-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Yury Norov + +commit eef94a3d09aab437c8c254de942d8b1aa76455e2 upstream. + +ILP32 series [1] introduces the dependency on <asm/is_compat.h> for +TASK_SIZE macro. Which in turn requires <asm/thread_info.h>, and +<asm/thread_info.h> include <asm/memory.h>, giving a circular dependency, +because TASK_SIZE is currently located in <asm/memory.h>. + +In other architectures, TASK_SIZE is defined in <asm/processor.h>, and +moving TASK_SIZE there fixes the problem.
+ +Discussion: https://patchwork.kernel.org/patch/9929107/ + +[1] https://github.com/norov/linux/tree/ilp32-next + +CC: Will Deacon +CC: Laura Abbott +Cc: Ard Biesheuvel +Cc: Catalin Marinas +Cc: James Morse +Suggested-by: Mark Rutland +Signed-off-by: Yury Norov +Signed-off-by: Will Deacon +[v4.9: necessary for making USER_DS an inclusive limit] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/memory.h | 15 --------------- + arch/arm64/include/asm/processor.h | 21 +++++++++++++++++++++ + arch/arm64/kernel/entry.S | 1 + + 3 files changed, 22 insertions(+), 15 deletions(-) + +--- a/arch/arm64/include/asm/memory.h ++++ b/arch/arm64/include/asm/memory.h +@@ -60,8 +60,6 @@ + * KIMAGE_VADDR - the virtual address of the start of the kernel image + * VA_BITS - the maximum number of bits for virtual addresses. + * VA_START - the first kernel virtual address. +- * TASK_SIZE - the maximum size of a user space task. +- * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. + */ + #define VA_BITS (CONFIG_ARM64_VA_BITS) + #define VA_START (UL(0xffffffffffffffff) - \ +@@ -76,19 +74,6 @@ + #define PCI_IO_END (VMEMMAP_START - SZ_2M) + #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) + #define FIXADDR_TOP (PCI_IO_START - SZ_2M) +-#define TASK_SIZE_64 (UL(1) << VA_BITS) +- +-#ifdef CONFIG_COMPAT +-#define TASK_SIZE_32 UL(0x100000000) +-#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ +- TASK_SIZE_32 : TASK_SIZE_64) +-#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? 
\ +- TASK_SIZE_32 : TASK_SIZE_64) +-#else +-#define TASK_SIZE TASK_SIZE_64 +-#endif /* CONFIG_COMPAT */ +- +-#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) + + #define KERNEL_START _text + #define KERNEL_END _end +--- a/arch/arm64/include/asm/processor.h ++++ b/arch/arm64/include/asm/processor.h +@@ -19,6 +19,10 @@ + #ifndef __ASM_PROCESSOR_H + #define __ASM_PROCESSOR_H + ++#define TASK_SIZE_64 (UL(1) << VA_BITS) ++ ++#ifndef __ASSEMBLY__ ++ + /* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). +@@ -37,6 +41,22 @@ + #include + #include + ++/* ++ * TASK_SIZE - the maximum size of a user space task. ++ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. ++ */ ++#ifdef CONFIG_COMPAT ++#define TASK_SIZE_32 UL(0x100000000) ++#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ ++ TASK_SIZE_32 : TASK_SIZE_64) ++#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ ++ TASK_SIZE_32 : TASK_SIZE_64) ++#else ++#define TASK_SIZE TASK_SIZE_64 ++#endif /* CONFIG_COMPAT */ ++ ++#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) ++ + #define STACK_TOP_MAX TASK_SIZE_64 + #ifdef CONFIG_COMPAT + #define AARCH32_VECTORS_BASE 0xffff0000 +@@ -192,4 +212,5 @@ int cpu_enable_pan(void *__unused); + int cpu_enable_uao(void *__unused); + int cpu_enable_cache_maint_trap(void *__unused); + ++#endif /* __ASSEMBLY__ */ + #endif /* __ASM_PROCESSOR_H */ +--- a/arch/arm64/kernel/entry.S ++++ b/arch/arm64/kernel/entry.S +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + #include + #include + #include diff --git a/queue-4.9/arm64-run-enable-method-for-errata-work-arounds-on-late-cpus.patch b/queue-4.9/arm64-run-enable-method-for-errata-work-arounds-on-late-cpus.patch new file mode 100644 index 00000000000..6bc2cec7d6c --- /dev/null +++ b/queue-4.9/arm64-run-enable-method-for-errata-work-arounds-on-late-cpus.patch @@ -0,0 +1,62 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: 
Thu, 12 Apr 2018 12:11:08 +0100 +Subject: [PATCH v4.9.y 12/42] arm64: Run enable method for errata work arounds on late CPUs +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-13-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Suzuki K Poulose + +commit 55b35d070c2534dfb714b883f3c3ae05d02032da upstream. + +When a CPU is brought up after we have finalised the system +wide capabilities (i.e., features and errata), we make sure the +new CPU doesn't need a new errata work around which has not been +detected already. However we don't run enable() method on the new +CPU for the errata work arounds already detected. This could +cause the new CPU running without potential work arounds. +It is up to the "enable()" method to decide if this CPU should +do something about the errata. + +Fixes: commit 6a6efbb45b7d95c84 ("arm64: Verify CPU errata work arounds on hotplugged CPU") +Cc: Will Deacon +Cc: Mark Rutland +Cc: Andre Przywara +Cc: Dave Martin +Signed-off-by: Suzuki K Poulose +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/cpu_errata.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -143,15 +143,18 @@ void verify_local_cpu_errata_workarounds + { + const struct arm64_cpu_capabilities *caps = arm64_errata; + +- for (; caps->matches; caps++) +- if (!cpus_have_cap(caps->capability) && +- caps->matches(caps, SCOPE_LOCAL_CPU)) { ++ for (; caps->matches; caps++) { ++ if (cpus_have_cap(caps->capability)) { ++ if (caps->enable) ++ caps->enable((void *)caps); ++ } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { + pr_crit("CPU%d: Requires work around for %s, not detected" + " at boot
time\n", + smp_processor_id(), + caps->desc ? : "an erratum"); + cpu_die_early(); + } ++ } + } + + void update_cpu_errata_workarounds(void) diff --git a/queue-4.9/arm64-uaccess-don-t-bother-eliding-access_ok-checks-in-__-get-put-_user.patch b/queue-4.9/arm64-uaccess-don-t-bother-eliding-access_ok-checks-in-__-get-put-_user.patch new file mode 100644 index 00000000000..b9acc0ab985 --- /dev/null +++ b/queue-4.9/arm64-uaccess-don-t-bother-eliding-access_ok-checks-in-__-get-put-_user.patch @@ -0,0 +1,134 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:04 +0100 +Subject: [PATCH v4.9.y 08/42] arm64: uaccess: Don't bother eliding access_ok checks in __{get, put}_user +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-9-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit 84624087dd7e3b482b7b11c170ebc1f329b3a218 upstream. + +access_ok isn't an expensive operation once the addr_limit for the current +thread has been loaded into the cache. Given that the initial access_ok +check preceding a sequence of __{get,put}_user operations will take +the brunt of the miss, we can make the __* variants identical to the +full-fat versions, which brings with it the benefits of address masking. + +The likely cost in these sequences will be from toggling PAN/UAO, which +we can address later by implementing the *_unsafe versions. 
+ +Reviewed-by: Robin Murphy +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/uaccess.h | 62 ++++++++++++++++++++++----------------- + 1 file changed, 36 insertions(+), 26 deletions(-) + +--- a/arch/arm64/include/asm/uaccess.h ++++ b/arch/arm64/include/asm/uaccess.h +@@ -209,30 +209,35 @@ do { \ + CONFIG_ARM64_PAN)); \ + } while (0) + +-#define __get_user(x, ptr) \ ++#define __get_user_check(x, ptr, err) \ + ({ \ +- int __gu_err = 0; \ +- __get_user_err((x), (ptr), __gu_err); \ +- __gu_err; \ ++ __typeof__(*(ptr)) __user *__p = (ptr); \ ++ might_fault(); \ ++ if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ ++ __p = uaccess_mask_ptr(__p); \ ++ __get_user_err((x), __p, (err)); \ ++ } else { \ ++ (x) = 0; (err) = -EFAULT; \ ++ } \ + }) + + #define __get_user_error(x, ptr, err) \ + ({ \ +- __get_user_err((x), (ptr), (err)); \ ++ __get_user_check((x), (ptr), (err)); \ + (void)0; \ + }) + +-#define __get_user_unaligned __get_user +- +-#define get_user(x, ptr) \ ++#define __get_user(x, ptr) \ + ({ \ +- __typeof__(*(ptr)) __user *__p = (ptr); \ +- might_fault(); \ +- access_ok(VERIFY_READ, __p, sizeof(*__p)) ? 
\ +- __p = uaccess_mask_ptr(__p), __get_user((x), __p) : \ +- ((x) = 0, -EFAULT); \ ++ int __gu_err = 0; \ ++ __get_user_check((x), (ptr), __gu_err); \ ++ __gu_err; \ + }) + ++#define __get_user_unaligned __get_user ++ ++#define get_user __get_user ++ + #define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ + asm volatile( \ + "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ +@@ -277,30 +282,35 @@ do { \ + CONFIG_ARM64_PAN)); \ + } while (0) + +-#define __put_user(x, ptr) \ ++#define __put_user_check(x, ptr, err) \ + ({ \ +- int __pu_err = 0; \ +- __put_user_err((x), (ptr), __pu_err); \ +- __pu_err; \ ++ __typeof__(*(ptr)) __user *__p = (ptr); \ ++ might_fault(); \ ++ if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ ++ __p = uaccess_mask_ptr(__p); \ ++ __put_user_err((x), __p, (err)); \ ++ } else { \ ++ (err) = -EFAULT; \ ++ } \ + }) + + #define __put_user_error(x, ptr, err) \ + ({ \ +- __put_user_err((x), (ptr), (err)); \ ++ __put_user_check((x), (ptr), (err)); \ + (void)0; \ + }) + +-#define __put_user_unaligned __put_user +- +-#define put_user(x, ptr) \ ++#define __put_user(x, ptr) \ + ({ \ +- __typeof__(*(ptr)) __user *__p = (ptr); \ +- might_fault(); \ +- access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? 
\ +- __p = uaccess_mask_ptr(__p), __put_user((x), __p) : \ +- -EFAULT; \ ++ int __pu_err = 0; \ ++ __put_user_check((x), (ptr), __pu_err); \ ++ __pu_err; \ + }) + ++#define __put_user_unaligned __put_user ++ ++#define put_user __put_user ++ + extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); + extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); + extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); diff --git a/queue-4.9/arm64-uaccess-mask-__user-pointers-for-__arch_-clear-copy_-_user.patch b/queue-4.9/arm64-uaccess-mask-__user-pointers-for-__arch_-clear-copy_-_user.patch new file mode 100644 index 00000000000..15e7ebeeb34 --- /dev/null +++ b/queue-4.9/arm64-uaccess-mask-__user-pointers-for-__arch_-clear-copy_-_user.patch @@ -0,0 +1,151 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:05 +0100 +Subject: [PATCH v4.9.y 09/42] arm64: uaccess: Mask __user pointers for __arch_{clear, copy_*}_user +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-10-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit f71c2ffcb20dd8626880747557014bb9a61eb90e upstream. + +Like we've done for get_user and put_user, ensure that user pointers +are masked before invoking the underlying __arch_{clear,copy_*}_user +operations. 
+ +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +[v4.9: fixup for v4.9-style uaccess primitives] +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/uaccess.h | 18 ++++++++++-------- + arch/arm64/kernel/arm64ksyms.c | 4 ++-- + arch/arm64/lib/clear_user.S | 6 +++--- + arch/arm64/lib/copy_in_user.S | 4 ++-- + 4 files changed, 17 insertions(+), 15 deletions(-) + +--- a/arch/arm64/include/asm/uaccess.h ++++ b/arch/arm64/include/asm/uaccess.h +@@ -313,21 +313,20 @@ do { \ + + extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); + extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); +-extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); +-extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); ++extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n); + + static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) + { + kasan_check_write(to, n); + check_object_size(to, n, false); +- return __arch_copy_from_user(to, from, n); ++ return __arch_copy_from_user(to, __uaccess_mask_ptr(from), n); + } + + static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) + { + kasan_check_read(from, n); + check_object_size(from, n, true); +- return __arch_copy_to_user(to, from, n); ++ return __arch_copy_to_user(__uaccess_mask_ptr(to), from, n); + } + + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) +@@ -355,22 +354,25 @@ static inline unsigned long __must_check + return n; + } + +-static inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned 
long n) ++static inline unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n) + { + if (access_ok(VERIFY_READ, from, n) && access_ok(VERIFY_WRITE, to, n)) +- n = __copy_in_user(to, from, n); ++ n = __arch_copy_in_user(__uaccess_mask_ptr(to), __uaccess_mask_ptr(from), n); + return n; + } ++#define copy_in_user __copy_in_user + + #define __copy_to_user_inatomic __copy_to_user + #define __copy_from_user_inatomic __copy_from_user + +-static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) ++extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n); ++static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n) + { + if (access_ok(VERIFY_WRITE, to, n)) +- n = __clear_user(__uaccess_mask_ptr(to), n); ++ n = __arch_clear_user(__uaccess_mask_ptr(to), n); + return n; + } ++#define clear_user __clear_user + + extern long strncpy_from_user(char *dest, const char __user *src, long count); + +--- a/arch/arm64/kernel/arm64ksyms.c ++++ b/arch/arm64/kernel/arm64ksyms.c +@@ -37,8 +37,8 @@ EXPORT_SYMBOL(clear_page); + /* user mem (segment) */ + EXPORT_SYMBOL(__arch_copy_from_user); + EXPORT_SYMBOL(__arch_copy_to_user); +-EXPORT_SYMBOL(__clear_user); +-EXPORT_SYMBOL(__copy_in_user); ++EXPORT_SYMBOL(__arch_clear_user); ++EXPORT_SYMBOL(__arch_copy_in_user); + + /* physical memory */ + EXPORT_SYMBOL(memstart_addr); +--- a/arch/arm64/lib/clear_user.S ++++ b/arch/arm64/lib/clear_user.S +@@ -24,7 +24,7 @@ + + .text + +-/* Prototype: int __clear_user(void *addr, size_t sz) ++/* Prototype: int __arch_clear_user(void *addr, size_t sz) + * Purpose : clear some user memory + * Params : addr - user memory address to clear + * : sz - number of bytes to clear +@@ -32,7 +32,7 @@ + * + * Alignment fixed up by hardware. 
+ */ +-ENTRY(__clear_user) ++ENTRY(__arch_clear_user) + ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ + CONFIG_ARM64_PAN) + mov x2, x1 // save the size for fixup return +@@ -57,7 +57,7 @@ uao_user_alternative 9f, strb, sttrb, wz + ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ + CONFIG_ARM64_PAN) + ret +-ENDPROC(__clear_user) ++ENDPROC(__arch_clear_user) + + .section .fixup,"ax" + .align 2 +--- a/arch/arm64/lib/copy_in_user.S ++++ b/arch/arm64/lib/copy_in_user.S +@@ -67,7 +67,7 @@ + .endm + + end .req x5 +-ENTRY(__copy_in_user) ++ENTRY(__arch_copy_in_user) + ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ + CONFIG_ARM64_PAN) + add end, x0, x2 +@@ -76,7 +76,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTAT + CONFIG_ARM64_PAN) + mov x0, #0 + ret +-ENDPROC(__copy_in_user) ++ENDPROC(__arch_copy_in_user) + + .section .fixup,"ax" + .align 2 diff --git a/queue-4.9/arm64-uaccess-prevent-speculative-use-of-the-current-addr_limit.patch b/queue-4.9/arm64-uaccess-prevent-speculative-use-of-the-current-addr_limit.patch new file mode 100644 index 00000000000..9c1d3efa86e --- /dev/null +++ b/queue-4.9/arm64-uaccess-prevent-speculative-use-of-the-current-addr_limit.patch @@ -0,0 +1,49 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:03 +0100 +Subject: [PATCH v4.9.y 07/42] arm64: uaccess: Prevent speculative use of the current addr_limit +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-8-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit c2f0ad4fc089cff81cef6a13d04b399980ecbfcc upstream. 
+ +A mispredicted conditional call to set_fs could result in the wrong +addr_limit being forwarded under speculation to a subsequent access_ok +check, potentially forming part of a spectre-v1 attack using uaccess +routines. + +This patch prevents this forwarding from taking place, by putting heavy +barriers in set_fs after writing the addr_limit. + +Reviewed-by: Mark Rutland +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/uaccess.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/arm64/include/asm/uaccess.h ++++ b/arch/arm64/include/asm/uaccess.h +@@ -68,6 +68,13 @@ static inline void set_fs(mm_segment_t f + current_thread_info()->addr_limit = fs; + + /* ++ * Prevent a mispredicted conditional call to set_fs from forwarding ++ * the wrong address limit to access_ok under speculation. ++ */ ++ dsb(nsh); ++ isb(); ++ ++ /* + * Enable/disable UAO so that copy_to_user() etc can access + * kernel memory with the unprivileged instructions. + */ diff --git a/queue-4.9/arm64-use-pointer-masking-to-limit-uaccess-speculation.patch b/queue-4.9/arm64-use-pointer-masking-to-limit-uaccess-speculation.patch new file mode 100644 index 00000000000..b6e3336d8df --- /dev/null +++ b/queue-4.9/arm64-use-pointer-masking-to-limit-uaccess-speculation.patch @@ -0,0 +1,89 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:01 +0100 +Subject: [PATCH v4.9.y 05/42] arm64: Use pointer masking to limit uaccess speculation +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-6-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Robin Murphy + +commit 4d8efc2d5ee4c9ccfeb29ee8afd47a8660d0c0ce upstream.
+ +Similarly to x86, mitigate speculation past an access_ok() check by +masking the pointer against the address limit before use. + +Even if we don't expect speculative writes per se, it is plausible that +a CPU may still speculate at least as far as fetching a cache line for +writing, hence we also harden put_user() and clear_user() for peace of +mind. + +Signed-off-by: Robin Murphy +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/uaccess.h | 26 +++++++++++++++++++++++--- + 1 file changed, 23 insertions(+), 3 deletions(-) + +--- a/arch/arm64/include/asm/uaccess.h ++++ b/arch/arm64/include/asm/uaccess.h +@@ -129,6 +129,26 @@ static inline unsigned long __range_ok(u + " .popsection\n" + + /* ++ * Sanitise a uaccess pointer such that it becomes NULL if above the ++ * current addr_limit. ++ */ ++#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr) ++static inline void __user *__uaccess_mask_ptr(const void __user *ptr) ++{ ++ void __user *safe_ptr; ++ ++ asm volatile( ++ " bics xzr, %1, %2\n" ++ " csel %0, %1, xzr, eq\n" ++ : "=&r" (safe_ptr) ++ : "r" (ptr), "r" (current_thread_info()->addr_limit) ++ : "cc"); ++ ++ csdb(); ++ return safe_ptr; ++} ++ ++/* + * The "__xxx" versions of the user access functions do not verify the address + * space - it must have been done previously with a separate "access_ok()" + * call. +@@ -202,7 +222,7 @@ do { \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \ +- __get_user((x), __p) : \ ++ __p = uaccess_mask_ptr(__p), __get_user((x), __p) : \ + ((x) = 0, -EFAULT); \ + }) + +@@ -270,7 +290,7 @@ do { \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? 
\ +- __put_user((x), __p) : \ ++ __p = uaccess_mask_ptr(__p), __put_user((x), __p) : \ + -EFAULT; \ + }) + +@@ -331,7 +351,7 @@ static inline unsigned long __must_check + static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) + { + if (access_ok(VERIFY_WRITE, to, n)) +- n = __clear_user(to, n); ++ n = __clear_user(__uaccess_mask_ptr(to), n); + return n; + } + diff --git a/queue-4.9/drivers-firmware-expose-psci_get_version-through-psci_ops-structure.patch b/queue-4.9/drivers-firmware-expose-psci_get_version-through-psci_ops-structure.patch new file mode 100644 index 00000000000..6538a52f707 --- /dev/null +++ b/queue-4.9/drivers-firmware-expose-psci_get_version-through-psci_ops-structure.patch @@ -0,0 +1,53 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:10 +0100 +Subject: [PATCH v4.9.y 14/42] drivers/firmware: Expose psci_get_version through psci_ops structure +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-15-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Will Deacon + +commit d68e3ba5303f7e1099f51fdcd155f5263da8569b upstream. + +Entry into recent versions of ARM Trusted Firmware will invalidate the CPU +branch predictor state in order to protect against aliasing attacks. + +This patch exposes the PSCI "VERSION" function via psci_ops, so that it +can be invoked outside of the PSCI driver where necessary. 
+ +Acked-by: Lorenzo Pieralisi +Signed-off-by: Will Deacon +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/psci.c | 2 ++ + include/linux/psci.h | 1 + + 2 files changed, 3 insertions(+) + +--- a/drivers/firmware/psci.c ++++ b/drivers/firmware/psci.c +@@ -496,6 +496,8 @@ static void __init psci_init_migrate(voi + static void __init psci_0_2_set_functions(void) + { + pr_info("Using standard PSCI v0.2 function IDs\n"); ++ psci_ops.get_version = psci_get_version; ++ + psci_function_id[PSCI_FN_CPU_SUSPEND] = + PSCI_FN_NATIVE(0_2, CPU_SUSPEND); + psci_ops.cpu_suspend = psci_cpu_suspend; +--- a/include/linux/psci.h ++++ b/include/linux/psci.h +@@ -26,6 +26,7 @@ int psci_cpu_init_idle(unsigned int cpu) + int psci_cpu_suspend_enter(unsigned long index); + + struct psci_operations { ++ u32 (*get_version)(void); + int (*cpu_suspend)(u32 state, unsigned long entry_point); + int (*cpu_off)(u32 state); + int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); diff --git a/queue-4.9/firmware-psci-expose-psci-conduit.patch b/queue-4.9/firmware-psci-expose-psci-conduit.patch new file mode 100644 index 00000000000..baa52fe4cc0 --- /dev/null +++ b/queue-4.9/firmware-psci-expose-psci-conduit.patch @@ -0,0 +1,115 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:33 +0100 +Subject: [PATCH v4.9.y 37/42] firmware/psci: Expose PSCI conduit +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-38-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit 09a8d6d48499f93e2abde691f5800081cd858726 upstream. + +In order to call into the firmware to apply workarounds, it is +useful to find out whether we're using HVC or SMC. 
Let's expose +this through the psci_ops. + +Acked-by: Lorenzo Pieralisi +Reviewed-by: Robin Murphy +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/psci.c | 28 +++++++++++++++++++++++----- + include/linux/psci.h | 7 +++++++ + 2 files changed, 30 insertions(+), 5 deletions(-) + +--- a/drivers/firmware/psci.c ++++ b/drivers/firmware/psci.c +@@ -59,7 +59,9 @@ bool psci_tos_resident_on(int cpu) + return cpu == resident_cpu; + } + +-struct psci_operations psci_ops; ++struct psci_operations psci_ops = { ++ .conduit = PSCI_CONDUIT_NONE, ++}; + + typedef unsigned long (psci_fn)(unsigned long, unsigned long, + unsigned long, unsigned long); +@@ -210,6 +212,22 @@ static unsigned long psci_migrate_info_u + 0, 0, 0); + } + ++static void set_conduit(enum psci_conduit conduit) ++{ ++ switch (conduit) { ++ case PSCI_CONDUIT_HVC: ++ invoke_psci_fn = __invoke_psci_fn_hvc; ++ break; ++ case PSCI_CONDUIT_SMC: ++ invoke_psci_fn = __invoke_psci_fn_smc; ++ break; ++ default: ++ WARN(1, "Unexpected PSCI conduit %d\n", conduit); ++ } ++ ++ psci_ops.conduit = conduit; ++} ++ + static int get_set_conduit_method(struct device_node *np) + { + const char *method; +@@ -222,9 +240,9 @@ static int get_set_conduit_method(struct + } + + if (!strcmp("hvc", method)) { +- invoke_psci_fn = __invoke_psci_fn_hvc; ++ set_conduit(PSCI_CONDUIT_HVC); + } else if (!strcmp("smc", method)) { +- invoke_psci_fn = __invoke_psci_fn_smc; ++ set_conduit(PSCI_CONDUIT_SMC); + } else { + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; +@@ -654,9 +672,9 @@ int __init psci_acpi_init(void) + pr_info("probing for conduit method from ACPI.\n"); + + if (acpi_psci_use_hvc()) +- invoke_psci_fn = __invoke_psci_fn_hvc; ++ set_conduit(PSCI_CONDUIT_HVC); + else +- invoke_psci_fn = __invoke_psci_fn_smc; ++ set_conduit(PSCI_CONDUIT_SMC); + + 
return psci_probe(); + } +--- a/include/linux/psci.h ++++ b/include/linux/psci.h +@@ -25,6 +25,12 @@ bool psci_tos_resident_on(int cpu); + int psci_cpu_init_idle(unsigned int cpu); + int psci_cpu_suspend_enter(unsigned long index); + ++enum psci_conduit { ++ PSCI_CONDUIT_NONE, ++ PSCI_CONDUIT_SMC, ++ PSCI_CONDUIT_HVC, ++}; ++ + struct psci_operations { + u32 (*get_version)(void); + int (*cpu_suspend)(u32 state, unsigned long entry_point); +@@ -34,6 +40,7 @@ struct psci_operations { + int (*affinity_info)(unsigned long target_affinity, + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); ++ enum psci_conduit conduit; + }; + + extern struct psci_operations psci_ops; diff --git a/queue-4.9/firmware-psci-expose-smccc-version-through-psci_ops.patch b/queue-4.9/firmware-psci-expose-smccc-version-through-psci_ops.patch new file mode 100644 index 00000000000..2042f419bd4 --- /dev/null +++ b/queue-4.9/firmware-psci-expose-smccc-version-through-psci_ops.patch @@ -0,0 +1,104 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:34 +0100 +Subject: [PATCH v4.9.y 38/42] firmware/psci: Expose SMCCC version through psci_ops +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-39-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Marc Zyngier + +commit e78eef554a912ef6c1e0bbf97619dafbeae3339f upstream. + +Since PSCI 1.0 allows the SMCCC version to be (indirectly) probed, +let's do that at boot time, and expose the version of the calling +convention as part of the psci_ops structure. 
+ +Acked-by: Lorenzo Pieralisi +Reviewed-by: Robin Murphy +Tested-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Signed-off-by: Catalin Marinas +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/psci.c | 27 +++++++++++++++++++++++++++ + include/linux/psci.h | 6 ++++++ + 2 files changed, 33 insertions(+) + +--- a/drivers/firmware/psci.c ++++ b/drivers/firmware/psci.c +@@ -61,6 +61,7 @@ bool psci_tos_resident_on(int cpu) + + struct psci_operations psci_ops = { + .conduit = PSCI_CONDUIT_NONE, ++ .smccc_version = SMCCC_VERSION_1_0, + }; + + typedef unsigned long (psci_fn)(unsigned long, unsigned long, +@@ -511,6 +512,31 @@ static void __init psci_init_migrate(voi + pr_info("Trusted OS resident on physical CPU 0x%lx\n", cpuid); + } + ++static void __init psci_init_smccc(void) ++{ ++ u32 ver = ARM_SMCCC_VERSION_1_0; ++ int feature; ++ ++ feature = psci_features(ARM_SMCCC_VERSION_FUNC_ID); ++ ++ if (feature != PSCI_RET_NOT_SUPPORTED) { ++ u32 ret; ++ ret = invoke_psci_fn(ARM_SMCCC_VERSION_FUNC_ID, 0, 0, 0); ++ if (ret == ARM_SMCCC_VERSION_1_1) { ++ psci_ops.smccc_version = SMCCC_VERSION_1_1; ++ ver = ret; ++ } ++ } ++ ++ /* ++ * Conveniently, the SMCCC and PSCI versions are encoded the ++ * same way. No, this isn't accidental. 
++ */ ++ pr_info("SMC Calling Convention v%d.%d\n", ++ PSCI_VERSION_MAJOR(ver), PSCI_VERSION_MINOR(ver)); ++ ++} ++ + static void __init psci_0_2_set_functions(void) + { + pr_info("Using standard PSCI v0.2 function IDs\n"); +@@ -559,6 +585,7 @@ static int __init psci_probe(void) + psci_init_migrate(); + + if (PSCI_VERSION_MAJOR(ver) >= 1) { ++ psci_init_smccc(); + psci_init_cpu_suspend(); + psci_init_system_suspend(); + } +--- a/include/linux/psci.h ++++ b/include/linux/psci.h +@@ -31,6 +31,11 @@ enum psci_conduit { + PSCI_CONDUIT_HVC, + }; + ++enum smccc_version { ++ SMCCC_VERSION_1_0, ++ SMCCC_VERSION_1_1, ++}; ++ + struct psci_operations { + u32 (*get_version)(void); + int (*cpu_suspend)(u32 state, unsigned long entry_point); +@@ -41,6 +46,7 @@ struct psci_operations { + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); + enum psci_conduit conduit; ++ enum smccc_version smccc_version; + }; + + extern struct psci_operations psci_ops; diff --git a/queue-4.9/mm-introduce-lm_alias.patch b/queue-4.9/mm-introduce-lm_alias.patch new file mode 100644 index 00000000000..3e91c257aef --- /dev/null +++ b/queue-4.9/mm-introduce-lm_alias.patch @@ -0,0 +1,44 @@ +From foo@baz Tue Apr 17 14:06:43 CEST 2018 +From: Mark Rutland +Date: Thu, 12 Apr 2018 12:11:15 +0100 +Subject: [PATCH v4.9.y 19/42] mm: Introduce lm_alias +To: stable@vger.kernel.org +Cc: mark.brown@linaro.org, ard.biesheuvel@linaro.org, marc.zyngier@arm.com, will.deacon@arm.com, catalin.marinas@arm.com, ghackmann@google.com, shankerd@codeaurora.org +Message-ID: <20180412111138.40990-20-mark.rutland@arm.com> + +From: Mark Rutland + + +From: Laura Abbott + +commit 568c5fe5a54f2654f5a4c599c45b8a62ed9a2013 upstream. + +Certain architectures may have the kernel image mapped separately to +alias the linear map. Introduce a macro lm_alias to translate a kernel +image symbol into its linear alias. This is used in part with work to +add CONFIG_DEBUG_VIRTUAL support for arm64. 
+ +Reviewed-by: Mark Rutland +Tested-by: Mark Rutland +Signed-off-by: Laura Abbott +Signed-off-by: Will Deacon +Signed-off-by: Mark Rutland [v4.9 backport] +Tested-by: Greg Hackmann +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -76,6 +76,10 @@ extern int mmap_rnd_compat_bits __read_m + #define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) + #endif + ++#ifndef lm_alias ++#define lm_alias(x) __va(__pa_symbol(x)) ++#endif ++ + /* + * To prevent common memory management code establishing + * a zero page mapping on a read fault. diff --git a/queue-4.9/series b/queue-4.9/series index ffbd0206855..8c8f18e7223 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -7,3 +7,44 @@ perf-intel-pt-fix-error-recovery-from-missing-tip-packet.patch perf-intel-pt-fix-timestamp-following-overflow.patch perf-core-fix-use-after-free-in-uprobe_perf_close.patch radeon-hide-pointless-warning-when-compile-testing.patch +arm64-barrier-add-csdb-macros-to-control-data-value-prediction.patch +arm64-implement-array_index_mask_nospec.patch +arm64-move-task_-definitions-to-asm-processor.h.patch +arm64-make-user_ds-an-inclusive-limit.patch +arm64-use-pointer-masking-to-limit-uaccess-speculation.patch +arm64-entry-ensure-branch-through-syscall-table-is-bounded-under-speculation.patch +arm64-uaccess-prevent-speculative-use-of-the-current-addr_limit.patch +arm64-uaccess-don-t-bother-eliding-access_ok-checks-in-__-get-put-_user.patch +arm64-uaccess-mask-__user-pointers-for-__arch_-clear-copy_-_user.patch +arm64-cpufeature-__this_cpu_has_cap-shouldn-t-stop-early.patch +arm64-run-enable-method-for-errata-work-arounds-on-late-cpus.patch +arm64-cpufeature-pass-capability-structure-to-enable-callback.patch +drivers-firmware-expose-psci_get_version-through-psci_ops-structure.patch +arm64-factor-out-ttbr0_el1-post-update-workaround-into-a-specific-asm-macro.patch 
+arm64-move-post_ttbr_update_workaround-to-c-code.patch +arm64-add-skeleton-to-harden-the-branch-predictor-against-aliasing-attacks.patch +arm64-move-bp-hardening-to-check_and_switch_context.patch +mm-introduce-lm_alias.patch +arm64-kvm-use-per-cpu-vector-when-bp-hardening-is-enabled.patch +arm64-entry-apply-bp-hardening-for-high-priority-synchronous-exceptions.patch +arm64-entry-apply-bp-hardening-for-suspicious-interrupts-from-el0.patch +arm64-cputype-add-missing-midr-values-for-cortex-a72-and-cortex-a75.patch +arm64-cpu_errata-allow-an-erratum-to-be-match-for-all-revisions-of-a-core.patch +arm64-implement-branch-predictor-hardening-for-affected-cortex-a-cpus.patch +arm64-branch-predictor-hardening-for-cavium-thunderx2.patch +arm64-kvm-increment-pc-after-handling-an-smc-trap.patch +arm-arm64-kvm-consolidate-the-psci-include-files.patch +arm-arm64-kvm-add-psci_version-helper.patch +arm-arm64-kvm-add-smccc-accessors-to-psci-code.patch +arm-arm64-kvm-implement-psci-1.0-support.patch +arm-arm64-kvm-advertise-smccc-v1.1.patch +arm64-kvm-make-psci_version-a-fast-path.patch +arm-arm64-kvm-turn-kvm_psci_version-into-a-static-inline.patch +arm64-kvm-report-smccc_arch_workaround_1-bp-hardening-support.patch +arm64-kvm-add-smccc_arch_workaround_1-fast-handling.patch +firmware-psci-expose-psci-conduit.patch +firmware-psci-expose-smccc-version-through-psci_ops.patch +arm-arm64-smccc-make-function-identifiers-an-unsigned-quantity.patch +arm-arm64-smccc-implement-smccc-v1.1-inline-primitive.patch +arm64-add-arm_smccc_arch_workaround_1-bp-hardening-support.patch +arm64-kill-psci_get_version-as-a-variant-2-workaround.patch