From: Sasha Levin Date: Thu, 18 Mar 2021 13:39:14 +0000 (-0400) Subject: Fixes for 5.4 X-Git-Tag: v4.19.182~27 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=76bdd0a8b862c5692b6bbcfda2b0cfbd82945571;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.4 Signed-off-by: Sasha Levin --- diff --git a/queue-5.4/crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch b/queue-5.4/crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch new file mode 100644 index 00000000000..de3f0fa41fc --- /dev/null +++ b/queue-5.4/crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch @@ -0,0 +1,214 @@ +From 84af8569239f69c0ebb4b747ea58074d5d3e9b93 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 27 Nov 2020 10:44:52 +0100 +Subject: crypto: aesni - Use TEST %reg,%reg instead of CMP $0,%reg + +From: Uros Bizjak + +[ Upstream commit 032d049ea0f45b45c21f3f02b542aa18bc6b6428 ] + +CMP $0,%reg can't set overflow flag, so we can use shorter TEST %reg,%reg +instruction when only zero and sign flags are checked (E,L,LE,G,GE conditions). + +Signed-off-by: Uros Bizjak +Cc: Herbert Xu +Cc: Borislav Petkov +Cc: "H. Peter Anvin" +Signed-off-by: Herbert Xu +Signed-off-by: Sasha Levin +--- + arch/x86/crypto/aesni-intel_asm.S | 20 ++++++++++---------- + arch/x86/crypto/aesni-intel_avx-x86_64.S | 20 ++++++++++---------- + 2 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S +index 9afeb58c910e..fb8ad276128a 100644 +--- a/arch/x86/crypto/aesni-intel_asm.S ++++ b/arch/x86/crypto/aesni-intel_asm.S +@@ -319,7 +319,7 @@ _initial_blocks_\@: + + # Main loop - Encrypt/Decrypt remaining blocks + +- cmp $0, %r13 ++ test %r13, %r13 + je _zero_cipher_left_\@ + sub $64, %r13 + je _four_cipher_left_\@ +@@ -438,7 +438,7 @@ _multiple_of_16_bytes_\@: + + mov PBlockLen(%arg2), %r12 + +- cmp $0, %r12 ++ test %r12, %r12 + je _partial_done\@ + + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 +@@ -475,7 +475,7 @@ _T_8_\@: + add $8, %r10 + sub $8, %r11 + psrldq $8, %xmm0 +- cmp $0, %r11 ++ test %r11, %r11 + je _return_T_done_\@ + _T_4_\@: + movd %xmm0, %eax +@@ -483,7 +483,7 @@ _T_4_\@: + add $4, %r10 + sub $4, %r11 + psrldq $4, %xmm0 +- cmp $0, %r11 ++ test %r11, %r11 + je _return_T_done_\@ + _T_123_\@: + movd %xmm0, %eax +@@ -620,7 +620,7 @@ _get_AAD_blocks\@: + + /* read the last <16B of AAD */ + _get_AAD_rest\@: +- cmp $0, %r11 ++ test %r11, %r11 + je _get_AAD_done\@ + + READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 +@@ -641,7 +641,7 @@ _get_AAD_done\@: + .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ + AAD_HASH operation + mov PBlockLen(%arg2), %r13 +- cmp $0, %r13 ++ test %r13, %r13 + je _partial_block_done_\@ # Leave Macro if no partial blocks + # Read in input data without over reading + cmp $16, \PLAIN_CYPH_LEN +@@ -693,7 +693,7 @@ _no_extra_mask_1_\@: + PSHUFB_XMM %xmm2, %xmm3 + pxor %xmm3, \AAD_HASH + +- cmp $0, %r10 ++ test %r10, %r10 + jl _partial_incomplete_1_\@ + + # GHASH computation for the last <16 Byte block +@@ -728,7 +728,7 @@ _no_extra_mask_2_\@: + PSHUFB_XMM %xmm2, %xmm9 + pxor %xmm9, \AAD_HASH + +- cmp $0, %r10 ++ test %r10, %r10 + jl _partial_incomplete_2_\@ + + # GHASH computation for the last <16 Byte block +@@ -748,7 +748,7 @@ _encode_done_\@: + PSHUFB_XMM %xmm2, %xmm9 + .endif + # output encrypted Bytes +- cmp $0, %r10 ++ test %r10, %r10 + jl _partial_fill_\@ + mov %r13, %r12 + mov $16, %r13 +@@ -2731,7 +2731,7 @@ ENDPROC(aesni_ctr_enc) + */ + ENTRY(aesni_xts_crypt8) + 
FRAME_BEGIN +- cmpb $0, %cl ++ testb %cl, %cl + movl $0, %ecx + movl $240, %r10d + leaq _aesni_enc4, %r11 +diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S +index 91c039ab5699..4e4d34956170 100644 +--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S ++++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S +@@ -370,7 +370,7 @@ _initial_num_blocks_is_0\@: + + + _initial_blocks_encrypted\@: +- cmp $0, %r13 ++ test %r13, %r13 + je _zero_cipher_left\@ + + sub $128, %r13 +@@ -529,7 +529,7 @@ _multiple_of_16_bytes\@: + vmovdqu HashKey(arg2), %xmm13 + + mov PBlockLen(arg2), %r12 +- cmp $0, %r12 ++ test %r12, %r12 + je _partial_done\@ + + #GHASH computation for the last <16 Byte block +@@ -574,7 +574,7 @@ _T_8\@: + add $8, %r10 + sub $8, %r11 + vpsrldq $8, %xmm9, %xmm9 +- cmp $0, %r11 ++ test %r11, %r11 + je _return_T_done\@ + _T_4\@: + vmovd %xmm9, %eax +@@ -582,7 +582,7 @@ _T_4\@: + add $4, %r10 + sub $4, %r11 + vpsrldq $4, %xmm9, %xmm9 +- cmp $0, %r11 ++ test %r11, %r11 + je _return_T_done\@ + _T_123\@: + vmovd %xmm9, %eax +@@ -626,7 +626,7 @@ _get_AAD_blocks\@: + cmp $16, %r11 + jge _get_AAD_blocks\@ + vmovdqu \T8, \T7 +- cmp $0, %r11 ++ test %r11, %r11 + je _get_AAD_done\@ + + vpxor \T7, \T7, \T7 +@@ -645,7 +645,7 @@ _get_AAD_rest8\@: + vpxor \T1, \T7, \T7 + jmp _get_AAD_rest8\@ + _get_AAD_rest4\@: +- cmp $0, %r11 ++ test %r11, %r11 + jle _get_AAD_rest0\@ + mov (%r10), %eax + movq %rax, \T1 +@@ -750,7 +750,7 @@ _done_read_partial_block_\@: + .macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ + AAD_HASH ENC_DEC + mov PBlockLen(arg2), %r13 +- cmp $0, %r13 ++ test %r13, %r13 + je _partial_block_done_\@ # Leave Macro if no partial blocks + # Read in input data without over reading + cmp $16, \PLAIN_CYPH_LEN +@@ -802,7 +802,7 @@ _no_extra_mask_1_\@: + vpshufb %xmm2, %xmm3, %xmm3 + vpxor %xmm3, \AAD_HASH, \AAD_HASH + +- cmp $0, %r10 ++ test %r10, %r10 + jl _partial_incomplete_1_\@ + + # GHASH computation for the last <16 Byte block +@@ -837,7 +837,7 @@ _no_extra_mask_2_\@: + vpshufb %xmm2, %xmm9, %xmm9 + vpxor %xmm9, \AAD_HASH, \AAD_HASH + +- cmp $0, %r10 ++ test %r10, %r10 + jl _partial_incomplete_2_\@ + + # GHASH computation for the last <16 Byte block +@@ -857,7 +857,7 @@ _encode_done_\@: + vpshufb %xmm2, %xmm9, %xmm9 + .endif + # output encrypted Bytes +- cmp $0, %r10 ++ test %r10, %r10 + jl _partial_fill_\@ + mov %r13, %r12 + mov $16, %r13 +-- +2.30.1 + diff --git a/queue-5.4/kvm-arm64-nvhe-save-the-spe-context-early.patch b/queue-5.4/kvm-arm64-nvhe-save-the-spe-context-early.patch new file mode 100644 index 00000000000..a25cff017a7 --- /dev/null +++ b/queue-5.4/kvm-arm64-nvhe-save-the-spe-context-early.patch @@ -0,0 +1,142 @@ +From bab1bd2c604ac0b291fa325255cb884dd02bc6da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 16 Mar 2021 18:33:53 +0000 +Subject: KVM: arm64: nvhe: Save the SPE context early + +From: Suzuki K Poulose + +commit b96b0c5de685df82019e16826a282d53d86d112c upstream + +The nVHE KVM hyp drains and disables the SPE buffer, before +entering the guest, as the EL1&0 translation regime +is going to be loaded with that of the guest. + +But this operation is performed way too late, because : + - The owning translation regime of the SPE buffer + is transferred to EL2. (MDCR_EL2_E2PB == 0) + - The guest Stage1 is loaded. + +Thus the flush could use the host EL1 virtual address, +but use the EL2 translations instead of host EL1, for writing +out any cached data. 
+ +Fix this by moving the SPE buffer handling early enough. +The restore path is doing the right thing. + +Cc: stable@vger.kernel.org # v5.4- +Cc: Christoffer Dall +Cc: Marc Zyngier +Cc: Will Deacon +Cc: Catalin Marinas +Cc: Mark Rutland +Cc: Alexandru Elisei +Signed-off-by: Suzuki K Poulose +Acked-by: Marc Zyngier +Signed-off-by: Sasha Levin +--- + arch/arm64/include/asm/kvm_hyp.h | 3 +++ + arch/arm64/kvm/hyp/debug-sr.c | 24 +++++++++++++++--------- + arch/arm64/kvm/hyp/switch.c | 13 ++++++++++++- + 3 files changed, 30 insertions(+), 10 deletions(-) + +diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h +index 97f21cc66657..7f7fdb16bb96 100644 +--- a/arch/arm64/include/asm/kvm_hyp.h ++++ b/arch/arm64/include/asm/kvm_hyp.h +@@ -71,6 +71,9 @@ void __sysreg32_restore_state(struct kvm_vcpu *vcpu); + + void __debug_switch_to_guest(struct kvm_vcpu *vcpu); + void __debug_switch_to_host(struct kvm_vcpu *vcpu); ++void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu); ++void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); ++ + + void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); + void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c +index 0fc9872a1467..aead8a5fbe91 100644 +--- a/arch/arm64/kvm/hyp/debug-sr.c ++++ b/arch/arm64/kvm/hyp/debug-sr.c +@@ -168,6 +168,21 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, + write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); + } + ++void __hyp_text __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) ++{ ++ /* ++ * Non-VHE: Disable and flush SPE data generation ++ * VHE: The vcpu can run, but it can't hide. ++ */ ++ __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); ++ ++} ++ ++void __hyp_text __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) ++{ ++ __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); ++} ++ + void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) + { + struct kvm_cpu_context *host_ctxt; +@@ -175,13 +190,6 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) + struct kvm_guest_debug_arch *host_dbg; + struct kvm_guest_debug_arch *guest_dbg; + +- /* +- * Non-VHE: Disable and flush SPE data generation +- * VHE: The vcpu can run, but it can't hide. +- */ +- if (!has_vhe()) +- __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); +- + if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) + return; + +@@ -201,8 +209,6 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) + struct kvm_guest_debug_arch *host_dbg; + struct kvm_guest_debug_arch *guest_dbg; + +- if (!has_vhe()) +- __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); + + if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) + return; +diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c +index 84964983198e..14607fac7ca3 100644 +--- a/arch/arm64/kvm/hyp/switch.c ++++ b/arch/arm64/kvm/hyp/switch.c +@@ -682,6 +682,15 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) + + __sysreg_save_state_nvhe(host_ctxt); + ++ /* ++ * We must flush and disable the SPE buffer for nVHE, as ++ * the translation regime(EL1&0) is going to be loaded with ++ * that of the guest. And we must do this before we change the ++ * translation regime to EL2 (via MDCR_EL2_EPB == 0) and ++ * before we load guest Stage1. 
++ */ ++ __debug_save_host_buffers_nvhe(vcpu); ++ + __activate_vm(kern_hyp_va(vcpu->kvm)); + __activate_traps(vcpu); + +@@ -720,11 +729,13 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + __fpsimd_save_fpexc32(vcpu); + ++ __debug_switch_to_host(vcpu); ++ + /* + * This must come after restoring the host sysregs, since a non-VHE + * system may enable SPE here and make use of the TTBRs. + */ +- __debug_switch_to_host(vcpu); ++ __debug_restore_host_buffers_nvhe(vcpu); + + if (pmu_switch_needed) + __pmu_switch_to_host(host_ctxt); +-- +2.30.1 + diff --git a/queue-5.4/series b/queue-5.4/series new file mode 100644 index 00000000000..5c5460f5bdc --- /dev/null +++ b/queue-5.4/series @@ -0,0 +1,2 @@ +crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch +kvm-arm64-nvhe-save-the-spe-context-early.patch
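For reference, the flag behaviour behind the aesni substitution can be checked with a small stand-alone sketch. It is illustrative only, is not taken from either queued patch, and the function names are invented for the example. Both forms set ZF and SF identically for the operand being tested, and neither can leave OF set with these operands, so E/NE and the signed L/LE/G/GE conditions branch the same way; TEST is simply the shorter encoding because it carries no immediate.

/* cmp_vs_test.S : assemble with "gcc -c cmp_vs_test.S", then objdump -d to compare encodings */
	.text

	.globl	is_zero_cmp		/* int is_zero_cmp(long x), old style */
is_zero_cmp:
	cmp	$0, %rdi		/* 48 83 ff 00 : 4 bytes, subtract 0 and discard the result */
	sete	%al			/* AL = 1 iff ZF is set */
	movzbl	%al, %eax
	ret

	.globl	is_zero_test		/* int is_zero_test(long x), new style */
is_zero_test:
	test	%rdi, %rdi		/* 48 85 ff    : 3 bytes, AND the register with itself and discard */
	sete	%al			/* same ZF, so the same result */
	movzbl	%al, %eax
	ret

Disassembling the object file shows one byte saved per call site, which is the whole point of the queued aesni patch.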