From: Greg Kroah-Hartman Date: Sat, 14 Nov 2020 12:15:22 +0000 (+0100) Subject: 4.19-stable patches X-Git-Tag: v4.4.244~65 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4fe21603b5dba4dd1c266cc1ee27ba0980630225;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: crypto-arm64-aes-modes-get-rid-of-literal-load-of-addend-vector.patch --- diff --git a/queue-4.19/crypto-arm64-aes-modes-get-rid-of-literal-load-of-addend-vector.patch b/queue-4.19/crypto-arm64-aes-modes-get-rid-of-literal-load-of-addend-vector.patch new file mode 100644 index 00000000000..d7cb3d18f8d --- /dev/null +++ b/queue-4.19/crypto-arm64-aes-modes-get-rid-of-literal-load-of-addend-vector.patch @@ -0,0 +1,57 @@ +From ed6ed11830a9ded520db31a6e2b69b6b0a1eb0e2 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Thu, 23 Aug 2018 17:48:45 +0100 +Subject: crypto: arm64/aes-modes - get rid of literal load of addend vector + +From: Ard Biesheuvel + +commit ed6ed11830a9ded520db31a6e2b69b6b0a1eb0e2 upstream. + +Replace the literal load of the addend vector with a sequence that +performs each add individually. This sequence is only 2 instructions +longer than the original, and 2% faster on Cortex-A53. + +This is an improvement by itself, but also works around a Clang issue, +whose integrated assembler does not implement the GNU ARM asm syntax +completely, and does not support the =literal notation for FP registers +(more info at https://bugs.llvm.org/show_bug.cgi?id=38642) + +Cc: Nick Desaulniers +Signed-off-by: Ard Biesheuvel +Reviewed-by: Nick Desaulniers +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/crypto/aes-modes.S | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +--- a/arch/arm64/crypto/aes-modes.S ++++ b/arch/arm64/crypto/aes-modes.S +@@ -232,17 +232,19 @@ AES_ENTRY(aes_ctr_encrypt) + bmi .Lctr1x + cmn w6, #4 /* 32 bit overflow? */ + bcs .Lctr1x +- ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ +- dup v7.4s, w6 ++ add w7, w6, #1 + mov v0.16b, v4.16b +- add v7.4s, v7.4s, v8.4s ++ add w8, w6, #2 + mov v1.16b, v4.16b +- rev32 v8.16b, v7.16b ++ add w9, w6, #3 + mov v2.16b, v4.16b ++ rev w7, w7 + mov v3.16b, v4.16b +- mov v1.s[3], v8.s[0] +- mov v2.s[3], v8.s[1] +- mov v3.s[3], v8.s[2] ++ rev w8, w8 ++ mov v1.s[3], w7 ++ rev w9, w9 ++ mov v2.s[3], w8 ++ mov v3.s[3], w9 + ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */ + bl aes_encrypt_block4x + eor v0.16b, v5.16b, v0.16b diff --git a/queue-4.19/series b/queue-4.19/series index 57b004d3bd6..47ad7d8cc88 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -26,3 +26,4 @@ tpm-efi-don-t-create-binary_bios_measurements-file-f.patch btrfs-fix-missing-error-return-if-writeback-for-exte.patch ath9k_htc-use-appropriate-rs_datalen-type.patch netfilter-use-actual-socket-sk-rather-than-skb-sk-when-routing-harder.patch +crypto-arm64-aes-modes-get-rid-of-literal-load-of-addend-vector.patch