From 24f32f14e963fd2d73816e3c5c0bdef1a68be47a Mon Sep 17 00:00:00 2001
From: "fangming.fang"
Date: Fri, 26 Jan 2024 10:48:17 +0000
Subject: [PATCH] Implement AES-CBC-HMAC-SHA512 on aarch64

This implements #19932: it adds an encrypt-then-MAC AES-CBC-HMAC-SHA512
cipher on aarch64, with AES-CBC and HMAC-SHA512 interleaved to achieve
better performance. It only supports non-padding mode, which means the
length of the input data must be a multiple of 16 bytes.

Reviewed-by: Tomas Mraz
Reviewed-by: Tom Cosgrove
(Merged from https://github.com/openssl/openssl/pull/22949)
---
 crypto/aes/asm/aes-sha512-armv8.pl            | 2967 +++++++++++++++++
 crypto/aes/build.info                         |    4 +-
 crypto/objects/obj_dat.h                      |   15 +-
 crypto/objects/obj_mac.num                    |    3 +
 crypto/objects/objects.txt                    |    3 +
 include/crypto/aes_platform.h                 |    4 +-
 include/openssl/obj_mac.h                     |   12 +
 .../common/include/prov/providercommon.h      |    1 +
 providers/defltprov.c                         |   12 +-
 providers/fips/fipsprov.c                     |    6 +
 providers/implementations/ciphers/build.info  |    1 +
 .../ciphers/cipher_aes_cbc_hmac_sha1_etm_hw.c |    6 +-
 .../cipher_aes_cbc_hmac_sha256_etm_hw.c       |   14 +-
 .../cipher_aes_cbc_hmac_sha512_etm_hw.c       |  191 ++
 .../ciphers/cipher_aes_cbc_hmac_sha_etm.c     |   57 +-
 .../ciphers/cipher_aes_cbc_hmac_sha_etm.h     |    9 +-
 .../include/prov/implementations.h            |    3 +
 .../implementations/include/prov/names.h      |    3 +
 test/evp_libctx_test.c                        |    2 +-
 test/evp_test.c                               |   29 +-
 .../30-test_evp_data/evpciph_aes_stitched.txt |   53 +
 21 files changed, 3359 insertions(+), 36 deletions(-)
 create mode 100644 crypto/aes/asm/aes-sha512-armv8.pl
 create mode 100644 providers/implementations/ciphers/cipher_aes_cbc_hmac_sha512_etm_hw.c

diff --git a/crypto/aes/asm/aes-sha512-armv8.pl b/crypto/aes/asm/aes-sha512-armv8.pl
new file mode 100644
index 00000000000..f51ce8390da
--- /dev/null
+++ b/crypto/aes/asm/aes-sha512-armv8.pl
@@ -0,0 +1,2967 @@
+#! /usr/bin/env perl
+
+# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+# $output is the last argument if it looks like a file (it has an extension)
+# $flavour is the first argument if it doesn't look like a file
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
+$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour \"$output\""
+    or die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+$code=<<___;
+#include "arm_arch.h"
+
+# These are offsets into the CIPH_DIGEST struct
+#define CIPHER_KEY 0
+#define CIPHER_KEY_ROUNDS 8
+#define CIPHER_IV 16
+#define HMAC_IKEYPAD 24
+#define HMAC_OKEYPAD 32
+
+.text
+.arch armv8-a+crypto
+___
+
+sub aes_block_9_rounds() {
+    my $i = shift;
+$code.=<<___;
+    /* aes block $i */
+    aese v$i.16b, v8.16b
+    aesmc v$i.16b, v$i.16b
+    aese v$i.16b, v9.16b
+    aesmc v$i.16b, v$i.16b
+    aese v$i.16b, v10.16b
+    aesmc v$i.16b, v$i.16b
+    aese v$i.16b, v11.16b
+    aesmc v$i.16b, v$i.16b
+    aese v$i.16b, v12.16b
+    aesmc v$i.16b, v$i.16b
+    aese v$i.16b, v13.16b
+    aesmc v$i.16b, v$i.16b
+    aese v$i.16b, v14.16b
+    aesmc v$i.16b, v$i.16b
+    aese v$i.16b, v15.16b
+    aesmc v$i.16b, v$i.16b
+    aese v$i.16b, v16.16b
+    aesmc v$i.16b, v$i.16b
+___
+}
+
+sub aes_block_last_rounds () {
+    my $compare = shift;
+    my $label = shift;
+    my $i = shift;
+    my $load_rk10 = shift;
+
+    if($compare == 1) {
+$code.=<<___;
+    cmp x9, #12 /* tell 128,192,256 apart */
+___
+    }
+$code.=<<___;
+    b.lt .Laes128_${label}_$i
+.Laes192_${label}_$i:
+    ldp q18,q19,[x7],32 /* rk[10],rk[11] */
+    aese v$i.16b,v17.16b
+    aesmc v$i.16b,v$i.16b
+    aese v$i.16b,v18.16b
+    aesmc v$i.16b,v$i.16b
+    b.gt .Laes256_${label}_$i
+    ld1 {v18.16b},[x7] /* rk[12] */
+    aese v$i.16b,v19.16b
+    eor v$i.16b,v$i.16b,v18.16b
+    sub x7, x7, #32 /* rewind x7 */
+    b 1f
+.Laes256_${label}_$i:
+    aese v$i.16b,v19.16b
+    aesmc v$i.16b,v$i.16b
+    ldp q18,q19,[x7],32 /* rk[12],rk[13] */
+    aese v$i.16b,v18.16b
+    aesmc v$i.16b,v$i.16b
+    ld1 {v18.16b},[x7] /* rk[14] */
+    aese v$i.16b,v19.16b
+    eor v$i.16b,v$i.16b,v18.16b
+    sub x7, x7, #64 /* rewind x7 */
+    b 1f
+.Laes128_${label}_$i:
+___
+    if ($load_rk10 == 1) {
+$code.=<<___;
+    ld1 {v18.16b},[x7]
+___
+    }
+$code.=<<___;
+    aese v$i.16b,v17.16b
+    eor v$i.16b,v$i.16b,v18.16b /* res */
+1:
+___
+}
+
+sub aes_block_dec_9_rounds() {
+    my $i = shift;
+$code.=<<___;
+    /* aes block $i */
+    aesd v$i.16b, v8.16b
+    aesimc v$i.16b, v$i.16b
+    aesd v$i.16b, v9.16b
+    aesimc v$i.16b, v$i.16b
+    aesd v$i.16b, v10.16b
+    aesimc v$i.16b, v$i.16b
+    aesd v$i.16b, v11.16b
+    aesimc v$i.16b, v$i.16b
+    aesd v$i.16b, v12.16b
+    aesimc v$i.16b, v$i.16b
+    aesd v$i.16b, v13.16b
+    aesimc v$i.16b, v$i.16b
+    aesd v$i.16b, v14.16b
+    aesimc v$i.16b, v$i.16b
+    aesd v$i.16b, v15.16b
+    aesimc v$i.16b, v$i.16b
+    aesd v$i.16b, v16.16b
+    aesimc v$i.16b, v$i.16b
+___
+}
+
+sub aes_block_dec_last_rounds () {
+    my $compare = shift;
+    my $label = shift;
+    my $i = shift;
+    my $load_rk10 = shift;
+
+    if($compare == 1) {
+$code.=<<___;
+    cmp x9, #12 /* tell 128,192,256 apart */
+___
+    }
+$code.=<<___;
+    b.lt .Laes128_${label}_$i
+.Laes192_${label}_$i:
+    ldp q18,q19,[x7],32 /* rk[10],rk[11] */
+    aesd v$i.16b,v17.16b
+    aesimc v$i.16b,v$i.16b
+    aesd v$i.16b,v18.16b
+    aesimc v$i.16b,v$i.16b
+    b.gt .Laes256_${label}_$i
+    ld1 {v18.16b},[x7] /* rk[12] */
+    aesd v$i.16b,v19.16b
+    eor v$i.16b,v$i.16b,v18.16b
+    sub x7, x7, #32 /* rewind x7 */
+    b 1f
+.Laes256_${label}_$i:
+    aesd v$i.16b,v19.16b
+    aesimc v$i.16b,v$i.16b
+    ldp q18,q19,[x7],32 /* rk[12],rk[13] */
+    aesd v$i.16b,v18.16b
+    aesimc v$i.16b,v$i.16b
+    ld1 {v18.16b},[x7] /* rk[14] */
+    aesd v$i.16b,v19.16b
+    eor v$i.16b,v$i.16b,v18.16b
+    sub x7, x7, #64 /* rewind x7 */
+    b 1f
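+    /*
+     * AES-128 tail (10 rounds): rk10 is the final round key here and is
+     * loaded only when the caller has not already preloaded it into v18.
+     * In the AES-192/256 paths above the final key is rk12/rk14 instead,
+     * and x7 is rewound so that it points back at rk10 on exit.
+     */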
+.Laes128_${label}_$i: +___ + if ($load_rk10 == 1) { +$code.=<<___; + ld1 {v18.16b},[x7] +___ + } +$code.=<<___; + aesd v$i.16b,v17.16b + eor v$i.16b,v$i.16b,v18.16b /* res */ +1: +___ +} + +sub sha512_block() { + my @H = map("v$_",(24..28)); + my @QH = map("q$_",(24..28)); + my ($FG, $DE) = map("v$_",(29..30)); + my ($QFG, $QDE) = map("q$_",(29..30)); + my $M9_10 = "v31"; + my @MSG = map("v$_", (0..7)); + my ($W0, $W1) = ("v8", "v9"); + my ($AB, $CD, $EF, $GH) = map("v$_",(20..23)); + my $need_revert = shift; + + if($need_revert == 1) { +$code.=<<___; + rev64 @MSG[0].16b, @MSG[0].16b + rev64 @MSG[1].16b, @MSG[1].16b + rev64 @MSG[2].16b, @MSG[2].16b + rev64 @MSG[3].16b, @MSG[3].16b + rev64 @MSG[4].16b, @MSG[4].16b + rev64 @MSG[5].16b, @MSG[5].16b + rev64 @MSG[6].16b, @MSG[6].16b + rev64 @MSG[7].16b, @MSG[7].16b +___ + } +$code.=<<___; + /* load const k */ + ld1 {$W0.2d}, [x10], #16 + + /* backup ABCDEFGH */ + mov $AB.16b, @H[0].16b + mov $CD.16b, @H[1].16b + mov $EF.16b, @H[2].16b + mov $GH.16b, @H[3].16b +___ +for($i = 0; $i < 32; $i++) { +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16)*/ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +} +for(;$i<40;$i++) { +$code.=<<___ if ($i<39); + ld1 {$W1.2d},[x10],#16 +___ +$code.=<<___ if ($i==39); + sub x10, x10, #80*8 // rewind +___ +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +} +$code.=<<___; + add @H[0].2d, @H[0].2d, $AB.2d + add @H[1].2d, @H[1].2d, $CD.2d + add @H[2].2d, @H[2].2d, $EF.2d + add @H[3].2d, @H[3].2d, $GH.2d +___ +} + +{ + my @H = map("v$_",(24..28)); + my @QH = map("q$_",(24..28)); + my ($FG, $DE) = map("v$_",(29..30)); + my ($QFG, $QDE) = map("q$_",(29..30)); + my $M9_10 = "v31"; + my @MSG = map("v$_", (0..7)); + my ($W0, $W1) = ("v14", "v15"); + my ($AB, $CD, $EF, $GH) = map("v$_",(20..23)); + +$code.=<<___; +/* + * asm_aescbc_sha512_hmac( + * csrc, x0 (cipher src address) + * cdst, x1 (cipher dst address) + * clen x2 (cipher length) + * dsrc, x3 (digest src address) + * ddst, x4 (digest dst address) + * dlen, x5 (digest length) + * arg x6 : + * arg->cipher.key (round keys) + * arg->cipher.key_rounds (key rounds) + * arg->cipher.iv (initialization vector) + * arg->digest.hmac.i_key_pad (partially hashed i_key_pad) + 
* arg->digest.hmac.o_key_pad (partially hashed o_key_pad) + * ) + */ + +.global asm_aescbc_sha512_hmac +.type asm_aescbc_sha512_hmac,%function + +.align 6 +.LK512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + .quad 0 // terminator + + .align 4 +asm_aescbc_sha512_hmac: + AARCH64_VALID_CALL_TARGET + /* save callee save register */ + stp d8, d9, [sp,#-64]! 
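+    /*
+     * AAPCS64 marks only the low 64 bits of v8-v15 as callee-saved, so
+     * saving the d registers is sufficient before v8 and up are reused
+     * for round keys and message schedule below.
+     */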
+ stp d10, d11, [sp,#16] + stp d12, d13, [sp,#32] + stp d14, d15, [sp,#48] + + /* load ABCDEFGH */ + ldr x7, [x6, #HMAC_IKEYPAD] + ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x7] + + ldr x7, [x6, #CIPHER_KEY] + ldr x8, [x6, #CIPHER_IV] + ldr x9, [x6, #CIPHER_KEY_ROUNDS] + mov x12, x7 /* backup x7 */ + + adr x10, .LK512 + + lsr x11, x2, #4 /* aes_block = len/16 */ + cbz x11, .Lret /* return if aes_block = 0 */ + + cmp x11, #16 + b.lt .Lenc_short_case + + ld1 {v0.16b}, [x0], #16 /* load plaintext */ + ld1 {v1.16b}, [x8] /* load iv */ + + eor v0.16b, v0.16b, v1.16b /* iv xor plaintext */ + + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + /* block 0 */ + aese v0.16b, v8.16b + aesmc v0.16b, v0.16b + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + aese v0.16b, v9.16b + aesmc v0.16b, v0.16b + aese v0.16b, v10.16b + aesmc v0.16b, v0.16b + ldp q12, q13, [x7], #32 /* rk4, rk5 */ + aese v0.16b, v11.16b + aesmc v0.16b, v0.16b + aese v0.16b, v12.16b + aesmc v0.16b, v0.16b + ldp q14, q15, [x7], #32 /* rk6, rk7 */ + aese v0.16b, v13.16b + aesmc v0.16b, v0.16b + aese v0.16b, v14.16b + aesmc v0.16b, v0.16b + ldp q16, q17, [x7], #32 /* rk8, rk9 */ + aese v0.16b, v15.16b + aesmc v0.16b, v0.16b + aese v0.16b, v16.16b + aesmc v0.16b, v0.16b + ld1 {v18.16b}, [x7] /* rk10 */ +___ +&aes_block_last_rounds(1, "enc_prelog", 0, 0); +$code.=<<___; + str q0, [x1], #16 /* store cipher result */ + ld1 {v1.16b}, [x0], #16 /* load next block */ + eor v1.16b, v1.16b, v0.16b /* output xor block */ +___ +# process aes blocks from 1 to 7 +for($i = 1; $i < 8; $i = $i + 1) { + &aes_block_9_rounds($i); + &aes_block_last_rounds(0, "enc_prelog", $i, 0); + if($i != 7) { + $next = $i + 1; +$code.=<<___; + /* load next block */ + ld1 {v$next.16b}, [x0], #16 + /* output xor block */ + eor v$next.16b, v$next.16b, v$i.16b +___ + } +$code.=<<___; + str q$i, [x1], #16 /* store cipher result */ +___ +} +$code.=<<___; + sub x11, x11, #8 + +.Lenc_main_loop: + mov x7, x12 + mov x14, x1 + /* aes block 0 */ + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + ld1 {v12.16b}, [x0], #16 + eor v12.16b, v12.16b, v7.16b + + /* reverse message */ + rev64 @MSG[0].16b, @MSG[0].16b + rev64 @MSG[1].16b, @MSG[1].16b + rev64 @MSG[2].16b, @MSG[2].16b + rev64 @MSG[3].16b, @MSG[3].16b + rev64 @MSG[4].16b, @MSG[4].16b + rev64 @MSG[5].16b, @MSG[5].16b + rev64 @MSG[6].16b, @MSG[6].16b + rev64 @MSG[7].16b, @MSG[7].16b + ld1 {$W0.2d}, [x10], #16 /* load const k*/ + + /* backup ABCDEFGH */ + mov $AB.16b, @H[0].16b + mov $CD.16b, @H[1].16b + mov $EF.16b, @H[2].16b + mov $GH.16b, @H[3].16b + + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = 
(@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt 
+ Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + cmp x9, #12 + b.lt .Lenc_main_loop_aes128_0 +.Lenc_main_loop_aes192_0: + ldp q10, q11, [x7], #32 /* rk10, rk11 */ + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + b.gt .Lenc_main_loop_aes256_0 + ld1 {v8.16b},[x7] /* rk12 */ + aese v12.16b, v11.16b + eor v12.16b, v12.16b, v8.16b + b 1f +.Lenc_main_loop_aes256_0: + ldp q8, q9, [x7], #32 /* rk12, rk13 */ + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ld1 {v10.16b},[x7] /* rk14 */ + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b + b 1f +.Lenc_main_loop_aes128_0: + ld1 {v10.16b},[x7] /* rk10 */ + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b +1: + st1 {v12.16b}, [x1], #16 + /* aes block 1 */ + mov x7, x12 + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + ld1 {v13.16b}, [x0], #16 + eor v12.16b, v12.16b, v13.16b + + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, 
#8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + cmp x9, #12 + b.lt .Lenc_main_loop_aes128_1 +.Lenc_main_loop_aes192_1: + ldp q10, q11, [x7], #32 /* rk10, rk11 */ + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + b.gt .Lenc_main_loop_aes256_1 + ld1 {v8.16b},[x7] /* rk12 */ + aese v12.16b, v11.16b + eor v12.16b, v12.16b, v8.16b + b 1f +.Lenc_main_loop_aes256_1: + ldp q8, q9, [x7], #32 /* rk12, rk13 */ + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ld1 {v10.16b},[x7] /* rk14 */ + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b + b 1f +.Lenc_main_loop_aes128_1: + ld1 {v10.16b},[x7] /* rk10 */ + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b +1: + st1 {v12.16b}, [x1], #16 + /* aes block 2 */ + mov x7, x12 + ldp q8, q9, [x7], #32 
/* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + ld1 {v13.16b}, [x0], #16 + eor v12.16b, v12.16b, v13.16b + + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + 
sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + cmp x9, #12 + b.lt .Lenc_main_loop_aes128_2 +.Lenc_main_loop_aes192_2: + ldp q10, q11, [x7], #32 /* rk10, rk11 */ + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + b.gt .Lenc_main_loop_aes256_2 + ld1 {v8.16b},[x7] /* rk12 */ + aese v12.16b, v11.16b + eor v12.16b, v12.16b, v8.16b + b 1f +.Lenc_main_loop_aes256_2: + ldp q8, q9, [x7], #32 /* rk12, rk13 */ + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ld1 {v10.16b},[x7] /* rk14 */ + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b + b 1f +.Lenc_main_loop_aes128_2: + ld1 {v10.16b},[x7] /* rk10 */ + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b +1: + st1 {v12.16b}, [x1], #16 + /* aes block 3 */ + mov x7, x12 + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + ld1 {v13.16b}, [x0], #16 + eor v12.16b, v12.16b, v13.16b + + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, 
@H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d 
+ T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + cmp x9, #12 + b.lt .Lenc_main_loop_aes128_3 +.Lenc_main_loop_aes192_3: + ldp q10, q11, [x7], #32 /* rk10, rk11 */ + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + b.gt .Lenc_main_loop_aes256_3 + ld1 {v8.16b},[x7] /* rk12 */ + aese v12.16b, v11.16b + eor v12.16b, v12.16b, v8.16b + b 1f +.Lenc_main_loop_aes256_3: + ldp q8, q9, [x7], #32 /* rk12, rk13 */ + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ld1 {v10.16b},[x7] /* rk14 */ + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b + b 1f +.Lenc_main_loop_aes128_3: + ld1 {v10.16b},[x7] /* rk10 */ + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b +1: + st1 {v12.16b}, [x1], #16 + /* aes block 4 */ + mov x7, x12 + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + ld1 {v13.16b}, [x0], #16 + eor v12.16b, v12.16b, v13.16b + + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 
@MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + cmp x9, #12 + b.lt .Lenc_main_loop_aes128_4 +.Lenc_main_loop_aes192_4: + ldp q10, q11, [x7], #32 /* rk10, rk11 */ + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + b.gt .Lenc_main_loop_aes256_4 + ld1 {v8.16b},[x7] /* rk12 */ + aese v12.16b, v11.16b + eor v12.16b, v12.16b, v8.16b + b 1f +.Lenc_main_loop_aes256_4: + ldp q8, q9, [x7], #32 /* rk12, rk13 */ + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ld1 {v10.16b},[x7] /* rk14 */ + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b + b 1f +.Lenc_main_loop_aes128_4: + ld1 {v10.16b},[x7] /* rk10 */ + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b +1: + st1 {v12.16b}, [x1], #16 + /* aes block 5 */ + mov x7, x12 + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + ld1 {v13.16b}, [x0], #16 + eor v12.16b, v12.16b, v13.16b + + add $W0.2d, $W0.2d, 
$MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v10.16b + aesmc v12.16b, 
v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + cmp x9, #12 + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + b.lt .Lenc_main_loop_aes128_5 +.Lenc_main_loop_aes192_5: + ldp q10, q11, [x7], #32 /* rk10, rk11 */ + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + b.gt .Lenc_main_loop_aes256_5 + ld1 {v8.16b},[x7] /* rk12 */ + aese v12.16b, v11.16b + eor v12.16b, v12.16b, v8.16b + b 1f +.Lenc_main_loop_aes256_5: + ldp q8, q9, [x7], #32 /* rk12, rk13 */ + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ld1 {v10.16b},[x7] /* rk14 */ + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b + b 1f +.Lenc_main_loop_aes128_5: + ld1 {v10.16b},[x7] /* rk10 */ + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b +1: + st1 {v12.16b}, [x1], #16 + /* aes block 6 */ + mov x7, x12 + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + ld1 {v13.16b}, [x0], #16 + eor v12.16b, v12.16b, v13.16b + + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ld1 {$W1.2d}, [x10], #16 /* load const k*/ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + ext $M9_10.16b, @MSG[4].16b, @MSG[5].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* 
rk4, rk5 */ + /* Wt_PART1 = SSIG0(W(t-15)) + W(t-16) */ + sha512su0 @MSG[0].2d, @MSG[1].2d + /* Wt = SSIG1(W(t-2)) + W(t-7) + Wt_PART1 */ + sha512su1 @MSG[0].2d, @MSG[7].2d, $M9_10.2d + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + ld1 {$W1.2d},[x10],#16 + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + ld1 {$W1.2d},[x10],#16 + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + ld1 {$W1.2d},[x10],#16 + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + cmp x9, #12 + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + b.lt .Lenc_main_loop_aes128_6 +.Lenc_main_loop_aes192_6: + ldp q10, q11, [x7], #32 /* rk10, rk11 */ + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + b.gt .Lenc_main_loop_aes256_6 + ld1 {v8.16b},[x7] /* rk12 */ + aese v12.16b, v11.16b + eor v12.16b, v12.16b, v8.16b + b 1f +.Lenc_main_loop_aes256_6: + ldp q8, q9, [x7], #32 /* rk12, rk13 */ + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ld1 {v10.16b},[x7] /* rk14 */ + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + aese v12.16b, v9.16b + eor v12.16b, 
v12.16b, v10.16b + b 1f +.Lenc_main_loop_aes128_6: + ld1 {v10.16b},[x7] /* rk10 */ + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b +1: + st1 {v12.16b}, [x1], #16 + /* aes block 7 */ + mov x7, x12 + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + ld1 {v13.16b}, [x0], #16 + eor v12.16b, v12.16b, v13.16b + + ld1 {$W1.2d},[x10],#16 + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + ld1 {$W1.2d},[x10],#16 + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + ld1 {$W1.2d},[x10],#16 + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + ld1 {$W1.2d},[x10],#16 + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, @H[2].16b, #8 + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + sub x10, x10, #80*8 // rewind + add $W0.2d, $W0.2d, $MSG[0].2d /* Kt + Wt */ + ext $W0.16b, $W0.16b, $W0.16b, #8 + ext $FG.16b, @H[2].16b, @H[3].16b, #8 + ext $DE.16b, @H[1].16b, 
@H[2].16b, #8 + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + /* T1 = h + Kt + Wt*/ + add @H[3].2d, @H[3].2d, $W0.2d + /* T1 = T1 + BSIG1(e) + CH(e,f,g) */ + sha512h @QH[3], $QFG, $DE.2d + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + cmp x9, #12 + add @H[4].2d, @H[1].2d, @H[3].2d /* d + T1 */ + /* T2 = BSIG0(a) + MAJ(a,b,c), T1 + T2 */ + sha512h2 @QH[3], @QH[1], @H[0].2d +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); + # h=g, g=f,f=e,e=d+T1,d=c,c=b,b=a,a=T1+T2 + @H = (@H[3],@H[0],@H[4],@H[2],@H[1]); + @QH = (@QH[3],@QH[0],@QH[4],@QH[2],@QH[1]); +$code.=<<___; + b.lt .Lenc_main_loop_aes128_7 +.Lenc_main_loop_aes192_7: + ldp q10, q11, [x7], #32 /* rk10, rk11 */ + aese v12.16b, v9.16b + aesmc v12.16b, v12.16b + aese v12.16b, v10.16b + aesmc v12.16b, v12.16b + b.gt .Lenc_main_loop_aes256_7 + ld1 {v8.16b},[x7] /* rk12 */ + aese v12.16b, v11.16b + eor v12.16b, v12.16b, v8.16b + b 1f +.Lenc_main_loop_aes256_7: + ldp q8, q9, [x7], #32 /* rk12, rk13 */ + aese v12.16b, v11.16b + aesmc v12.16b, v12.16b + ld1 {v10.16b},[x7] /* rk14 */ + aese v12.16b, v8.16b + aesmc v12.16b, v12.16b + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b + b 1f +.Lenc_main_loop_aes128_7: + ld1 {v10.16b},[x7] /* rk10 */ + aese v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b +1: + add @H[0].2d, @H[0].2d, $AB.2d + add @H[1].2d, @H[1].2d, $CD.2d + add @H[2].2d, @H[2].2d, $EF.2d + add @H[3].2d, @H[3].2d, $GH.2d + + st1 {v12.16b}, [x1], #16 + + ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x14], #64 + ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [x14] + + sub x11, x11, #8 + cmp x11, #8 + b.ge .Lenc_main_loop + + /* epilog - process sha block */ +___ + &sha512_block(1); +$code.=<<___; + mov x7, x12 + ld1 {v0.16b}, [x0], #16 /* load plaintext */ + ldr q1, [x14, #48] /* load the last output of aes block */ + eor v0.16b, v0.16b, v1.16b + + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + ldp q12, q13, [x7], #32 /* rk4, rk5 */ + ldp q14, q15, [x7], #32 /* rk6, rk7 */ + ldp q16, q17, [x7], #32 /* rk8, rk9 */ + ld1 {v18.16b}, [x7] /* rk10 */ + + mov w12, #0x80 /* sha padding 0b10000000 */ + b .Lenc_less_than_8_block + + /* aes_block < 16 */ +.Lenc_short_case: + ld1 {v0.16b}, [x0], #16 /* load plaintext */ + ld1 {v1.16b}, [x8] /* load iv */ + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + ldp q12, q13, [x7], #32 /* rk4, rk5 */ + ldp q14, q15, [x7], #32 /* rk6, rk7 */ + ldp q16, q17, [x7], #32 /* rk8, rk9 */ + ld1 {v18.16b}, [x7] /* rk10 */ + mov w12, #0x80 /* sha padding 0b10000000 */ + + eor v0.16b, v0.16b, v1.16b /* iv xor plaintext */ + + cmp x11, #8 + b.lt .Lenc_less_than_8_block +___ +# process 8 aes blocks +for($i = 0; $i < 8; $i = $i + 1) { + &aes_block_9_rounds($i); + # only tell 128/192/256 at the first time + &aes_block_last_rounds(($i == 0)?1:0, "enc_short", $i, 0); + if($i != 7) { + $next = $i + 1; +$code.=<<___; + /* load next block */ + ld1 {v$next.16b}, [x0], #16 + /* output xor block */ + eor v$next.16b, v$next.16b, v$i.16b +___ + } +} +$code.=<<___; + /* store 8 blocks of ciphertext */ + stp q0, q1, [x1], #32 + stp q2, q3, [x1], #32 + stp q4, q5, [x1], #32 + stp q6, q7, [x1], #32 + + sub x11, x11, #8 +___ + # now we have a whole sha512 block + &sha512_block(1); +$code.=<<___; + ldr x7, [x6, #CIPHER_KEY] + ldp q8, q9, [x7] /* restore clobbered rk0, rk1 */ + add x7, x7, #160 /* x7 point to rk10 */ + cbz x11, .Lenc_short_no_more_aes_block + ld1 {v0.16b}, [x0], #16 /* load plaintext */ + ldr q1, [x1, -16] + eor v0.16b, v0.16b, v1.16b +.Lenc_less_than_8_block: + 
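/* Tail path: fewer than 8 AES blocks remain (x11 holds the count); each + block below is encrypted and stored one at a time. When x11 reaches zero + after block n, .Lenc_short_post_Qn zeroes the unused message registers + and places the 0x80 SHA padding byte (w12) in the next lane, completing + the final padded SHA-512 block. */ +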
cbz x11, .Lenc_short_no_more_aes_block +___ +# process remained aes blocks (<= 7) +for($i = 0; $i < 7; $i = $i + 1) { + &aes_block_9_rounds($i); + &aes_block_last_rounds(($i == 0)?1:0, "enc_short_partial", $i, 0); +$code.=<<___; + str q$i, [x1], #16 + sub x11, x11, #1 + cbz x11, .Lenc_short_post_Q$i +___ + if($i != 6) { + $next = $i + 1; +$code.=<<___; + /* load next block*/ + ld1 {v$next.16b}, [x0], #16 + /* output xor block */ + eor v$next.16b, v$next.16b, v$i.16b +___ + } +} +$code.=<<___; +.Lenc_short_no_more_aes_block: + eor v0.16b, v0.16b, v0.16b + eor v1.16b, v1.16b, v1.16b + eor v2.16b, v2.16b, v2.16b + eor v3.16b, v3.16b, v3.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v0.b[0], w12 + b .Lenc_short_post_sha +.Lenc_short_post_Q0: + eor v1.16b, v1.16b, v1.16b + eor v2.16b, v2.16b, v2.16b + eor v3.16b, v3.16b, v3.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v1.b[0], w12 + b .Lenc_short_post_sha +.Lenc_short_post_Q1: + eor v2.16b, v2.16b, v2.16b + eor v3.16b, v3.16b, v3.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v2.b[0], w12 + b .Lenc_short_post_sha +.Lenc_short_post_Q2: + eor v3.16b, v3.16b, v3.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v3.b[0], w12 + b .Lenc_short_post_sha +.Lenc_short_post_Q3: + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v4.b[0], w12 + b .Lenc_short_post_sha +.Lenc_short_post_Q4: + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v5.b[0], w12 + b .Lenc_short_post_sha +.Lenc_short_post_Q5: + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v6.b[0], w12 + b .Lenc_short_post_sha +.Lenc_short_post_Q6: + eor v7.16b, v7.16b, v7.16b + mov v7.b[0], w12 + /* we have one padded sha512 block now, process it and + then employ another one to host sha length */ +___ +&sha512_block(1); +$code.=<<___; + eor v0.16b, v0.16b, v0.16b + eor v1.16b, v1.16b, v1.16b + eor v2.16b, v2.16b, v2.16b + eor v3.16b, v3.16b, v3.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b +.Lenc_short_post_sha: + /* we have last padded sha512 block now */ + eor x13, x13, x13 /* length_lo */ + eor x14, x14, x14 /* length_hi */ + + adds x13, x13, x2, lsl #3 /* add len in bits */ + lsr x15, x2, #61 + adc x14, x14, x15 + + adds x13, x13, #1024 /* add i_key_pad 1024 bits */ + adc x14, x14, xzr + + mov v7.d[0], x14 + mov v7.d[1], x13 + rev64 v7.16b, v7.16b +___ +&sha512_block(1); +$code.=<<___; + /* Final HMAC - opad part */ + mov v0.16b, v24.16b + mov v1.16b, v25.16b + mov v2.16b, v26.16b + mov v3.16b, v27.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + + mov v4.b[7], w12 /* padding 1 */ + mov x13, #1024+512 /* length in bits */ + mov v7.d[1], x13 + + /* load ABCDEFGH for opad */ + ldr x7, [x6, #HMAC_OKEYPAD] + ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x7] +___ +&sha512_block(0); +$code.=<<___; +.Lret: + mov x0, xzr /* return 0 */ + + rev64 v24.16b, v24.16b + rev64 v25.16b, v25.16b + rev64 v26.16b, v26.16b + rev64 v27.16b, v27.16b + + /* store hash result */ + st1 {v24.2d,v25.2d,v26.2d,v27.2d},[x4] + + /* restore callee save 
register */ + ldp d10, d11, [sp,#16] + ldp d12, d13, [sp,#32] + ldp d14, d15, [sp,#48] + ldp d8, d9, [sp], #64 + ret +.size asm_aescbc_sha512_hmac, .-asm_aescbc_sha512_hmac +___ +} + +{ + my @H = map("v$_",(24..28)); + my @QH = map("q$_",(24..28)); + my ($FG, $DE) = map("v$_",(29..30)); + my ($QFG, $QDE) = map("q$_",(29..30)); + my $M9_10 = "v31"; + my @MSG = map("v$_", (0..7)); + my ($W0, $W1) = ("v14", "v15"); + my ($AB, $CD, $EF, $GH) = map("v$_",(20..23)); + +$code.=<<___; +/* + * asm_sha512_hmac_aescbc_dec( + * csrc, x0 (cipher src address) + * cdst, x1 (cipher dst address) + * clen x2 (cipher length) + * dsrc, x3 (digest src address) + * ddst, x4 (digest dst address) + * dlen, x5 (digest length) + * arg x6 : + * arg->cipher.key (round keys) + * arg->cipher.key_rounds (key rounds) + * arg->cipher.iv (initialization vector) + * arg->digest.hmac.i_key_pad (partially hashed i_key_pad) + * arg->digest.hmac.o_key_pad (partially hashed o_key_pad) + * ) + */ + +.global asm_sha512_hmac_aescbc_dec +.type asm_sha512_hmac_aescbc_dec,%function + +.align 4 +asm_sha512_hmac_aescbc_dec: + AARCH64_VALID_CALL_TARGET + /* save callee save register */ + stp d8, d9, [sp,#-64]! + stp d10, d11, [sp,#16] + stp d12, d13, [sp,#32] + stp d14, d15, [sp,#48] + + /* load ABCDEFGH */ + ldr x7, [x6, #HMAC_IKEYPAD] + ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x7] + + ldr x7, [x6, #CIPHER_KEY] + ldr x8, [x6, #CIPHER_IV] + ldr x9, [x6, #CIPHER_KEY_ROUNDS] + mov x12, x7 /* backup x7 */ + + adr x10, .LK512 + + lsr x11, x2, #4 /* aes_block = len/16 */ + cbz x11, .Ldec_ret /* return if aes_block = 0 */ + + ld1 {v20.16b}, [x8] /* load iv */ + cmp x11, #8 + b.lt .Ldec_short_case +.Ldec_main_loop: + ldp q12, q13, [x0], #32 + ldp q14, q15, [x0], #32 + ldp q16, q17, [x0], #32 + ldp q18, q19, [x0], #32 + + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + mov v0.16b, v12.16b + mov v1.16b, v13.16b + mov v2.16b, v14.16b + mov v3.16b, v15.16b + mov v4.16b, v16.16b + mov v5.16b, v17.16b + mov v6.16b, v18.16b + mov v7.16b, v19.16b + + /* 1 round */ + aesd v12.16b, v8.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v8.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v8.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v8.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v8.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v8.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v8.16b + aesimc v19.16b, v19.16b + + /* 2 round */ + aesd v12.16b, v9.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v9.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v9.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v9.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v9.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v9.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v9.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v9.16b + aesimc v19.16b, v19.16b + + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + + /* 3 round */ + aesd v12.16b, v10.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v10.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v10.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v10.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v10.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v10.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v10.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v10.16b + aesimc v19.16b, v19.16b + + /* 4 round */ + aesd v12.16b, v11.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v11.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v11.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v11.16b + aesimc v15.16b, v15.16b + aesd 
v16.16b, v11.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v11.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v11.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v11.16b + aesimc v19.16b, v19.16b + + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + + /* 5 round */ + aesd v12.16b, v8.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v8.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v8.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v8.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v8.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v8.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v8.16b + aesimc v19.16b, v19.16b + + /* 6 round */ + aesd v12.16b, v9.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v9.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v9.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v9.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v9.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v9.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v9.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v9.16b + aesimc v19.16b, v19.16b + + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + + /* 7 round */ + aesd v12.16b, v10.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v10.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v10.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v10.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v10.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v10.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v10.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v10.16b + aesimc v19.16b, v19.16b + + /* 8 round */ + aesd v12.16b, v11.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v11.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v11.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v11.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v11.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v11.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v11.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v11.16b + aesimc v19.16b, v19.16b + + /* 9 round */ + aesd v12.16b, v8.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v8.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v8.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v8.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v8.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v8.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v8.16b + aesimc v19.16b, v19.16b + + cmp x9, #12 /* tell 128,192,256 apart */ + + b.lt .Laes128_dec_main +.Laes192_dec_main: + ldp q10,q11,[x7],32 /* rk10,rk11 */ + /* 10 round */ + aesd v12.16b, v9.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v9.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v9.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v9.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v9.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v9.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v9.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v9.16b + aesimc v19.16b, v19.16b + + /* 11 round */ + aesd v12.16b, v10.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v10.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v10.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v10.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v10.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v10.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v10.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v10.16b + aesimc v19.16b, v19.16b + b.gt .Laes256_dec_main + + ld1 {v8.16b},[x7] /* rk12 */ + + /*12 round */ + aesd v12.16b, v11.16b + eor v12.16b, v12.16b, v8.16b + aesd v13.16b, v11.16b + eor v13.16b, v13.16b, v8.16b + aesd v14.16b, v11.16b + eor v14.16b, v14.16b, v8.16b + aesd 
v15.16b, v11.16b + eor v15.16b, v15.16b, v8.16b + aesd v16.16b, v11.16b + eor v16.16b, v16.16b, v8.16b + aesd v17.16b, v11.16b + eor v17.16b, v17.16b, v8.16b + aesd v18.16b, v11.16b + eor v18.16b, v18.16b, v8.16b + aesd v19.16b, v11.16b + eor v19.16b, v19.16b, v8.16b + + sub x7, x7, #192 /* rewind x7 */ + b 1f +.Laes256_dec_main: + ldp q8,q9,[x7],32 /* rk12,rk13 */ + /* 12 round */ + aesd v12.16b, v11.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v11.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v11.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v11.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v11.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v11.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v11.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v11.16b + aesimc v19.16b, v19.16b + + /* 13 round */ + aesd v12.16b, v8.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v8.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v8.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v8.16b + aesimc v15.16b, v15.16b + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + aesd v17.16b, v8.16b + aesimc v17.16b, v17.16b + aesd v18.16b, v8.16b + aesimc v18.16b, v18.16b + aesd v19.16b, v8.16b + aesimc v19.16b, v19.16b + + ld1 {v10.16b},[x7] /* rk14 */ + + /* 14 round */ + aesd v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b + aesd v13.16b, v9.16b + eor v13.16b, v13.16b, v10.16b + aesd v14.16b, v9.16b + eor v14.16b, v14.16b, v10.16b + aesd v15.16b, v9.16b + eor v15.16b, v15.16b, v10.16b + aesd v16.16b, v9.16b + eor v16.16b, v16.16b, v10.16b + aesd v17.16b, v9.16b + eor v17.16b, v17.16b, v10.16b + aesd v18.16b, v9.16b + eor v18.16b, v18.16b, v10.16b + aesd v19.16b, v9.16b + eor v19.16b, v19.16b, v10.16b + + sub x7, x7, #224 + b 1f +.Laes128_dec_main: + ld1 {v10.16b},[x7] /* rk10 */ + aesd v12.16b,v9.16b + eor v12.16b, v12.16b, v10.16b + aesd v13.16b,v9.16b + eor v13.16b, v13.16b, v10.16b + aesd v14.16b,v9.16b + eor v14.16b, v14.16b, v10.16b + aesd v15.16b,v9.16b + eor v15.16b, v15.16b, v10.16b + aesd v16.16b,v9.16b + eor v16.16b, v16.16b, v10.16b + aesd v17.16b,v9.16b + eor v17.16b, v17.16b, v10.16b + aesd v18.16b,v9.16b + eor v18.16b, v18.16b, v10.16b + aesd v19.16b,v9.16b + eor v19.16b, v19.16b, v10.16b + sub x7, x7, #160 + +1: + eor v12.16b, v12.16b, v20.16b + eor v13.16b, v13.16b, v0.16b + eor v14.16b, v14.16b, v1.16b + eor v15.16b, v15.16b, v2.16b + eor v16.16b, v16.16b, v3.16b + eor v17.16b, v17.16b, v4.16b + eor v18.16b, v18.16b, v5.16b + eor v19.16b, v19.16b, v6.16b + + stp q12,q13, [x1], #32 + ldr q12, [x0, #-16] /* load last cipher */ + stp q14,q15, [x1], #32 + stp q16,q17, [x1], #32 + stp q18,q19, [x1], #32 +___ + &sha512_block(1); +$code.=<<___; + mov v20.16b, v12.16b /* load last cipher */ + sub x11, x11, #8 + cmp x11, #8 + b.ge .Ldec_main_loop + + /* aes_block < 8 */ +.Ldec_short_case: + mov w12, #0x80 /* sha padding 0b10000000 */ + cbnz x11, 1f + eor v0.16b, v0.16b, v0.16b + eor v1.16b, v1.16b, v1.16b + eor v2.16b, v2.16b, v2.16b + eor v3.16b, v3.16b, v3.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v0.b[0], w12 + b .Ldec_short_post_sha +1: + cmp x11, #4 + b.lt .Ldec_less_than_4_block + + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + ldp q12, q13, [x0], #32 + ldp q14, q15, [x0], #32 + + mov v0.16b, v12.16b + mov v1.16b, v13.16b + mov v2.16b, v14.16b + mov v3.16b, v15.16b + + /* 1 round */ + aesd v12.16b, v8.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v8.16b + aesimc v13.16b, v13.16b + aesd v14.16b, 
v8.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v8.16b + aesimc v15.16b, v15.16b + + /* 2 round */ + aesd v12.16b, v9.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v9.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v9.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v9.16b + aesimc v15.16b, v15.16b + + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + + /* 3 round */ + aesd v12.16b, v10.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v10.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v10.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v10.16b + aesimc v15.16b, v15.16b + + /* 4 round */ + aesd v12.16b, v11.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v11.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v11.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v11.16b + aesimc v15.16b, v15.16b + + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + + /* 5 round */ + aesd v12.16b, v8.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v8.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v8.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v8.16b + aesimc v15.16b, v15.16b + + /* 6 round */ + aesd v12.16b, v9.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v9.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v9.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v9.16b + aesimc v15.16b, v15.16b + + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + + /* 7 round */ + aesd v12.16b, v10.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v10.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v10.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v10.16b + aesimc v15.16b, v15.16b + + /* 8 round */ + aesd v12.16b, v11.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v11.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v11.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v11.16b + aesimc v15.16b, v15.16b + + /* 9 round */ + aesd v12.16b, v8.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v8.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v8.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v8.16b + aesimc v15.16b, v15.16b + + cmp x9, #12 /* tell 128,192,256 apart */ + + b.lt .Laes128_dec_short +.Laes192_dec_short: + ldp q10,q11,[x7],32 /* rk10,rk11 */ + + /* 10 round */ + aesd v12.16b, v9.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v9.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v9.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v9.16b + aesimc v15.16b, v15.16b + + /* 11 round */ + aesd v12.16b, v10.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v10.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v10.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v10.16b + aesimc v15.16b, v15.16b + b.gt .Laes256_dec_short + + ld1 {v8.16b},[x7] /* rk12 */ + + /*12 round */ + aesd v12.16b, v11.16b + eor v12.16b, v12.16b, v8.16b + aesd v13.16b, v11.16b + eor v13.16b, v13.16b, v8.16b + aesd v14.16b, v11.16b + eor v14.16b, v14.16b, v8.16b + aesd v15.16b, v11.16b + eor v15.16b, v15.16b, v8.16b + + sub x7, x7, #192 /* rewind x7 */ + b 1f +.Laes256_dec_short: + ldp q8,q9,[x7],32 /* rk12,rk13 */ + /* 12 round */ + aesd v12.16b, v11.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v11.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v11.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v11.16b + aesimc v15.16b, v15.16b + + /* 13 round */ + aesd v12.16b, v8.16b + aesimc v12.16b, v12.16b + aesd v13.16b, v8.16b + aesimc v13.16b, v13.16b + aesd v14.16b, v8.16b + aesimc v14.16b, v14.16b + aesd v15.16b, v8.16b + aesimc v15.16b, v15.16b + + ld1 {v10.16b},[x7] /* rk14 */ + + /* 14 round */ + aesd v12.16b, v9.16b + eor v12.16b, v12.16b, v10.16b + aesd v13.16b, v9.16b + eor v13.16b, v13.16b, v10.16b + aesd v14.16b, v9.16b + eor v14.16b, v14.16b, v10.16b + 
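/* last-round pattern for the remaining lanes: aesd applies rk13 (v9), + the following eor adds rk14 (v10); the final AES round omits aesimc */ +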
aesd v15.16b, v9.16b + eor v15.16b, v15.16b, v10.16b + + sub x7, x7, #224 + b 1f +.Laes128_dec_short: + ld1 {v10.16b},[x7] /* rk10 */ + aesd v12.16b,v9.16b + eor v12.16b, v12.16b, v10.16b + aesd v13.16b,v9.16b + eor v13.16b, v13.16b, v10.16b + aesd v14.16b,v9.16b + eor v14.16b, v14.16b, v10.16b + aesd v15.16b,v9.16b + eor v15.16b, v15.16b, v10.16b + sub x7, x7, #160 +1: + eor v12.16b, v12.16b, v20.16b + eor v13.16b, v13.16b, v0.16b + eor v14.16b, v14.16b, v1.16b + eor v15.16b, v15.16b, v2.16b + ldr q20, [x0, #-16] + + sub x11, x11, #4 + + stp q12,q13, [x1], #32 + stp q14,q15, [x1], #32 + cbz x11, .Ldec_short_post_Q3 +___ +for($i = 0; $i < 3; $i = $i + 1) { + $block = $i + 4; +$code.=<<___; + ld1 {v16.16b}, [x0], #16 + mov v$block.16b, v16.16b + + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v9.16b + aesimc v16.16b, v16.16b + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + aesd v16.16b, v10.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v11.16b + aesimc v16.16b, v16.16b + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v9.16b + aesimc v16.16b, v16.16b + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + aesd v16.16b, v10.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v11.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + cmp x9, #12 /* tell 128,192,256 apart */ + b.lt .Laes128_dec_short_$block +.Laes192_dec_short_$block: + ldp q10,q11,[x7],32 /* rk10,rk11 */ + aesd v16.16b, v9.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v10.16b + aesimc v16.16b, v16.16b + b.gt .Laes256_dec_short_$block + ld1 {v8.16b},[x7] /* rk12 */ + aesd v16.16b, v11.16b + eor v16.16b, v16.16b, v8.16b + sub x7, x7, #192 /* rewind x7 */ + b 1f +.Laes256_dec_short_$block: + ldp q8,q9,[x7],32 /* rk12,rk13 */ + aesd v16.16b, v11.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + ld1 {v10.16b},[x7] /* rk14 */ + aesd v16.16b, v9.16b + eor v16.16b, v16.16b, v10.16b + sub x7, x7, #224 + b 1f +.Laes128_dec_short_$block: + ld1 {v10.16b},[x7] /* rk10 */ + aesd v16.16b,v9.16b + eor v16.16b, v16.16b, v10.16b + sub x7, x7, #160 +1: + sub x11, x11, 1 + eor v16.16b, v16.16b, v20.16b + ldr q20, [x0, #-16] + st1 {v16.16b}, [x1], #16 + cbz x11, .Ldec_short_post_Q$block +___ +} +$code.=<<___; +.Ldec_short_post_Q3: + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v4.b[0], w12 + b .Ldec_short_post_sha +.Ldec_short_post_Q4: + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v5.b[0], w12 + b .Ldec_short_post_sha +.Ldec_short_post_Q5: + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v6.b[0], w12 + b .Ldec_short_post_sha +.Ldec_short_post_Q6: + eor v7.16b, v7.16b, v7.16b + mov v7.b[0], w12 + /* we have one padded sha512 block now, process it and + then employ another one to host sha length */ +___ +&sha512_block(1); +$code.=<<___; + eor v0.16b, v0.16b, v0.16b + eor v1.16b, v1.16b, v1.16b + eor v2.16b, v2.16b, v2.16b + eor v3.16b, v3.16b, v3.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + b .Ldec_short_post_sha + +.Ldec_less_than_4_block: +___ +for($i = 0; $i < 3; $i = $i + 1) { +$code.=<<___; + ld1 {v16.16b}, [x0], #16 + mov v$i.16b, v16.16b + + ldp q8, q9, [x7], #32 /* rk0, rk1 */ + ldp q10, q11, [x7], #32 /* rk2, rk3 */ + + aesd v16.16b, v8.16b + aesimc 
v16.16b, v16.16b + aesd v16.16b, v9.16b + aesimc v16.16b, v16.16b + ldp q8, q9, [x7], #32 /* rk4, rk5 */ + aesd v16.16b, v10.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v11.16b + aesimc v16.16b, v16.16b + ldp q10, q11, [x7], #32 /* rk6, rk7 */ + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v9.16b + aesimc v16.16b, v16.16b + ldp q8, q9, [x7], #32 /* rk8, rk9 */ + aesd v16.16b, v10.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v11.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + cmp x9, #12 /* tell 128,192,256 apart */ + b.lt .Laes128_dec_short_less_than_4_$i +.Laes192_dec_short_less_than_4_$i: + ldp q10,q11,[x7],32 /* rk10,rk11 */ + aesd v16.16b, v9.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v10.16b + aesimc v16.16b, v16.16b + b.gt .Laes256_dec_short_less_than_4_$i + ld1 {v8.16b},[x7] /* rk12 */ + aesd v16.16b, v11.16b + eor v16.16b, v16.16b, v8.16b + sub x7, x7, #192 /* rewind x7 */ + b 1f +.Laes256_dec_short_less_than_4_$i: + ldp q8,q9,[x7],32 /* rk12,rk13 */ + aesd v16.16b, v11.16b + aesimc v16.16b, v16.16b + aesd v16.16b, v8.16b + aesimc v16.16b, v16.16b + ld1 {v10.16b},[x7] /* rk14 */ + aesd v16.16b, v9.16b + eor v16.16b, v16.16b, v10.16b + sub x7, x7, #224 + b 1f +.Laes128_dec_short_less_than_4_$i: + ld1 {v10.16b},[x7] /* rk10 */ + aesd v16.16b,v9.16b + eor v16.16b, v16.16b, v10.16b + sub x7, x7, #160 +1: + sub x11, x11, 1 + eor v16.16b, v16.16b, v20.16b + ldr q20, [x0, #-16] + st1 {v16.16b}, [x1], #16 + cbz x11, .Ldec_short_post_Q$i +___ +} +$code.=<<___; +.Ldec_short_post_Q0: + eor v1.16b, v1.16b, v1.16b + eor v2.16b, v2.16b, v2.16b + eor v3.16b, v3.16b, v3.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v1.b[0], w12 + b .Ldec_short_post_sha +.Ldec_short_post_Q1: + eor v2.16b, v2.16b, v2.16b + eor v3.16b, v3.16b, v3.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v2.b[0], w12 + b .Ldec_short_post_sha +.Ldec_short_post_Q2: + eor v3.16b, v3.16b, v3.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + mov v3.b[0], w12 + b .Ldec_short_post_sha +.Ldec_short_post_sha: + /* we have last padded sha512 block now */ + eor x13, x13, x13 /* length_lo */ + eor x14, x14, x14 /* length_hi */ + + adds x13, x13, x2, lsl #3 /* add len in bits */ + lsr x15, x2, #61 + adc x14, x14, x15 + + adds x13, x13, #1024 /* add i_key_pad 1024 bits */ + adc x14, x14, xzr + + mov v7.d[0], x14 + mov v7.d[1], x13 + rev64 v7.16b, v7.16b +___ +&sha512_block(1); +$code.=<<___; + /* Final HMAC - opad part */ + mov v0.16b, v24.16b + mov v1.16b, v25.16b + mov v2.16b, v26.16b + mov v3.16b, v27.16b + eor v4.16b, v4.16b, v4.16b + eor v5.16b, v5.16b, v5.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + + mov v4.b[7], w12 /* padding 1 */ + mov x13, #1024+512 /* length in bits */ + mov v7.d[1], x13 + + /* load ABCDEFGH for opad */ + ldr x7, [x6, #HMAC_OKEYPAD] + ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x7] +___ +&sha512_block(0); +$code.=<<___; +.Ldec_ret: + mov x0, xzr /* return 0 */ + + rev64 v24.16b, v24.16b + rev64 v25.16b, v25.16b + rev64 v26.16b, v26.16b + rev64 v27.16b, v27.16b + + /* store hash result */ + st1 {v24.2d,v25.2d,v26.2d,v27.2d},[x4] + + /* restore callee save register */ + ldp d10, d11, [sp,#16] + ldp d12, d13, [sp,#32] + ldp d14, d15, [sp,#48] + ldp d8, d9, [sp], #64 + ret +.size asm_sha512_hmac_aescbc_dec, 
.-asm_sha512_hmac_aescbc_dec +___ +} +######################################### +{ my %opcode = ( + "sha512h" => 0xce608000, "sha512h2" => 0xce608400, + "sha512su0" => 0xcec08000, "sha512su1" => 0xce608800 ); + + sub unsha512 { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o + && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5)|($3<<16), + $mnemonic,$arg; + } +} + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/\/\// and !/^$/); + print; +} +close SELF; + +foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/ge; + s/\b(sha512\w+)\s+([qv].*)/unsha512($1,$2)/ge; + print $_,"\n"; +} + +close STDOUT or die "error closing STDOUT: $!"; \ No newline at end of file diff --git a/crypto/aes/build.info b/crypto/aes/build.info index 661b34592f2..ed79316b006 100644 --- a/crypto/aes/build.info +++ b/crypto/aes/build.info @@ -33,7 +33,7 @@ IF[{- !$disabled{asm} -}] $AESDEF_armv4=AES_ASM BSAES_ASM $AESASM_aarch64=\ aes_core.c aes_cbc.c aesv8-armx.S bsaes-armv8.S vpaes-armv8.S \ - aes-sha1-armv8.S aes-sha256-armv8.S + aes-sha1-armv8.S aes-sha256-armv8.S aes-sha512-armv8.S $AESDEF_aarch64=BSAES_ASM VPAES_ASM $AESASM_parisc11=aes_core.c aes_cbc.c aes-parisc.s @@ -143,6 +143,8 @@ GENERATE[aes-sha1-armv8.S]=asm/aes-sha1-armv8.pl INCLUDE[aes-sha1-armv8.o]=.. GENERATE[aes-sha256-armv8.S]=asm/aes-sha256-armv8.pl INCLUDE[aes-sha256-armv8.o]=.. +GENERATE[aes-sha512-armv8.S]=asm/aes-sha512-armv8.pl +INCLUDE[aes-sha512-armv8.o]=.. GENERATE[aes-armv4.S]=asm/aes-armv4.pl INCLUDE[aes-armv4.o]=.. diff --git a/crypto/objects/obj_dat.h b/crypto/objects/obj_dat.h index 4bd45d3558b..dcd0f3f41c4 100644 --- a/crypto/objects/obj_dat.h +++ b/crypto/objects/obj_dat.h @@ -1350,7 +1350,7 @@ static const unsigned char so[9517] = { 0x60,0x86,0x48,0x01,0x65,0x03,0x04,0x03,0x2E, /* [ 9507] OBJ_SLH_DSA_SHAKE_256f_WITH_SHAKE256 */ }; -#define NUM_NID 1493 +#define NUM_NID 1496 static const ASN1_OBJECT nid_objs[NUM_NID] = { {"UNDEF", "undefined", NID_undef}, {"rsadsi", "RSA Data Security, Inc.", NID_rsadsi, 6, &so[0]}, @@ -2845,9 +2845,12 @@ static const ASN1_OBJECT nid_objs[NUM_NID] = { {"AES-128-CBC-HMAC-SHA256-ETM", "aes-128-cbc-hmac-sha256-etm", NID_aes_128_cbc_hmac_sha256_etm}, {"AES-192-CBC-HMAC-SHA256-ETM", "aes-192-cbc-hmac-sha256-etm", NID_aes_192_cbc_hmac_sha256_etm}, {"AES-256-CBC-HMAC-SHA256-ETM", "aes-256-cbc-hmac-sha256-etm", NID_aes_256_cbc_hmac_sha256_etm}, + {"AES-128-CBC-HMAC-SHA512-ETM", "aes-128-cbc-hmac-sha512-etm", NID_aes_128_cbc_hmac_sha512_etm}, + {"AES-192-CBC-HMAC-SHA512-ETM", "aes-192-cbc-hmac-sha512-etm", NID_aes_192_cbc_hmac_sha512_etm}, + {"AES-256-CBC-HMAC-SHA512-ETM", "aes-256-cbc-hmac-sha512-etm", NID_aes_256_cbc_hmac_sha512_etm}, }; -#define NUM_SN 1484 +#define NUM_SN 1487 static const unsigned int sn_objs[NUM_SN] = { 364, /* "AD_DVCS" */ 419, /* "AES-128-CBC" */ 916, /* "AES-128-CBC-HMAC-SHA1" */ 1487, /* "AES-128-CBC-HMAC-SHA1-ETM" */ 948, /* "AES-128-CBC-HMAC-SHA256" */ 1490, /* "AES-128-CBC-HMAC-SHA256-ETM" */ + 1493, /* "AES-128-CBC-HMAC-SHA512-ETM" */ 421, /* "AES-128-CFB" */ 650, /* "AES-128-CFB1" */ 653, /* "AES-128-CFB8" */ @@ -2869,6 +2873,7 @@ 1488, /* "AES-192-CBC-HMAC-SHA1-ETM" */ 949, /* "AES-192-CBC-HMAC-SHA256" */ 1491, /* "AES-192-CBC-HMAC-SHA256-ETM" */ + 1494, /* "AES-192-CBC-HMAC-SHA512-ETM" */ 425, /* "AES-192-CFB" */ 651, /* "AES-192-CFB1" */ 654, /* "AES-192-CFB8" */ @@ -2882,6 +2887,7 @@ static const unsigned int
sn_objs[NUM_SN] = { 1489, /* "AES-256-CBC-HMAC-SHA1-ETM" */ 950, /* "AES-256-CBC-HMAC-SHA256" */ 1492, /* "AES-256-CBC-HMAC-SHA256-ETM" */ + 1495, /* "AES-256-CBC-HMAC-SHA512-ETM" */ 429, /* "AES-256-CFB" */ 652, /* "AES-256-CFB1" */ 655, /* "AES-256-CFB8" */ @@ -4335,7 +4341,7 @@ static const unsigned int sn_objs[NUM_SN] = { 1289, /* "zstd" */ }; -#define NUM_LN 1484 +#define NUM_LN 1487 static const unsigned int ln_objs[NUM_LN] = { 363, /* "AD Time Stamping" */ 405, /* "ANSI X9.62" */ @@ -4773,6 +4779,7 @@ static const unsigned int ln_objs[NUM_LN] = { 1487, /* "aes-128-cbc-hmac-sha1-etm" */ 948, /* "aes-128-cbc-hmac-sha256" */ 1490, /* "aes-128-cbc-hmac-sha256-etm" */ + 1493, /* "aes-128-cbc-hmac-sha512-etm" */ 896, /* "aes-128-ccm" */ 421, /* "aes-128-cfb" */ 650, /* "aes-128-cfb1" */ @@ -4789,6 +4796,7 @@ static const unsigned int ln_objs[NUM_LN] = { 1488, /* "aes-192-cbc-hmac-sha1-etm" */ 949, /* "aes-192-cbc-hmac-sha256" */ 1491, /* "aes-192-cbc-hmac-sha256-etm" */ + 1494, /* "aes-192-cbc-hmac-sha512-etm" */ 899, /* "aes-192-ccm" */ 425, /* "aes-192-cfb" */ 651, /* "aes-192-cfb1" */ @@ -4804,6 +4812,7 @@ static const unsigned int ln_objs[NUM_LN] = { 1489, /* "aes-256-cbc-hmac-sha1-etm" */ 950, /* "aes-256-cbc-hmac-sha256" */ 1492, /* "aes-256-cbc-hmac-sha256-etm" */ + 1495, /* "aes-256-cbc-hmac-sha512-etm" */ 902, /* "aes-256-ccm" */ 429, /* "aes-256-cfb" */ 652, /* "aes-256-cfb1" */ diff --git a/crypto/objects/obj_mac.num b/crypto/objects/obj_mac.num index b4363931112..15bd8909e8e 100644 --- a/crypto/objects/obj_mac.num +++ b/crypto/objects/obj_mac.num @@ -1490,3 +1490,6 @@ aes_256_cbc_hmac_sha1_etm 1489 aes_128_cbc_hmac_sha256_etm 1490 aes_192_cbc_hmac_sha256_etm 1491 aes_256_cbc_hmac_sha256_etm 1492 +aes_128_cbc_hmac_sha512_etm 1493 +aes_192_cbc_hmac_sha512_etm 1494 +aes_256_cbc_hmac_sha512_etm 1495 diff --git a/crypto/objects/objects.txt b/crypto/objects/objects.txt index 9c61c4a642a..6afeefff608 100644 --- a/crypto/objects/objects.txt +++ b/crypto/objects/objects.txt @@ -1727,6 +1727,9 @@ sm-scheme 104 10 : SM4-XTS : sm4-xts : AES-128-CBC-HMAC-SHA256-ETM : aes-128-cbc-hmac-sha256-etm : AES-192-CBC-HMAC-SHA256-ETM : aes-192-cbc-hmac-sha256-etm : AES-256-CBC-HMAC-SHA256-ETM : aes-256-cbc-hmac-sha256-etm + : AES-128-CBC-HMAC-SHA512-ETM : aes-128-cbc-hmac-sha512-etm + : AES-192-CBC-HMAC-SHA512-ETM : aes-192-cbc-hmac-sha512-etm + : AES-256-CBC-HMAC-SHA512-ETM : aes-256-cbc-hmac-sha512-etm ISO-US 10046 2 1 : dhpublicnumber : X9.42 DH diff --git a/include/crypto/aes_platform.h b/include/crypto/aes_platform.h index 34aa74ecb25..bdd51976a7a 100644 --- a/include/crypto/aes_platform.h +++ b/include/crypto/aes_platform.h @@ -94,7 +94,7 @@ void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp, size_t len); # if (defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(_M_ARM64)) # include "crypto/arm_arch.h" -# if __ARM_MAX_ARCH__>=7 +# if __ARM_MAX_ARCH__ >= 7 # if defined(BSAES_ASM) # define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) # endif @@ -116,6 +116,8 @@ void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp, size_t len); (OPENSSL_armcap_P & ARMV8_SHA1)) # define HWAES_CBC_HMAC_SHA256_ETM_CAPABLE (HWAES_CAPABLE && \ (OPENSSL_armcap_P & ARMV8_SHA256)) +# define HWAES_CBC_HMAC_SHA512_ETM_CAPABLE (HWAES_CAPABLE && \ + (OPENSSL_armcap_P & ARMV8_SHA512)) # ifndef __AARCH64EB__ # define AES_CBC_HMAC_SHA_ETM_CAPABLE 1 # endif diff --git a/include/openssl/obj_mac.h b/include/openssl/obj_mac.h index 0f3d79a889f..8326216f67f 100644 --- 
a/include/openssl/obj_mac.h +++ b/include/openssl/obj_mac.h @@ -5482,6 +5482,18 @@ #define LN_aes_256_cbc_hmac_sha256_etm "aes-256-cbc-hmac-sha256-etm" #define NID_aes_256_cbc_hmac_sha256_etm 1492 +#define SN_aes_128_cbc_hmac_sha512_etm "AES-128-CBC-HMAC-SHA512-ETM" +#define LN_aes_128_cbc_hmac_sha512_etm "aes-128-cbc-hmac-sha512-etm" +#define NID_aes_128_cbc_hmac_sha512_etm 1493 + +#define SN_aes_192_cbc_hmac_sha512_etm "AES-192-CBC-HMAC-SHA512-ETM" +#define LN_aes_192_cbc_hmac_sha512_etm "aes-192-cbc-hmac-sha512-etm" +#define NID_aes_192_cbc_hmac_sha512_etm 1494 + +#define SN_aes_256_cbc_hmac_sha512_etm "AES-256-CBC-HMAC-SHA512-ETM" +#define LN_aes_256_cbc_hmac_sha512_etm "aes-256-cbc-hmac-sha512-etm" +#define NID_aes_256_cbc_hmac_sha512_etm 1495 + #define SN_dhpublicnumber "dhpublicnumber" #define LN_dhpublicnumber "X9.42 DH" #define NID_dhpublicnumber 920 diff --git a/providers/common/include/prov/providercommon.h b/providers/common/include/prov/providercommon.h index 7621b4b1d62..f6503572c2a 100644 --- a/providers/common/include/prov/providercommon.h +++ b/providers/common/include/prov/providercommon.h @@ -16,6 +16,7 @@ int ossl_cipher_capable_aes_cbc_hmac_sha1(void); int ossl_cipher_capable_aes_cbc_hmac_sha256(void); int ossl_cipher_capable_aes_cbc_hmac_sha1_etm(void); int ossl_cipher_capable_aes_cbc_hmac_sha256_etm(void); +int ossl_cipher_capable_aes_cbc_hmac_sha512_etm(void); OSSL_FUNC_provider_get_capabilities_fn ossl_prov_get_capabilities; diff --git a/providers/defltprov.c b/providers/defltprov.c index 6e33f6ee66b..7e3c354de0f 100644 --- a/providers/defltprov.c +++ b/providers/defltprov.c @@ -226,7 +226,7 @@ static const OSSL_ALGORITHM_CAPABLE deflt_ciphers[] = { ALGC(PROV_NAMES_AES_256_CBC_HMAC_SHA1, ossl_aes256cbc_hmac_sha1_functions, ossl_cipher_capable_aes_cbc_hmac_sha1), ALGC(PROV_NAMES_AES_128_CBC_HMAC_SHA256, ossl_aes128cbc_hmac_sha256_functions, - ossl_cipher_capable_aes_cbc_hmac_sha256), + ossl_cipher_capable_aes_cbc_hmac_sha256), ALGC(PROV_NAMES_AES_256_CBC_HMAC_SHA256, ossl_aes256cbc_hmac_sha256_functions, ossl_cipher_capable_aes_cbc_hmac_sha256), ALGC(PROV_NAMES_AES_128_CBC_HMAC_SHA1_ETM, ossl_aes128cbc_hmac_sha1_etm_functions, @@ -236,11 +236,17 @@ static const OSSL_ALGORITHM_CAPABLE deflt_ciphers[] = { ALGC(PROV_NAMES_AES_256_CBC_HMAC_SHA1_ETM, ossl_aes256cbc_hmac_sha1_etm_functions, ossl_cipher_capable_aes_cbc_hmac_sha1_etm), ALGC(PROV_NAMES_AES_128_CBC_HMAC_SHA256_ETM, ossl_aes128cbc_hmac_sha256_etm_functions, - ossl_cipher_capable_aes_cbc_hmac_sha256_etm), + ossl_cipher_capable_aes_cbc_hmac_sha256_etm), ALGC(PROV_NAMES_AES_192_CBC_HMAC_SHA256_ETM, ossl_aes192cbc_hmac_sha256_etm_functions, - ossl_cipher_capable_aes_cbc_hmac_sha256_etm), + ossl_cipher_capable_aes_cbc_hmac_sha256_etm), ALGC(PROV_NAMES_AES_256_CBC_HMAC_SHA256_ETM, ossl_aes256cbc_hmac_sha256_etm_functions, ossl_cipher_capable_aes_cbc_hmac_sha256_etm), + ALGC(PROV_NAMES_AES_128_CBC_HMAC_SHA512_ETM, ossl_aes128cbc_hmac_sha512_etm_functions, + ossl_cipher_capable_aes_cbc_hmac_sha512_etm), + ALGC(PROV_NAMES_AES_192_CBC_HMAC_SHA512_ETM, ossl_aes192cbc_hmac_sha512_etm_functions, + ossl_cipher_capable_aes_cbc_hmac_sha512_etm), + ALGC(PROV_NAMES_AES_256_CBC_HMAC_SHA512_ETM, ossl_aes256cbc_hmac_sha512_etm_functions, + ossl_cipher_capable_aes_cbc_hmac_sha512_etm), #ifndef OPENSSL_NO_ARIA ALG(PROV_NAMES_ARIA_256_GCM, ossl_aria256gcm_functions), ALG(PROV_NAMES_ARIA_192_GCM, ossl_aria192gcm_functions), diff --git a/providers/fips/fipsprov.c b/providers/fips/fipsprov.c index 03258fc97f5..21032b9ba2e 100644 --- 
a/providers/fips/fipsprov.c +++ b/providers/fips/fipsprov.c @@ -366,6 +366,12 @@ static const OSSL_ALGORITHM_CAPABLE fips_ciphers[] = { ossl_cipher_capable_aes_cbc_hmac_sha256_etm), ALGC(PROV_NAMES_AES_256_CBC_HMAC_SHA256_ETM, ossl_aes256cbc_hmac_sha256_etm_functions, ossl_cipher_capable_aes_cbc_hmac_sha256_etm), + ALGC(PROV_NAMES_AES_128_CBC_HMAC_SHA512_ETM, ossl_aes128cbc_hmac_sha512_etm_functions, + ossl_cipher_capable_aes_cbc_hmac_sha512_etm), + ALGC(PROV_NAMES_AES_192_CBC_HMAC_SHA512_ETM, ossl_aes192cbc_hmac_sha512_etm_functions, + ossl_cipher_capable_aes_cbc_hmac_sha512_etm), + ALGC(PROV_NAMES_AES_256_CBC_HMAC_SHA512_ETM, ossl_aes256cbc_hmac_sha512_etm_functions, + ossl_cipher_capable_aes_cbc_hmac_sha512_etm), #ifndef OPENSSL_NO_DES ALG(PROV_NAMES_DES_EDE3_ECB, ossl_tdes_ede3_ecb_functions), ALG(PROV_NAMES_DES_EDE3_CBC, ossl_tdes_ede3_cbc_functions), diff --git a/providers/implementations/ciphers/build.info b/providers/implementations/ciphers/build.info index 47c140ace11..e9dbfecfacf 100644 --- a/providers/implementations/ciphers/build.info +++ b/providers/implementations/ciphers/build.info @@ -108,6 +108,7 @@ SOURCE[$AES_GOAL]=\ cipher_aes_cbc_hmac_sha_etm.c \ cipher_aes_cbc_hmac_sha1_etm_hw.c \ cipher_aes_cbc_hmac_sha256_etm_hw.c \ + cipher_aes_cbc_hmac_sha512_etm_hw.c \ cipher_cts.c DEFINE[$AES_GOAL]=$AESXTSDEF diff --git a/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha1_etm_hw.c b/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha1_etm_hw.c index 5d164ff5d71..7d5461ca1c3 100644 --- a/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha1_etm_hw.c +++ b/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha1_etm_hw.c @@ -74,7 +74,9 @@ static int hwaes_cbc_hmac_sha1_etm(PROV_CIPHER_CTX *vctx, { PROV_AES_HMAC_SHA_ETM_CTX *ctx = (PROV_AES_HMAC_SHA_ETM_CTX *)vctx; CIPH_DIGEST arg = {0}; + ciph_digest_arg_init(&arg, vctx); + if (len % AES_BLOCK_SIZE) { ERR_raise(ERR_LIB_PROV, PROV_R_INVALID_INPUT_LENGTH); return 0; @@ -166,8 +168,8 @@ static int aes_cbc_hmac_sha1_cipher(PROV_CIPHER_CTX *vctx, static const PROV_CIPHER_HW_AES_HMAC_SHA_ETM cipher_hw_aes_hmac_sha1_etm = { { - aes_cbc_hmac_sha1_init_key, - aes_cbc_hmac_sha1_cipher + aes_cbc_hmac_sha1_init_key, + aes_cbc_hmac_sha1_cipher }, aes_cbc_hmac_sha1_set_mac_key }; diff --git a/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha256_etm_hw.c b/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha256_etm_hw.c index 8a5474fc655..95116ae3894 100644 --- a/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha256_etm_hw.c +++ b/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha256_etm_hw.c @@ -24,11 +24,11 @@ void sha256_block_data_order(void *c, const void *p, size_t len); # if defined(__aarch64__) int asm_aescbc_sha256_hmac(const uint8_t *csrc, uint8_t *cdst, uint64_t clen, - uint8_t *dsrc, uint8_t *ddst, uint64_t dlen, - CIPH_DIGEST *arg); + uint8_t *dsrc, uint8_t *ddst, uint64_t dlen, + CIPH_DIGEST *arg); void asm_sha256_hmac_aescbc_dec(const uint8_t *csrc, uint8_t *cdst, uint64_t clen, - const unsigned char *dsrc, uint8_t *ddst, size_t dlen, - CIPH_DIGEST *arg); + const unsigned char *dsrc, uint8_t *ddst, size_t dlen, + CIPH_DIGEST *arg); # define HWAES128_ENC_CBC_SHA256_ETM asm_aescbc_sha256_hmac # define HWAES128_DEC_CBC_SHA256_ETM asm_sha256_hmac_aescbc_dec # endif @@ -74,7 +74,9 @@ static int hwaes_cbc_hmac_sha256_etm(PROV_CIPHER_CTX *vctx, { PROV_AES_HMAC_SHA_ETM_CTX *ctx = (PROV_AES_HMAC_SHA_ETM_CTX *)vctx; CIPH_DIGEST arg = {0}; + ciph_digest_arg_init(&arg, vctx); + if (len % 
AES_BLOCK_SIZE) { ERR_raise(ERR_LIB_PROV, PROV_R_INVALID_INPUT_LENGTH); return 0; @@ -166,8 +168,8 @@ static int aes_cbc_hmac_sha256_cipher(PROV_CIPHER_CTX *vctx, static const PROV_CIPHER_HW_AES_HMAC_SHA_ETM cipher_hw_aes_hmac_sha256_etm = { { - aes_cbc_hmac_sha256_init_key, - aes_cbc_hmac_sha256_cipher + aes_cbc_hmac_sha256_init_key, + aes_cbc_hmac_sha256_cipher }, aes_cbc_hmac_sha256_set_mac_key }; diff --git a/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha512_etm_hw.c b/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha512_etm_hw.c new file mode 100644 index 00000000000..5d2a270df85 --- /dev/null +++ b/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha512_etm_hw.c @@ -0,0 +1,191 @@ +/* + * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +/* + * All low level APIs are deprecated for public use, but still ok for internal + * use where we're using them to implement the higher level EVP interface, as is + * the case here. + */ +#include "internal/deprecated.h" +#include "cipher_aes_cbc_hmac_sha_etm.h" + +#if !defined(AES_CBC_HMAC_SHA_ETM_CAPABLE) +int ossl_cipher_capable_aes_cbc_hmac_sha512_etm(void) +{ + return 0; +} + +const PROV_CIPHER_HW_AES_HMAC_SHA_ETM *ossl_prov_cipher_hw_aes_cbc_hmac_sha512_etm(void) +{ + return NULL; +} +#else +# if defined(__aarch64__) +void asm_aescbc_sha512_hmac(const uint8_t *csrc, uint8_t *cdst, uint64_t clen, + uint8_t *dsrc, uint8_t *ddst, uint64_t dlen, + CIPH_DIGEST *arg); +void asm_sha512_hmac_aescbc_dec(const uint8_t *csrc, uint8_t *cdst, uint64_t clen, + uint8_t *dsrc, uint8_t *ddst, uint64_t dlen, + CIPH_DIGEST *arg); +# define HWAES_ENC_CBC_SHA512_ETM asm_aescbc_sha512_hmac +# define HWAES_DEC_CBC_SHA512_ETM asm_sha512_hmac_aescbc_dec +# endif + +int ossl_cipher_capable_aes_cbc_hmac_sha512_etm(void) +{ + return HWAES_CBC_HMAC_SHA512_ETM_CAPABLE; +} + +static int hwaes_cbc_hmac_sha512_init_key(PROV_CIPHER_CTX *vctx, + const unsigned char *key, + size_t keylen) +{ + int ret; + PROV_AES_HMAC_SHA_ETM_CTX *ctx = (PROV_AES_HMAC_SHA_ETM_CTX *)vctx; + PROV_AES_HMAC_SHA512_ETM_CTX *sctx = (PROV_AES_HMAC_SHA512_ETM_CTX *)vctx; + + if (ctx->base.enc) + ret = aes_v8_set_encrypt_key(key, ctx->base.keylen * 8, &ctx->ks); + else + ret = aes_v8_set_decrypt_key(key, ctx->base.keylen * 8, &ctx->ks); + + SHA512_Init(&sctx->head); /* handy when benchmarking */ + sctx->tail = sctx->head; + + return ret < 0 ? 
0 : 1; +} + +void sha512_block_data_order(void *c, const void *p, size_t len); + +static void sha512_update(SHA512_CTX *c, const void *data, size_t len) +{ + const unsigned char *ptr = data; + size_t res; + + if ((res = c->num)) { + res = SHA512_CBLOCK - res; + if (len < res) + res = len; + SHA512_Update(c, ptr, res); + ptr += res; + len -= res; + } + + res = len % SHA512_CBLOCK; + len -= res; + + if (len) { + sha512_block_data_order(c, ptr, len / SHA512_CBLOCK); + + ptr += len; + c->Nh += len >> 61; + c->Nl += len <<= 3; + if (c->Nl < (unsigned int)len) + c->Nh++; + } + + if (res) + SHA512_Update(c, ptr, res); +} + +static void ciph_digest_arg_init(CIPH_DIGEST *arg, PROV_CIPHER_CTX *vctx) +{ + PROV_AES_HMAC_SHA_ETM_CTX *ctx = (PROV_AES_HMAC_SHA_ETM_CTX *)vctx; + PROV_AES_HMAC_SHA512_ETM_CTX *sctx = (PROV_AES_HMAC_SHA512_ETM_CTX *)vctx; + + arg->cipher.key = (uint8_t *)&(ctx->ks); + arg->cipher.key_rounds = ctx->ks.rounds; + arg->cipher.iv = (uint8_t *)&(ctx->base.iv); + arg->digest.hmac.i_key_pad = (uint8_t *)&(sctx->head); + arg->digest.hmac.o_key_pad = (uint8_t *)&(sctx->tail); +} + +static int hwaes_cbc_hmac_sha512_etm(PROV_CIPHER_CTX *vctx, + unsigned char *out, + const unsigned char *in, size_t len) +{ + PROV_AES_HMAC_SHA_ETM_CTX *ctx = (PROV_AES_HMAC_SHA_ETM_CTX *)vctx; + CIPH_DIGEST arg = {0}; + + ciph_digest_arg_init(&arg, vctx); + + if (len % AES_BLOCK_SIZE) { + ERR_raise(ERR_LIB_PROV, PROV_R_INVALID_INPUT_LENGTH); + return 0; + } + + if (ctx->base.enc) { + HWAES_ENC_CBC_SHA512_ETM(in, out, len, out, ctx->tag, len, &arg); + return 1; + } else { + if (ctx->taglen == 0) { + ERR_raise(ERR_LIB_PROV, PROV_R_TAG_NOT_SET); + return 0; + } + HWAES_DEC_CBC_SHA512_ETM(in, out, len, out, ctx->tag, len, &arg); + if (CRYPTO_memcmp(ctx->exp_tag, ctx->tag, ctx->taglen)) { + ERR_raise(ERR_LIB_PROV, PROV_R_INVALID_TAG); + return 0; + } + return 1; + } +} + +static int hwaes_cbc_hmac_sha512_cipher(PROV_CIPHER_CTX *vctx, + unsigned char *out, + const unsigned char *in, size_t len) +{ + return hwaes_cbc_hmac_sha512_etm(vctx, out, in, len); +} + +static void hwaes_cbc_hmac_sha512_set_mac_key(void *vctx, + const unsigned char *mackey, + size_t len) +{ + PROV_AES_HMAC_SHA512_ETM_CTX *ctx = (PROV_AES_HMAC_SHA512_ETM_CTX *)vctx; + unsigned int i; + unsigned char hmac_key[128]; + + memset(hmac_key, 0, sizeof(hmac_key)); + + if (len > sizeof(hmac_key)) { + SHA512_Init(&ctx->head); + sha512_update(&ctx->head, mackey, len); + SHA512_Final(hmac_key, &ctx->head); + } else { + memcpy(hmac_key, mackey, len); + } + + for (i = 0; i < sizeof(hmac_key); i++) + hmac_key[i] ^= 0x36; /* ipad */ + SHA512_Init(&ctx->head); + sha512_update(&ctx->head, hmac_key, sizeof(hmac_key)); + + for (i = 0; i < sizeof(hmac_key); i++) + hmac_key[i] ^= 0x36 ^ 0x5c; /* opad */ + SHA512_Init(&ctx->tail); + sha512_update(&ctx->tail, hmac_key, sizeof(hmac_key)); + + OPENSSL_cleanse(hmac_key, sizeof(hmac_key)); +} + +static const PROV_CIPHER_HW_AES_HMAC_SHA_ETM cipher_hw_aes_hmac_sha512_etm = { + { + hwaes_cbc_hmac_sha512_init_key, + hwaes_cbc_hmac_sha512_cipher + }, + hwaes_cbc_hmac_sha512_set_mac_key +}; + +const PROV_CIPHER_HW_AES_HMAC_SHA_ETM *ossl_prov_cipher_hw_aes_cbc_hmac_sha512_etm(void) +{ + return &cipher_hw_aes_hmac_sha512_etm; +} + +#endif /* !defined(AES_CBC_HMAC_SHA_CAPABLE) */ diff --git a/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha_etm.c b/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha_etm.c index 0292511353d..180cc107839 100644 --- a/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha_etm.c 
+++ b/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha_etm.c @@ -16,7 +16,7 @@ #ifndef AES_CBC_HMAC_SHA_ETM_CAPABLE # define IMPLEMENT_CIPHER(nm, sub, kbits, blkbits, ivbits, flags) \ const OSSL_DISPATCH ossl_##nm##kbits##sub##_functions[] = { \ - OSSL_DISPATCH_END \ + OSSL_DISPATCH_END \ }; #else static OSSL_FUNC_cipher_encrypt_init_fn aes_einit; @@ -32,7 +32,7 @@ static int aes_set_ctx_params(void *vctx, const OSSL_PARAM params[]) { PROV_AES_HMAC_SHA_ETM_CTX *ctx = (PROV_AES_HMAC_SHA_ETM_CTX *)vctx; PROV_CIPHER_HW_AES_HMAC_SHA_ETM *hw = - (PROV_CIPHER_HW_AES_HMAC_SHA_ETM *)ctx->hw; + (PROV_CIPHER_HW_AES_HMAC_SHA_ETM *)ctx->hw; const OSSL_PARAM *p; if (params == NULL) @@ -82,8 +82,8 @@ static int aes_set_ctx_params(void *vctx, const OSSL_PARAM params[]) } static int aes_einit(void *ctx, const unsigned char *key, size_t keylen, - const unsigned char *iv, size_t ivlen, - const OSSL_PARAM params[]) + const unsigned char *iv, size_t ivlen, + const OSSL_PARAM params[]) { if (!ossl_cipher_generic_einit(ctx, key, keylen, iv, ivlen, NULL)) return 0; @@ -91,8 +91,8 @@ static int aes_einit(void *ctx, const unsigned char *key, size_t keylen, } static int aes_dinit(void *ctx, const unsigned char *key, size_t keylen, - const unsigned char *iv, size_t ivlen, - const OSSL_PARAM params[]) + const unsigned char *iv, size_t ivlen, + const OSSL_PARAM params[]) { if (!ossl_cipher_generic_dinit(ctx, key, keylen, iv, ivlen, NULL)) return 0; @@ -259,6 +259,43 @@ static void *aes_cbc_hmac_sha256_etm_dupctx(void *provctx) return OPENSSL_memdup(ctx, sizeof(*ctx)); } +static void *aes_cbc_hmac_sha512_etm_newctx(void *provctx, size_t kbits, + size_t blkbits, size_t ivbits, + uint64_t flags) +{ + PROV_AES_HMAC_SHA512_ETM_CTX *ctx; + + if (!ossl_prov_is_running()) + return NULL; + + ctx = OPENSSL_zalloc(sizeof(*ctx)); + if (ctx != NULL) + base_ctx_init(provctx, &ctx->base_ctx, + ossl_prov_cipher_hw_aes_cbc_hmac_sha512_etm(), kbits, blkbits, + ivbits, flags); + return ctx; +} + +static void aes_cbc_hmac_sha512_etm_freectx(void *vctx) +{ + PROV_AES_HMAC_SHA512_ETM_CTX *ctx = (PROV_AES_HMAC_SHA512_ETM_CTX *)vctx; + + if (ctx != NULL) { + ossl_cipher_generic_reset_ctx((PROV_CIPHER_CTX *)vctx); + OPENSSL_clear_free(ctx, sizeof(*ctx)); + } +} + +static void *aes_cbc_hmac_sha512_etm_dupctx(void *provctx) +{ + PROV_AES_HMAC_SHA512_ETM_CTX *ctx = provctx; + + if (ctx == NULL) + return NULL; + + return OPENSSL_memdup(ctx, sizeof(*ctx)); +} + # define IMPLEMENT_CIPHER(nm, sub, kbits, blkbits, ivbits, flags) \ static OSSL_FUNC_cipher_newctx_fn nm##_##kbits##_##sub##_newctx; \ static void *nm##_##kbits##_##sub##_newctx(void *provctx) \ @@ -307,4 +344,10 @@ IMPLEMENT_CIPHER(aes, cbc_hmac_sha256_etm, 128, 128, 128, EVP_CIPH_FLAG_ENC_THEN /* ossl_aes192cbc_hmac_sha256_etm_functions */ IMPLEMENT_CIPHER(aes, cbc_hmac_sha256_etm, 192, 128, 128, EVP_CIPH_FLAG_ENC_THEN_MAC) /* ossl_aes256cbc_hmac_sha256_etm_functions */ -IMPLEMENT_CIPHER(aes, cbc_hmac_sha256_etm, 256, 128, 128, EVP_CIPH_FLAG_ENC_THEN_MAC) \ No newline at end of file +IMPLEMENT_CIPHER(aes, cbc_hmac_sha256_etm, 256, 128, 128, EVP_CIPH_FLAG_ENC_THEN_MAC) +/* ossl_aes128cbc_hmac_sha512_etm_functions */ +IMPLEMENT_CIPHER(aes, cbc_hmac_sha512_etm, 128, 128, 128, EVP_CIPH_FLAG_ENC_THEN_MAC) +/* ossl_aes192cbc_hmac_sha512_etm_functions */ +IMPLEMENT_CIPHER(aes, cbc_hmac_sha512_etm, 192, 128, 128, EVP_CIPH_FLAG_ENC_THEN_MAC) +/* ossl_aes256cbc_hmac_sha512_etm_functions */ +IMPLEMENT_CIPHER(aes, cbc_hmac_sha512_etm, 256, 128, 128, EVP_CIPH_FLAG_ENC_THEN_MAC) diff --git 
a/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha_etm.h b/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha_etm.h index c8b2b1e5ff9..460ccbc7366 100644 --- a/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha_etm.h +++ b/providers/implementations/ciphers/cipher_aes_cbc_hmac_sha_etm.h @@ -13,6 +13,7 @@ int ossl_cipher_capable_aes_cbc_hmac_sha1_etm(void); int ossl_cipher_capable_aes_cbc_hmac_sha256_etm(void); +int ossl_cipher_capable_aes_cbc_hmac_sha512_etm(void); typedef struct prov_cipher_hw_aes_hmac_sha_ctx_etm_st { PROV_CIPHER_HW base; /* must be first */ @@ -21,12 +22,13 @@ typedef struct prov_cipher_hw_aes_hmac_sha_ctx_etm_st { const PROV_CIPHER_HW_AES_HMAC_SHA_ETM *ossl_prov_cipher_hw_aes_cbc_hmac_sha1_etm(void); const PROV_CIPHER_HW_AES_HMAC_SHA_ETM *ossl_prov_cipher_hw_aes_cbc_hmac_sha256_etm(void); +const PROV_CIPHER_HW_AES_HMAC_SHA_ETM *ossl_prov_cipher_hw_aes_cbc_hmac_sha512_etm(void); #ifdef AES_CBC_HMAC_SHA_ETM_CAPABLE # include <openssl/aes.h> # include <openssl/sha.h> -# define AES_CBC_MAX_HMAC_SIZE 32 +# define AES_CBC_MAX_HMAC_SIZE 64 typedef struct prov_aes_hmac_sha_etm_ctx_st { PROV_CIPHER_CTX base; @@ -47,6 +49,11 @@ typedef struct prov_aes_hmac_sha256_etm_ctx_st { SHA256_CTX head, tail; } PROV_AES_HMAC_SHA256_ETM_CTX; +typedef struct prov_aes_hmac_sha512_etm_ctx_st { + PROV_AES_HMAC_SHA_ETM_CTX base_ctx; + SHA512_CTX head, tail, md; +} PROV_AES_HMAC_SHA512_ETM_CTX; + typedef struct { struct { uint8_t *key; diff --git a/providers/implementations/include/prov/implementations.h b/providers/implementations/include/prov/implementations.h index 2b770badc69..311e5f38693 100644 --- a/providers/implementations/include/prov/implementations.h +++ b/providers/implementations/include/prov/implementations.h @@ -104,6 +104,9 @@ extern const OSSL_DISPATCH ossl_aes256cbc_hmac_sha1_etm_functions[]; extern const OSSL_DISPATCH ossl_aes128cbc_hmac_sha256_etm_functions[]; extern const OSSL_DISPATCH ossl_aes192cbc_hmac_sha256_etm_functions[]; extern const OSSL_DISPATCH ossl_aes256cbc_hmac_sha256_etm_functions[]; +extern const OSSL_DISPATCH ossl_aes128cbc_hmac_sha512_etm_functions[]; +extern const OSSL_DISPATCH ossl_aes192cbc_hmac_sha512_etm_functions[]; +extern const OSSL_DISPATCH ossl_aes256cbc_hmac_sha512_etm_functions[]; #ifndef OPENSSL_NO_ARIA extern const OSSL_DISPATCH ossl_aria256gcm_functions[]; diff --git a/providers/implementations/include/prov/names.h b/providers/implementations/include/prov/names.h index 19fdf635c07..34e1147b2b7 100644 --- a/providers/implementations/include/prov/names.h +++ b/providers/implementations/include/prov/names.h @@ -217,6 +217,9 @@ #define PROV_NAMES_AES_128_CBC_HMAC_SHA256_ETM "AES-128-CBC-HMAC-SHA256-ETM" #define PROV_NAMES_AES_192_CBC_HMAC_SHA256_ETM "AES-192-CBC-HMAC-SHA256-ETM" #define PROV_NAMES_AES_256_CBC_HMAC_SHA256_ETM "AES-256-CBC-HMAC-SHA256-ETM" +#define PROV_NAMES_AES_128_CBC_HMAC_SHA512_ETM "AES-128-CBC-HMAC-SHA512-ETM" +#define PROV_NAMES_AES_192_CBC_HMAC_SHA512_ETM "AES-192-CBC-HMAC-SHA512-ETM" +#define PROV_NAMES_AES_256_CBC_HMAC_SHA512_ETM "AES-256-CBC-HMAC-SHA512-ETM" /*- * Digests diff --git a/test/evp_libctx_test.c b/test/evp_libctx_test.c index ae03e589a03..706460170b9 100644 --- a/test/evp_libctx_test.c +++ b/test/evp_libctx_test.c @@ -488,7 +488,7 @@ static int test_cipher_reinit_partialupdate(int test_id) /* skip any ciphers that don't allow partial updates */ if (((EVP_CIPHER_get_flags(cipher) & (EVP_CIPH_FLAG_CTS | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK | - EVP_CIPH_FLAG_ENC_THEN_MAC)) != 0) + EVP_CIPH_FLAG_ENC_THEN_MAC)) != 0) || 
EVP_CIPHER_get_mode(cipher) == EVP_CIPH_CCM_MODE || EVP_CIPHER_get_mode(cipher) == EVP_CIPH_XTS_MODE || EVP_CIPHER_get_mode(cipher) == EVP_CIPH_WRAP_MODE) { diff --git a/test/evp_test.c b/test/evp_test.c index f45b7d81662..db5271385e5 100644 --- a/test/evp_test.c +++ b/test/evp_test.c @@ -1346,13 +1346,13 @@ static int cipher_test_enc(EVP_TEST *t, int enc, size_t out_misalign, } else if (!enc && (expected->aead == EVP_CIPH_OCB_MODE || expected->tag_late)) { if (EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, - expected->tag_len, expected->tag) <= 0) { + expected->tag_len, expected->tag) <= 0) { t->err = "TAG_SET_ERROR"; goto err; } } else if (!enc && expected->mac_key && expected->tag) { - if (EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, - expected->tag_len, expected->tag) <= 0) { + if (EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, + expected->tag_len, expected->tag) <= 0) { t->err = "TAG_SET_ERROR"; goto err; } @@ -1453,25 +1453,32 @@ static int cipher_test_enc(EVP_TEST *t, int enc, size_t out_misalign, if (enc && expected->tag) { if (EVP_CIPHER_is_a(expected->cipher, "AES-128-CBC-HMAC-SHA1-ETM") || EVP_CIPHER_is_a(expected->cipher, "AES-128-CBC-HMAC-SHA256-ETM") + || EVP_CIPHER_is_a(expected->cipher, "AES-128-CBC-HMAC-SHA512-ETM") || EVP_CIPHER_is_a(expected->cipher, "AES-192-CBC-HMAC-SHA1-ETM") || EVP_CIPHER_is_a(expected->cipher, "AES-192-CBC-HMAC-SHA256-ETM") + || EVP_CIPHER_is_a(expected->cipher, "AES-192-CBC-HMAC-SHA512-ETM") || EVP_CIPHER_is_a(expected->cipher, "AES-256-CBC-HMAC-SHA1-ETM") - || EVP_CIPHER_is_a(expected->cipher, "AES-256-CBC-HMAC-SHA256-ETM")) { - unsigned char rtag[32] = {0}; + || EVP_CIPHER_is_a(expected->cipher, "AES-256-CBC-HMAC-SHA256-ETM") + || EVP_CIPHER_is_a(expected->cipher, "AES-256-CBC-HMAC-SHA512-ETM")) { + unsigned char rtag[64] = {0}; unsigned tag_len = 0; OSSL_PARAM params[2]; if (EVP_CIPHER_is_a(expected->cipher, "AES-128-CBC-HMAC-SHA1-ETM") || EVP_CIPHER_is_a(expected->cipher, "AES-192-CBC-HMAC-SHA1-ETM") - || EVP_CIPHER_is_a(expected->cipher, "AES-256-CBC-HMAC-SHA1-ETM")) { + || EVP_CIPHER_is_a(expected->cipher, "AES-256-CBC-HMAC-SHA1-ETM")) tag_len = 20; - } else if (EVP_CIPHER_is_a(expected->cipher, "AES-128-CBC-HMAC-SHA256-ETM") - || EVP_CIPHER_is_a(expected->cipher, "AES-192-CBC-HMAC-SHA256-ETM") - || EVP_CIPHER_is_a(expected->cipher, "AES-256-CBC-HMAC-SHA256-ETM")) { + else if (EVP_CIPHER_is_a(expected->cipher, "AES-128-CBC-HMAC-SHA256-ETM") + || EVP_CIPHER_is_a(expected->cipher, "AES-192-CBC-HMAC-SHA256-ETM") + || EVP_CIPHER_is_a(expected->cipher, "AES-256-CBC-HMAC-SHA256-ETM")) tag_len = 32; - } + else if (EVP_CIPHER_is_a(expected->cipher, "AES-128-CBC-HMAC-SHA512-ETM") + || EVP_CIPHER_is_a(expected->cipher, "AES-192-CBC-HMAC-SHA512-ETM") + || EVP_CIPHER_is_a(expected->cipher, "AES-256-CBC-HMAC-SHA512-ETM")) + tag_len = 64; - if (!TEST_size_t_le(expected->tag_len, tag_len)) { + if (!TEST_size_t_le(expected->tag_len, tag_len) || + !TEST_size_t_le(tag_len, sizeof(rtag))) { t->err = "TAG_LENGTH_INTERNAL_ERROR"; goto err; } diff --git a/test/recipes/30-test_evp_data/evpciph_aes_stitched.txt b/test/recipes/30-test_evp_data/evpciph_aes_stitched.txt index 06da481bb5b..050ce9b419c 100644 --- a/test/recipes/30-test_evp_data/evpciph_aes_stitched.txt +++ b/test/recipes/30-test_evp_data/evpciph_aes_stitched.txt @@ -229,3 +229,56 @@ Plaintext = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021 Ciphertext = 
261cd0c88a4d4e6db7fc263257a9f6d0ce83c1ff5f2680dc57ffd8eefdbb9c00d3d507672d105a990b2b78509978625b9d93c2bd41e3fb721abd1496553c583c67dad9b662b3d58c8540e10ed9c5ed1a7f33ce9e9a41c30836651d73ee2c003af03a919eb41a6d70ef814e184e740f8a4ca75016ae77ac335ba758396232a87ffceacf24a0e287371eaa04570cb68dcd61882e1c3f7aca38afed34138fedefe167bb9c741ebd14da2eba3cf5b9aa06bb93ca61fa462de7e1f439efac5ea55edab61171250be36da513e6b5f92c8267f778cdde5720128a586c7bbd5864686b12710daa9f133706e81fa3a066bd1f29277c08ca8f052b3ed06f04ec2a8509f54934fd9b06f4115e546011ff485ac76d5fce0329c94bf5f29726bed49ace94abf53b036c1f920f8c71d44deca7b11f653025698425717bb3cc8f5e74230d8ede675ee0eae6f8aae274152c7503c567427a71323feb84b0fc0515030c933e4c7399be13322b5d4ccabb97c011d75de82f38a540e972bc2a515dc31d50e78b74be891cc4a2ddbe4b50d0d27c069985a581b80a9f591a4bb198f085af2138ca9b4f595c37d60f15d960b1e39de7ff92a699d9aca4a44ff9d327c7130e6b0ce90032e358f3743d8abccaeb0426226d6ec233fdf289bdde5f3b2756a587a382e3353d77acb9774bd64978629633f2122d1fa376b12cfbe4781d6a35227d71fdfa929c1435596fbaf7fe0aea4fa02c6b9e8099c62149ed82819a2088b72660be8ea364c13d5340be93cab8ac92914d2b1115cbb7 Tag = 8cb8898a5b559984da3cbaa4703c9ed3cfc2f56c7292a3279a3dd5f7475412e1 +Title = AES-128-CBC-HMAC-SHA512-ETM test vectors + +Cipher = AES-128-CBC-HMAC-SHA512-ETM +Key = feffe9928665731c6d6a8f9467308308 +MACKey = cafebabefacedbaddecaf88801020304 +IV = 101112131415161718191a1b1c1d1e1f +Plaintext = 000102030405060708090a0b0c0d0e0f +Ciphertext = 18bd54842828fdc0ac5a3b459f32f0be +Tag = 75c1883b2a1b71b98d04a0fc46b91b7e5d6e12c23a8e19a914d88be9a1d8a9f77022bff6144dfba69764565606856bf0f2510fef52bc4aa3a5b9089975a0400a + +Cipher = AES-128-CBC-HMAC-SHA512-ETM +Key = feffe9928665731c6d6a8f9467308308 +MACKey = cafebabefacedbaddecaf88801020304 +IV = 101112131415161718191a1b1c1d1e1f +Plaintext = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f +Ciphertext = 18bd54842828fdc0ac5a3b459f32f0be305a77944b17f62fedd4442ae60a0b0a3e1c2c23c584c86877fbd9997b415959254ea06ef046dc2e1fdafe7950a77ba94494683e01a0c495dc223a2de73be1474bcdf0b104a89ca6d419254e8f602334158d188f748c5cf4b7473c7475b4cf6c +Tag = 60ddf92cf1ed62ca3213cda9a497fbbd1f54c3a10177a9ccc3c8282dc58800edf5f710b08413fe1eb422a4efc77b97ef1a87da44bb7e8547c5364200f9ee48fc + +Cipher = AES-128-CBC-HMAC-SHA512-ETM +Key = feffe9928665731c6d6a8f9467308308 +MACKey = cafebabefacedbaddecaf88801020304 +IV = 101112131415161718191a1b1c1d1e1f +Plaintext = 
000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f +Ciphertext = 18bd54842828fdc0ac5a3b459f32f0be305a77944b17f62fedd4442ae60a0b0a3e1c2c23c584c86877fbd9997b415959254ea06ef046dc2e1fdafe7950a77ba94494683e01a0c495dc223a2de73be1474bcdf0b104a89ca6d419254e8f602334158d188f748c5cf4b7473c7475b4cf6c3bfadb50a6126c4fe31d52606b97f347a9d6722a458cc2afdd895c3a247d11e551398180bc445b0ea94d17a1a441fb10b86d84a7549e03b6edf1a12591c63dfa167f2f11ea12b2d3d8f62d92be9238d1e6eed2099f3d0f9e1fe541618bbda588899002c3078202a2d138942c4325b673e494b310a502cda70e8f62480776c31068cb3d2f4c250b9e65669d950b1a4d50cf5f2b11c74960347885e8dbb89d58f24871c34f1a134b1873222b24a310f8bb3299ca1d16cb1921c97fb462e3150b57909ec7d376e93e52ea9e51094f22f11273c32403c82acebf575b7b7af7c98976adf6f4bd4199bd9201fa7321aaad828bfcc3785776f959484ff013d8a66d579af036a6c0e82d94e6eb773f6124f18da5ca4cf5b70f72e9d852766af78269d36a03eb2e2cdda79f16c0f81be27b6593c3f4e9d19cb7018a7e4ca74756dd66ac1b45a4d741e0431d120a7f84dbbc4d7d478b54464050e62d8da0c856ccbc2dcd4dec4aa4d554ac4cce8fbeca8ba4efb55a25771f425a6e5bd74c35972c3da41eeee7fb36b5075e5ab3115f7424f0dab05a085185e923d9ad3e74dc16ff2ecfe03afdf34ba17babafc65aa87600c632ccdcbcc1b591d723eb37a8a3f869cce9fe41 +Tag = ba428ce5296789f9859e377c8c959c6a2b29c5be296a8b0e505b38712d344df1a8ab3da8ca46cc85767414feafd607dd8e3d6707946cc955bcd7707ae74dab89 + + +Title = AES-256-CBC-HMAC-SHA512-ETM test vectors + +Cipher = AES-256-CBC-HMAC-SHA512-ETM +Key = 6cc028952fa7c1ee09fc78b7549ae04d79b54d40ec172333e3a4a2297b62afe5 +MACKey = cafebabefacedbaddecaf88801020304 +IV = 101112131415161718191a1b1c1d1e1f +Plaintext = 000102030405060708090a0b0c0d0e0f +Ciphertext = 261cd0c88a4d4e6db7fc263257a9f6d0 +Tag = f3e5750ecd6bf757d180ce8a920b86900773f7801014dc57d77e52501bcb657cdc70784ca83f7235b77b6fd0cbdfc374bdf6217a2ebdf426746dfb7fdc458ce9 + +Cipher = AES-256-CBC-HMAC-SHA512-ETM +Key = 6cc028952fa7c1ee09fc78b7549ae04d79b54d40ec172333e3a4a2297b62afe5 +MACKey = cafebabefacedbaddecaf88801020304 +IV = 101112131415161718191a1b1c1d1e1f +Plaintext = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f +Ciphertext = 261cd0c88a4d4e6db7fc263257a9f6d0ce83c1ff5f2680dc57ffd8eefdbb9c00d3d507672d105a990b2b78509978625b9d93c2bd41e3fb721abd1496553c583c67dad9b662b3d58c8540e10ed9c5ed1a7f33ce9e9a41c30836651d73ee2c003af03a919eb41a6d70ef814e184e740f8a +Tag = 
e6cf78e7e21042a2cf9d0b835c3b1dafc30f3414811b2990fedb5ee47d72c5a3e52daa33c8abafeeace77495e5fd514ab9acdc793ed8b0699fb122bfb45d7d39 + +Cipher = AES-256-CBC-HMAC-SHA512-ETM +Key = 6cc028952fa7c1ee09fc78b7549ae04d79b54d40ec172333e3a4a2297b62afe5 +MACKey = cafebabefacedbaddecaf88801020304 +IV = 101112131415161718191a1b1c1d1e1f +Plaintext = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f +Ciphertext = 261cd0c88a4d4e6db7fc263257a9f6d0ce83c1ff5f2680dc57ffd8eefdbb9c00d3d507672d105a990b2b78509978625b9d93c2bd41e3fb721abd1496553c583c67dad9b662b3d58c8540e10ed9c5ed1a7f33ce9e9a41c30836651d73ee2c003af03a919eb41a6d70ef814e184e740f8a4ca75016ae77ac335ba758396232a87ffceacf24a0e287371eaa04570cb68dcd61882e1c3f7aca38afed34138fedefe167bb9c741ebd14da2eba3cf5b9aa06bb93ca61fa462de7e1f439efac5ea55edab61171250be36da513e6b5f92c8267f778cdde5720128a586c7bbd5864686b12710daa9f133706e81fa3a066bd1f29277c08ca8f052b3ed06f04ec2a8509f54934fd9b06f4115e546011ff485ac76d5fce0329c94bf5f29726bed49ace94abf53b036c1f920f8c71d44deca7b11f653025698425717bb3cc8f5e74230d8ede675ee0eae6f8aae274152c7503c567427a71323feb84b0fc0515030c933e4c7399be13322b5d4ccabb97c011d75de82f38a540e972bc2a515dc31d50e78b74be891cc4a2ddbe4b50d0d27c069985a581b80a9f591a4bb198f085af2138ca9b4f595c37d60f15d960b1e39de7ff92a699d9aca4a44ff9d327c7130e6b0ce90032e358f3743d8abccaeb0426226d6ec233fdf289bdde5f3b2756a587a382e3353d77acb9774bd64978629633f2122d1fa376b12cfbe4781d6a35227d71fdfa929c1435596fbaf7fe0aea4fa02c6b9e8099c62149ed82819a2088b72660be8ea364c13d5340be93cab8ac92914d2b1115cbb7 +Tag = 6ed97bd77ea4cf480dba39cdcc10601837ddd72c00bf7937855f94514d6ebad1be1cf786f815a95d2a19889f4e4442f0adf91a1621b6ab57727b65ce243ded1a + -- 2.47.2
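
Usage note (not part of the diff): below is a minimal sketch of how the new ciphers could be driven through EVP, mirroring the controls exercised by the evp_test.c changes above. It assumes a build/platform where the provider actually offers AES-256-CBC-HMAC-SHA512-ETM, a 16-byte MAC key as in the vectors, an input length that is a multiple of the 16-byte AES block size, and that the 64-byte tag is readable through the usual OSSL_CIPHER_PARAM_AEAD_TAG parameter; the helper name etm_encrypt_sketch is illustrative only.

#include <openssl/evp.h>
#include <openssl/params.h>
#include <openssl/core_names.h>

/*
 * Hypothetical helper: encrypt 'inlen' bytes (a multiple of 16) with
 * AES-256-CBC and collect the 64-byte HMAC-SHA512 tag computed over the
 * ciphertext (encrypt-then-MAC).  Returns 1 on success, 0 on failure.
 */
static int etm_encrypt_sketch(const unsigned char key[32],
                              const unsigned char mac_key[16],
                              const unsigned char iv[16],
                              const unsigned char *in, int inlen,
                              unsigned char *out, unsigned char tag[64])
{
    EVP_CIPHER *cipher = EVP_CIPHER_fetch(NULL, "AES-256-CBC-HMAC-SHA512-ETM",
                                          NULL);
    EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
    OSSL_PARAM params[2];
    int outl = 0, tmpl = 0, ok = 0;

    if (cipher == NULL || ctx == NULL)
        goto end;
    if (!EVP_EncryptInit_ex(ctx, cipher, NULL, key, iv))
        goto end;
    /* Hand the HMAC key to the stitched cipher, as evp_test.c does. */
    if (EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_MAC_KEY, 16,
                            (void *)mac_key) <= 0)
        goto end;
    if (!EVP_EncryptUpdate(ctx, out, &outl, in, inlen)
            || !EVP_EncryptFinal_ex(ctx, out + outl, &tmpl))
        goto end;
    /* Read back the tag; assumed to be exposed as the AEAD tag param. */
    params[0] = OSSL_PARAM_construct_octet_string(OSSL_CIPHER_PARAM_AEAD_TAG,
                                                  tag, 64);
    params[1] = OSSL_PARAM_construct_end();
    if (!EVP_CIPHER_CTX_get_params(ctx, params))
        goto end;
    ok = 1;
 end:
    EVP_CIPHER_CTX_free(ctx);
    EVP_CIPHER_free(cipher);
    return ok;
}

For decryption, the expected tag would be supplied before the data, via EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, 64, tag), matching the !enc && expected->mac_key && expected->tag branch added to cipher_test_enc() above.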