From: Phoebe Chen
Date: Wed, 4 Oct 2023 08:42:10 +0000 (-0700)
Subject: riscv: Further optimization for single block aes-zvkned encryption.
X-Git-Tag: openssl-3.3.0-alpha1~747
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=42f112284862bb0744d37c2f3301c7163179cf15;p=thirdparty%2Fopenssl.git

riscv: Further optimization for single block aes-zvkned encryption.

Interleave key loading and aes encrypt computing for single block aes.

Signed-off-by: Phoebe Chen
Reviewed-by: Tomas Mraz
Reviewed-by: Paul Dale
Reviewed-by: Hugo Landau
(Merged from https://github.com/openssl/openssl/pull/21923)
---

diff --git a/crypto/aes/asm/aes-riscv64-zvkned.pl b/crypto/aes/asm/aes-riscv64-zvkned.pl
index e0f5f19ff5c..1cf5fddb5cf 100644
--- a/crypto/aes/asm/aes-riscv64-zvkned.pl
+++ b/crypto/aes/asm/aes-riscv64-zvkned.pl
@@ -273,42 +273,41 @@ ___
 $code .= <<___;
 .p2align 3
 L_enc_128:
-    @{[vsetivli__x0_4_e32_m1_tu_mu]}
+    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+
+    @{[vle32_v $v1, ($INP)]}
 
     @{[vle32_v $v10, ($KEYP)]}
+    @{[vaesz_vs $v1, $v10]}    # with round key w[ 0, 3]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v11, ($KEYP)]}
+    @{[vaesem_vs $v1, $v11]}   # with round key w[ 4, 7]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v12, ($KEYP)]}
+    @{[vaesem_vs $v1, $v12]}   # with round key w[ 8,11]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v13, ($KEYP)]}
+    @{[vaesem_vs $v1, $v13]}   # with round key w[12,15]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v14, ($KEYP)]}
+    @{[vaesem_vs $v1, $v14]}   # with round key w[16,19]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v15, ($KEYP)]}
+    @{[vaesem_vs $v1, $v15]}   # with round key w[20,23]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v16, ($KEYP)]}
+    @{[vaesem_vs $v1, $v16]}   # with round key w[24,27]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v17, ($KEYP)]}
+    @{[vaesem_vs $v1, $v17]}   # with round key w[28,31]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v18, ($KEYP)]}
+    @{[vaesem_vs $v1, $v18]}   # with round key w[32,35]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v19, ($KEYP)]}
+    @{[vaesem_vs $v1, $v19]}   # with round key w[36,39]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v20, ($KEYP)]}
-
-    @{[vle32_v $v1, ($INP)]}
-
-    @{[vaesz_vs $v1, $v10]}    # with round key w[ 0, 3]
-    @{[vaesem_vs $v1, $v11]}   # with round key w[ 4, 7]
-    @{[vaesem_vs $v1, $v12]}   # with round key w[ 8,11]
-    @{[vaesem_vs $v1, $v13]}   # with round key w[12,15]
-    @{[vaesem_vs $v1, $v14]}   # with round key w[16,19]
-    @{[vaesem_vs $v1, $v15]}   # with round key w[20,23]
-    @{[vaesem_vs $v1, $v16]}   # with round key w[24,27]
-    @{[vaesem_vs $v1, $v17]}   # with round key w[28,31]
-    @{[vaesem_vs $v1, $v18]}   # with round key w[32,35]
-    @{[vaesem_vs $v1, $v19]}   # with round key w[36,39]
     @{[vaesef_vs $v1, $v20]}   # with round key w[40,43]
 
     @{[vse32_v $v1, ($OUTP)]}
@@ -320,48 +319,47 @@ ___
 $code .= <<___;
 .p2align 3
 L_enc_192:
-    @{[vsetivli__x0_4_e32_m1_tu_mu]}
+    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+
+    @{[vle32_v $v1, ($INP)]}
 
     @{[vle32_v $v10, ($KEYP)]}
+    @{[vaesz_vs $v1, $v10]}    # with round key w[ 0, 3]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v11, ($KEYP)]}
+    @{[vaesem_vs $v1, $v11]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v12, ($KEYP)]}
+    @{[vaesem_vs $v1, $v12]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v13, ($KEYP)]}
+    @{[vaesem_vs $v1, $v13]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v14, ($KEYP)]}
+    @{[vaesem_vs $v1, $v14]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v15, ($KEYP)]}
+    @{[vaesem_vs $v1, $v15]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v16, ($KEYP)]}
+    @{[vaesem_vs $v1, $v16]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v17, ($KEYP)]}
+    @{[vaesem_vs $v1, $v17]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v18, ($KEYP)]}
+    @{[vaesem_vs $v1, $v18]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v19, ($KEYP)]}
+    @{[vaesem_vs $v1, $v19]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v20, ($KEYP)]}
+    @{[vaesem_vs $v1, $v20]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v21, ($KEYP)]}
+    @{[vaesem_vs $v1, $v21]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v22, ($KEYP)]}
-
-    @{[vle32_v $v1, ($INP)]}
-
-    @{[vaesz_vs $v1, $v10]}    # with round key w[ 0, 3]
-    @{[vaesem_vs $v1, $v11]}
-    @{[vaesem_vs $v1, $v12]}
-    @{[vaesem_vs $v1, $v13]}
-    @{[vaesem_vs $v1, $v14]}
-    @{[vaesem_vs $v1, $v15]}
-    @{[vaesem_vs $v1, $v16]}
-    @{[vaesem_vs $v1, $v17]}
-    @{[vaesem_vs $v1, $v18]}
-    @{[vaesem_vs $v1, $v19]}
-    @{[vaesem_vs $v1, $v20]}
-    @{[vaesem_vs $v1, $v21]}
     @{[vaesef_vs $v1, $v22]}
 
     @{[vse32_v $v1, ($OUTP)]}
@@ -372,54 +370,53 @@ ___
 $code .= <<___;
 .p2align 3
 L_enc_256:
-    @{[vsetivli__x0_4_e32_m1_tu_mu]}
+    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+
+    @{[vle32_v $v1, ($INP)]}
 
     @{[vle32_v $v10, ($KEYP)]}
+    @{[vaesz_vs $v1, $v10]}    # with round key w[ 0, 3]
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v11, ($KEYP)]}
+    @{[vaesem_vs $v1, $v11]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v12, ($KEYP)]}
+    @{[vaesem_vs $v1, $v12]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v13, ($KEYP)]}
+    @{[vaesem_vs $v1, $v13]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v14, ($KEYP)]}
+    @{[vaesem_vs $v1, $v14]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v15, ($KEYP)]}
+    @{[vaesem_vs $v1, $v15]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v16, ($KEYP)]}
+    @{[vaesem_vs $v1, $v16]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v17, ($KEYP)]}
+    @{[vaesem_vs $v1, $v17]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v18, ($KEYP)]}
+    @{[vaesem_vs $v1, $v18]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v19, ($KEYP)]}
+    @{[vaesem_vs $v1, $v19]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v20, ($KEYP)]}
+    @{[vaesem_vs $v1, $v20]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v21, ($KEYP)]}
+    @{[vaesem_vs $v1, $v21]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v22, ($KEYP)]}
+    @{[vaesem_vs $v1, $v22]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v23, ($KEYP)]}
+    @{[vaesem_vs $v1, $v23]}
     addi $KEYP, $KEYP, 16
     @{[vle32_v $v24, ($KEYP)]}
-
-    @{[vle32_v $v1, ($INP)]}
-
-    @{[vaesz_vs $v1, $v10]}    # with round key w[ 0, 3]
-    @{[vaesem_vs $v1, $v11]}
-    @{[vaesem_vs $v1, $v12]}
-    @{[vaesem_vs $v1, $v13]}
-    @{[vaesem_vs $v1, $v14]}
-    @{[vaesem_vs $v1, $v15]}
-    @{[vaesem_vs $v1, $v16]}
-    @{[vaesem_vs $v1, $v17]}
-    @{[vaesem_vs $v1, $v18]}
-    @{[vaesem_vs $v1, $v19]}
-    @{[vaesem_vs $v1, $v20]}
-    @{[vaesem_vs $v1, $v21]}
-    @{[vaesem_vs $v1, $v22]}
-    @{[vaesem_vs $v1, $v23]}
     @{[vaesef_vs $v1, $v24]}
 
     @{[vse32_v $v1, ($OUTP)]}