From: zhoulu Date: Fri, 26 Sep 2025 10:43:08 +0000 (+0800) Subject: Instruction Reordering Further Optimizes OpenSSL SHA256 Performance on RISC-V X-Git-Tag: 4.0-PRE-CLANG-FORMAT-WEBKIT~224 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=68bf1a8eee6f85c7972eb64a94f5a6638c84832b;p=thirdparty%2Fopenssl.git Instruction Reordering Further Optimizes OpenSSL SHA256 Performance on RISC-V Reviewed-by: Paul Dale Reviewed-by: Tomas Mraz (Merged from https://github.com/openssl/openssl/pull/28673) --- diff --git a/crypto/sha/asm/sha256-riscv64-zvkb-zvknha_or_zvknhb.pl b/crypto/sha/asm/sha256-riscv64-zvkb-zvknha_or_zvknhb.pl index 5e4d6be3457..74fc9a7555e 100644 --- a/crypto/sha/asm/sha256-riscv64-zvkb-zvknha_or_zvknhb.pl +++ b/crypto/sha/asm/sha256-riscv64-zvkb-zvknha_or_zvknhb.pl @@ -117,9 +117,11 @@ $code .= <<___; .globl sha256_block_data_order_zvkb_zvknha_or_zvknhb .type sha256_block_data_order_zvkb_zvknha_or_zvknhb,\@function sha256_block_data_order_zvkb_zvknha_or_zvknhb: - @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} - @{[sha_256_load_constant]} + # Setup v0 mask for the vmerge to replace the first word (idx==0) in key-scheduling. + # The AVL is 4 in SHA, so we could use a single e8(8 element masking) for masking. + @{[vsetivli "zero", 1, "e8", "m1", "ta", "ma"]} + @{[vmv_v_i $V0, 0x01]} # H is stored as {a,b,c,d},{e,f,g,h}, but we need {f,e,b,a},{h,g,d,c} # The dst vtype is e32m1 and the index vtype is e8mf4. @@ -141,12 +143,7 @@ sha256_block_data_order_zvkb_zvknha_or_zvknhb: @{[vluxei8_v $V6, $H, $V26]} @{[vluxei8_v $V7, $H2, $V26]} - # Setup v0 mask for the vmerge to replace the first word (idx==0) in key-scheduling. - # The AVL is 4 in SHA, so we could use a single e8(8 element masking) for masking. - @{[vsetivli "zero", 1, "e8", "m1", "ta", "ma"]} - @{[vmv_v_i $V0, 0x01]} - - @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} + @{[sha_256_load_constant]} L_round_loop: # Decrement length by 1