From: zhoulu
Date: Fri, 19 Dec 2025 02:08:02 +0000 (+0800)
Subject: SM4-CBC performance improvement on RISC-V
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2d75c5e383194ec2d0e2306232bdb38c3b343c50;p=thirdparty%2Fopenssl.git

SM4-CBC performance improvement on RISC-V

Modify the IV update method to further improve the performance of
SM4-CBC encryption on the RISC-V architecture.

Reviewed-by: Paul Dale
Reviewed-by: Neil Horman
(Merged from https://github.com/openssl/openssl/pull/29451)
---

diff --git a/crypto/perlasm/riscv.pm b/crypto/perlasm/riscv.pm
index e5f543a3c34..5d62f3a660d 100644
--- a/crypto/perlasm/riscv.pm
+++ b/crypto/perlasm/riscv.pm
@@ -468,6 +468,16 @@ sub vadd_vv {
     return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7));
 }
 
+sub vrgather_vv {
+    # vrgather.vv vd, vs2, vs1, vm
+    my $template = 0b001100_0_00000_00000_000_00000_1010111;
+    my $vd = read_vreg shift;
+    my $vs2 = read_vreg shift;
+    my $vs1 = read_vreg shift;
+    my $vm = read_mask_vreg shift;
+    return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7));
+}
+
 sub vadd_vx {
     # vadd.vx vd, vs2, rs1, vm
     my $template = 0b000000_0_00000_00000_100_00000_1010111;
diff --git a/crypto/sm4/asm/sm4-riscv64-zvksed.pl b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
index 66fd127aed5..c97095ed52e 100644
--- a/crypto/sm4/asm/sm4-riscv64-zvksed.pl
+++ b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
@@ -236,8 +236,14 @@ my ($in,$out,$len,$keys,$ivp)=("a0","a1","a2","a3","a4");
 my ($tmp,$base)=("t0","t2");
 my ($vdata0,$vdata1,$vdata2,$vdata3,$vdata4,$vdata5,$vdata6,$vdata7)=("v1","v2","v3","v4","v5","v6","v7","v24");
 my ($vivec)=("v8");
+my ($vindex)=("v0");
 
 $code .= <<___;
+.section .rodata
+.align 4
+.Lreverse_index:
+    .word 3, 2, 1, 0
+.text
 .p2align 3
 .globl rv64i_zvksed_sm4_cbc_encrypt
 .type rv64i_zvksed_sm4_cbc_encrypt,\@function
@@ -254,6 +260,10 @@ rv64i_zvksed_sm4_cbc_encrypt:
 
     # Load IV
     @{[vle32_v $vivec, $ivp]}
+
+    # Load the reverse index (for IV updates)
+    la $tmp, .Lreverse_index
+    @{[vle32_v $vindex, $tmp]}
 
     # =====================================================
     # If data length ≥ 64 bytes, process 4 blocks in batch:
     # 4-block CBC encryption pipeline:
@@ -285,12 +295,8 @@ rv64i_zvksed_sm4_cbc_encrypt:
     @{[enc_blk $vdata0]}
     @{[vrev8_v $vdata0, $vdata0]}
 
-    # Save the ciphertext (in reverse element order)
-    li $tmp_stride, $STRIDE
-    @{[reverse_order_S $vdata0, $out]}
     #Update IV to ciphertext block 0
-    @{[vle32_v $vivec, $out]}
-    addi $out, $out, $BLOCK_SIZE
+    @{[vrgather_vv $vivec, $vdata0, $vindex]}
 
     @{[vxor_vv $vdata1, $vdata1, $vivec]}
 
@@ -298,11 +304,8 @@ rv64i_zvksed_sm4_cbc_encrypt:
     @{[enc_blk $vdata1]}
     @{[vrev8_v $vdata1, $vdata1]}
 
-    @{[reverse_order_S $vdata1, $out]}
-
     #Update IV to ciphertext block 1
-    @{[vle32_v $vivec, $out]}
-    addi $out, $out, $BLOCK_SIZE
+    @{[vrgather_vv $vivec, $vdata1, $vindex]}
 
     @{[vxor_vv $vdata2, $vdata2, $vivec]}
 
@@ -310,10 +313,8 @@ rv64i_zvksed_sm4_cbc_encrypt:
     @{[enc_blk $vdata2]}
     @{[vrev8_v $vdata2, $vdata2]}
 
-    @{[reverse_order_S $vdata2, $out]}
     #Update IV to ciphertext block 2
-    @{[vle32_v $vivec, $out]}
-    addi $out, $out, $BLOCK_SIZE
+    @{[vrgather_vv $vivec, $vdata2, $vindex]}
 
     @{[vxor_vv $vdata3, $vdata3, $vivec]}
 
@@ -321,9 +322,18 @@ rv64i_zvksed_sm4_cbc_encrypt:
     @{[enc_blk $vdata3]}
     @{[vrev8_v $vdata3, $vdata3]}
 
-    @{[reverse_order_S $vdata3, $out]}
     #Update IV to ciphertext block 3
-    @{[vle32_v $vivec, $out]}
+    @{[vrgather_vv $vivec, $vdata3, $vindex]}
+
+    # Save the ciphertext (in reverse element order)
+    li $tmp_stride, $STRIDE
+    @{[reverse_order_S $vdata0, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata1, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata2, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    @{[reverse_order_S $vdata3, $out]}
     addi $out, $out, $BLOCK_SIZE
 
     addi $len, $len, -$FOUR_BLOCKS
@@ -344,12 +354,12 @@ rv64i_zvksed_sm4_cbc_encrypt:
     @{[enc_blk $vdata0]}
     @{[vrev8_v $vdata0, $vdata0]}
 
+    # Update IV to ciphertext block 0
+    @{[vrgather_vv $vivec, $vdata0, $vindex]}
+
     # Save the ciphertext (in reverse element order)
     li $tmp_stride, $STRIDE
     @{[reverse_order_S $vdata0, $out]}
-
-    # Update IV to ciphertext block 0
-    @{[vle32_v $vivec, $out]}
     addi $out, $out, $BLOCK_SIZE
 
     addi $len, $len, -$BLOCK_SIZE