From: zhoulu
Date: Tue, 2 Dec 2025 08:34:51 +0000 (+0800)
Subject: Performance Optimization of SM4-CBC Encryption and Decryption with Assembly on RISC...
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c90b7dddf25f44c3ed27a69ae6bc67cfdb4894cd;p=thirdparty%2Fopenssl.git

Performance Optimization of SM4-CBC Encryption and Decryption with Assembly on RISC-V Architecture

Reviewed-by: Tomas Mraz
Reviewed-by: Paul Dale
(Merged from https://github.com/openssl/openssl/pull/29137)
---

diff --git a/crypto/sm4/asm/sm4-riscv64-zvksed.pl b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
index ad8585efc72..66fd127aed5 100644
--- a/crypto/sm4/asm/sm4-riscv64-zvksed.pl
+++ b/crypto/sm4/asm/sm4-riscv64-zvksed.pl
@@ -59,6 +59,548 @@ my $code=<<___;
 .text
 ___
 
+my $BLOCK_SIZE = 16;
+my $STRIDE = -4; # Used for reversing word order
+my $FOUR_BLOCKS = 64;
+my $EIGHT_BLOCKS = 128;
+my ($vk0,$vk1,$vk2,$vk3,$vk4,$vk5,$vk6,$vk7)=("v16","v17","v18","v19","v20","v21","v22","v23");
+my ($tmp_stride,$tmp_base)=("t1","t2");
+
+# Loading with word order reversed
+sub reverse_order_L {
+    my $vreg = shift;
+    my $base_reg = shift;
+
+    return <<___;
+    addi $tmp_base, $base_reg, 12
+    @{[vlse32_v $vreg, $tmp_base, $tmp_stride]}
+___
+}
+
+# Storing with word order reversed
+sub reverse_order_S {
+    my $vreg = shift;
+    my $base_reg = shift;
+
+    return <<___;
+    addi $tmp_base, $base_reg, 12
+    @{[vsse32_v $vreg, $tmp_base, $tmp_stride]}
+___
+}
+
+# Load 32 round keys
+sub enc_load_key {
+    my $keys = shift;
+
+    my $code=<<___;
+    # Order of elements was adjusted in set_encrypt_key()
+    @{[vle32_v $vk0, $keys]} # rk[0:3]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk1, $keys]} # rk[4:7]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk2, $keys]} # rk[8:11]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk3, $keys]} # rk[12:15]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk4, $keys]} # rk[16:19]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk5, $keys]} # rk[20:23]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk6, $keys]} # rk[24:27]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk7, $keys]} # rk[28:31]
+___
+
+    return $code;
+}
+
+sub dec_load_key {
+    my $keys = shift;
+
+    my $code=<<___;
+    # Order of elements was adjusted in set_decrypt_key()
+    @{[vle32_v $vk7, $keys]} # rk[31:28]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk6, $keys]} # rk[27:24]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk5, $keys]} # rk[23:20]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk4, $keys]} # rk[19:16]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk3, $keys]} # rk[15:12]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk2, $keys]} # rk[11:8]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk1, $keys]} # rk[7:4]
+    addi $keys, $keys, $BLOCK_SIZE
+    @{[vle32_v $vk0, $keys]} # rk[3:0]
+___
+
+    return $code;
+}
+
+# Encrypt with all keys
+sub enc_blk {
+    my $data = shift;
+
+    my $code=<<___;
+    @{[vsm4r_vs $data, $vk0]}
+    @{[vsm4r_vs $data, $vk1]}
+    @{[vsm4r_vs $data, $vk2]}
+    @{[vsm4r_vs $data, $vk3]}
+    @{[vsm4r_vs $data, $vk4]}
+    @{[vsm4r_vs $data, $vk5]}
+    @{[vsm4r_vs $data, $vk6]}
+    @{[vsm4r_vs $data, $vk7]}
+___
+
+    return $code;
+}
+
+# Decrypt with all keys
+sub dec_blk {
+    my $data = shift;
+
+    my $code=<<___;
+    @{[vsm4r_vs $data, $vk7]}
+    @{[vsm4r_vs $data, $vk6]}
+    @{[vsm4r_vs $data, $vk5]}
+    @{[vsm4r_vs $data, $vk4]}
+    @{[vsm4r_vs $data, $vk3]}
+    @{[vsm4r_vs $data, $vk2]}
+    @{[vsm4r_vs $data, $vk1]}
+    @{[vsm4r_vs $data, $vk0]}
+___
+
+    return $code;
+}
+
+# Decrypt 4 blocks with all keys
+sub dec_4blks {
+    my $data0 = shift;
+    my $data1 = shift;
+    my $data2 = shift;
+    my $data3 = shift;
+
+    my $code=<<___;
+    @{[vsm4r_vs $data0, $vk7]}
+    @{[vsm4r_vs $data1, $vk7]}
+    @{[vsm4r_vs $data2, $vk7]}
+    @{[vsm4r_vs $data3, $vk7]}
+
+    @{[vsm4r_vs $data0, $vk6]}
+    @{[vsm4r_vs $data1, $vk6]}
+    @{[vsm4r_vs $data2, $vk6]}
+    @{[vsm4r_vs $data3, $vk6]}
+
+    @{[vsm4r_vs $data0, $vk5]}
+    @{[vsm4r_vs $data1, $vk5]}
+    @{[vsm4r_vs $data2, $vk5]}
+    @{[vsm4r_vs $data3, $vk5]}
+
+    @{[vsm4r_vs $data0, $vk4]}
+    @{[vsm4r_vs $data1, $vk4]}
+    @{[vsm4r_vs $data2, $vk4]}
+    @{[vsm4r_vs $data3, $vk4]}
+
+    @{[vsm4r_vs $data0, $vk3]}
+    @{[vsm4r_vs $data1, $vk3]}
+    @{[vsm4r_vs $data2, $vk3]}
+    @{[vsm4r_vs $data3, $vk3]}
+
+    @{[vsm4r_vs $data0, $vk2]}
+    @{[vsm4r_vs $data1, $vk2]}
+    @{[vsm4r_vs $data2, $vk2]}
+    @{[vsm4r_vs $data3, $vk2]}
+
+    @{[vsm4r_vs $data0, $vk1]}
+    @{[vsm4r_vs $data1, $vk1]}
+    @{[vsm4r_vs $data2, $vk1]}
+    @{[vsm4r_vs $data3, $vk1]}
+
+    @{[vsm4r_vs $data0, $vk0]}
+    @{[vsm4r_vs $data1, $vk0]}
+    @{[vsm4r_vs $data2, $vk0]}
+    @{[vsm4r_vs $data3, $vk0]}
+___
+
+    return $code;
+}
+
+####
+# void rv64i_zvksed_sm4_cbc_encrypt(const unsigned char *in, unsigned char *out,
+#                                   size_t len, const SM4_KEY *key,
+#                                   unsigned char *iv, int enc);
+#
+{
+my ($in,$out,$len,$keys,$ivp)=("a0","a1","a2","a3","a4");
+my ($tmp,$base)=("t0","t2");
+my ($vdata0,$vdata1,$vdata2,$vdata3,$vdata4,$vdata5,$vdata6,$vdata7)=("v1","v2","v3","v4","v5","v6","v7","v24");
+my ($vivec)=("v8");
+
+$code .= <<___;
+.p2align 3
+.globl rv64i_zvksed_sm4_cbc_encrypt
+.type rv64i_zvksed_sm4_cbc_encrypt,\@function
+rv64i_zvksed_sm4_cbc_encrypt:
+    # check whether the length is a multiple of 16 and >= 16
+    li $tmp, $BLOCK_SIZE
+    bltu $len, $tmp, .Lcbc_enc_end
+    andi $tmp, $len, 15
+    bnez $tmp, .Lcbc_enc_end
+
+    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+    # Load 32 round keys
+    @{[enc_load_key $keys]}
+
+    # Load IV
+    @{[vle32_v $vivec, $ivp]}
+# =====================================================
+# If data length ≥ 64 bytes, process 4 blocks in batch:
+# 4-block CBC encryption pipeline:
+# 1. Load 4 plaintext blocks
+# 2. Perform XOR operation with IV or previous ciphertext block (CBC chain)
+# 3. Reverse bytes for SM4 endianness
+# 4. Encrypt each data block using the enc_blk function
+# 5. Adjust the byte order and store the ciphertext block
+# 6. Update the initialization vector (IV)
+# If data length < 64 bytes, process it block by block via .Lcbc_enc_single
+# =====================================================
+.Lcbc_enc_loop:
+    li $tmp, $FOUR_BLOCKS
+    bltu $len, $tmp, .Lcbc_enc_single
+    # Load input data0-data3
+    @{[vle32_v $vdata0, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata1, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata2, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata3, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    # XOR with IV
+    @{[vxor_vv $vdata0, $vdata0, $vivec]}
+
+    @{[vrev8_v $vdata0, $vdata0]}
+    # Encrypt with all keys
+    @{[enc_blk $vdata0]}
+    @{[vrev8_v $vdata0, $vdata0]}
+
+    # Save the ciphertext (in reverse element order)
+    li $tmp_stride, $STRIDE
+    @{[reverse_order_S $vdata0, $out]}
+    # Update IV to ciphertext block 0
+    @{[vle32_v $vivec, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata1, $vdata1, $vivec]}
+
+    @{[vrev8_v $vdata1, $vdata1]}
+    @{[enc_blk $vdata1]}
+    @{[vrev8_v $vdata1, $vdata1]}
+
+    @{[reverse_order_S $vdata1, $out]}
+
+    # Update IV to ciphertext block 1
+    @{[vle32_v $vivec, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata2, $vdata2, $vivec]}
+
+    @{[vrev8_v $vdata2, $vdata2]}
+    @{[enc_blk $vdata2]}
+    @{[vrev8_v $vdata2, $vdata2]}
+
+    @{[reverse_order_S $vdata2, $out]}
+    # Update IV to ciphertext block 2
+    @{[vle32_v $vivec, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata3, $vdata3, $vivec]}
+
+    @{[vrev8_v $vdata3, $vdata3]}
+    @{[enc_blk $vdata3]}
+    @{[vrev8_v $vdata3, $vdata3]}
+
+    @{[reverse_order_S $vdata3, $out]}
+    # Update IV to ciphertext block 3
+    @{[vle32_v $vivec, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    addi $len, $len, -$FOUR_BLOCKS
+    bnez $len, .Lcbc_enc_loop
+    # Save the final IV
+    @{[vse32_v $vivec, $ivp]}
+    ret
+
+.Lcbc_enc_single:
+    # Load input data0
+    @{[vle32_v $vdata0, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    # XOR with IV
+    @{[vxor_vv $vdata0, $vdata0, $vivec]}
+
+    @{[vrev8_v $vdata0, $vdata0]}
+    # Encrypt with all keys
+    @{[enc_blk $vdata0]}
+    @{[vrev8_v $vdata0, $vdata0]}
+
+    # Save the ciphertext (in reverse element order)
+    li $tmp_stride, $STRIDE
+    @{[reverse_order_S $vdata0, $out]}
+
+    # Update IV to ciphertext block 0
+    @{[vle32_v $vivec, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    addi $len, $len, -$BLOCK_SIZE
+
+    li $tmp, $BLOCK_SIZE
+    bgeu $len, $tmp, .Lcbc_enc_single
+    # Save the final IV
+    @{[vse32_v $vivec, $ivp]}
+.Lcbc_enc_end:
+    ret
+.size rv64i_zvksed_sm4_cbc_encrypt,.-rv64i_zvksed_sm4_cbc_encrypt
+___
+
+####
+# void rv64i_zvksed_sm4_cbc_decrypt(const unsigned char *in, unsigned char *out,
+#                                   size_t len, const SM4_KEY *key,
+#                                   unsigned char *iv, int enc);
+#
+$code .= <<___;
+.p2align 3
+.globl rv64i_zvksed_sm4_cbc_decrypt
+.type rv64i_zvksed_sm4_cbc_decrypt,\@function
+rv64i_zvksed_sm4_cbc_decrypt:
+    # check whether the length is a multiple of 16 and >= 16
+    li $tmp, $BLOCK_SIZE
+    bltu $len, $tmp, .Lcbc_dec_end
+    andi $tmp, $len, 15
+    bnez $tmp, .Lcbc_dec_end
+
+    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
+    # Load IV (in reverse element order)
+    li $tmp_stride, $STRIDE
+    @{[reverse_order_L $vivec, $ivp]}
+
+    # Load 32 round keys
+    @{[dec_load_key $keys]}
+# =====================================================
+# If data length ≥ 128 bytes, process 8 blocks in batch:
+# 8-block CBC decryption pipeline:
+# 1. Load 8 ciphertext blocks
+# 2. Reverse bytes for SM4 endianness
+# 3. Decrypt all eight blocks with two calls to dec_4blks
+# 4. XOR with previous ciphertext block (CBC chain)
+# 5. Update IV and store plaintext with byte reversal
+# =====================================================
+.Lcbc_dec_loop:
+    li $tmp, $EIGHT_BLOCKS
+    bltu $len, $tmp, .Lcbc_check_64
+    # Load input data0-data7
+    @{[vle32_v $vdata0, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata1, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata2, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata3, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata4, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata5, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata6, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata7, $in]}
+    addi $in, $in, $BLOCK_SIZE
+
+    @{[vrev8_v $vdata0, $vdata0]}
+    @{[vrev8_v $vdata1, $vdata1]}
+    @{[vrev8_v $vdata2, $vdata2]}
+    @{[vrev8_v $vdata3, $vdata3]}
+    @{[vrev8_v $vdata4, $vdata4]}
+    @{[vrev8_v $vdata5, $vdata5]}
+    @{[vrev8_v $vdata6, $vdata6]}
+    @{[vrev8_v $vdata7, $vdata7]}
+    # Decrypt 8 data blocks
+    @{[dec_4blks $vdata0,$vdata1,$vdata2,$vdata3]}
+    @{[dec_4blks $vdata4,$vdata5,$vdata6,$vdata7]}
+    @{[vrev8_v $vdata0, $vdata0]}
+    @{[vrev8_v $vdata1, $vdata1]}
+    @{[vrev8_v $vdata2, $vdata2]}
+    @{[vrev8_v $vdata3, $vdata3]}
+    @{[vrev8_v $vdata4, $vdata4]}
+    @{[vrev8_v $vdata5, $vdata5]}
+    @{[vrev8_v $vdata6, $vdata6]}
+    @{[vrev8_v $vdata7, $vdata7]}
+
+    @{[vxor_vv $vdata0, $vdata0, $vivec]}
+
+    # Update IV to ciphertext block 0 (in reverse element order)
+    addi $base, $in, -128
+    @{[reverse_order_L $vivec, $base]}
+
+    # Save the plaintext (in reverse element order)
+    @{[reverse_order_S $vdata0, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata1, $vdata1, $vivec]}
+
+    addi $base, $in, -112
+    @{[reverse_order_L $vivec, $base]}
+    @{[reverse_order_S $vdata1, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata2, $vdata2, $vivec]}
+
+    addi $base, $in, -96
+    @{[reverse_order_L $vivec, $base]}
+    @{[reverse_order_S $vdata2, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata3, $vdata3, $vivec]}
+
+    addi $base, $in, -80
+    @{[reverse_order_L $vivec, $base]}
+    @{[reverse_order_S $vdata3, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata4, $vdata4, $vivec]}
+
+    addi $base, $in, -64
+    @{[reverse_order_L $vivec, $base]}
+    @{[reverse_order_S $vdata4, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata5, $vdata5, $vivec]}
+
+    addi $base, $in, -48
+    @{[reverse_order_L $vivec, $base]}
+    @{[reverse_order_S $vdata5, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata6, $vdata6, $vivec]}
+
+    addi $base, $in, -32
+    @{[reverse_order_L $vivec, $base]}
+    @{[reverse_order_S $vdata6, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata7, $vdata7, $vivec]}
+
+    addi $base, $in, -16
+    @{[reverse_order_L $vivec, $base]}
+    @{[reverse_order_S $vdata7, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    addi $len, $len, -$EIGHT_BLOCKS
+    bnez $len, .Lcbc_dec_loop
+    # Save the final IV (in reverse element order)
+    @{[reverse_order_S $vivec, $ivp]}
+    ret
+# =====================================================
+# If data length ≥ 64 bytes, process in batches of 4 blocks:
+# 4-block CBC decryption process:
+# 1. Load 4 ciphertext blocks
+# 2. Reverse bytes for SM4 endianness
+# 3. Decrypt each data block using the dec_4blks function
+# 4. XOR with previous ciphertext block (CBC chain)
+# 5. Update IV and store plaintext with byte reversal
+# If data length < 64 bytes, process it block by block via .Lcbc_dec_single
+# =====================================================
+.Lcbc_check_64:
+    li $tmp, $FOUR_BLOCKS
+    bltu $len, $tmp, .Lcbc_dec_single
+    # Load input data0-data3
+    @{[vle32_v $vdata0, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata1, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata2, $in]}
+    addi $in, $in, $BLOCK_SIZE
+    @{[vle32_v $vdata3, $in]}
+    addi $in, $in, $BLOCK_SIZE
+
+    @{[vrev8_v $vdata0, $vdata0]}
+    @{[vrev8_v $vdata1, $vdata1]}
+    @{[vrev8_v $vdata2, $vdata2]}
+    @{[vrev8_v $vdata3, $vdata3]}
+    # Decrypt 4 data blocks
+    @{[dec_4blks $vdata0,$vdata1,$vdata2,$vdata3]}
+    @{[vrev8_v $vdata0, $vdata0]}
+    @{[vrev8_v $vdata1, $vdata1]}
+    @{[vrev8_v $vdata2, $vdata2]}
+    @{[vrev8_v $vdata3, $vdata3]}
+
+    @{[vxor_vv $vdata0, $vdata0, $vivec]}
+
+    # Update IV to ciphertext block 0 (in reverse element order)
+    addi $base, $in, -64
+    @{[reverse_order_L $vivec, $base]}
+    # Save the plaintext (in reverse element order)
+    @{[reverse_order_S $vdata0, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata1, $vdata1, $vivec]}
+
+    addi $base, $in, -48
+    @{[reverse_order_L $vivec, $base]}
+    @{[reverse_order_S $vdata1, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata2, $vdata2, $vivec]}
+
+    addi $base, $in, -32
+    @{[reverse_order_L $vivec, $base]}
+    @{[reverse_order_S $vdata2, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    @{[vxor_vv $vdata3, $vdata3, $vivec]}
+
+    addi $base, $in, -16
+    @{[reverse_order_L $vivec, $base]}
+    @{[reverse_order_S $vdata3, $out]}
+    addi $out, $out, $BLOCK_SIZE
+
+    addi $len, $len, -$FOUR_BLOCKS
+    bnez $len, .Lcbc_check_64
+    # Save the final IV (in reverse element order)
+    @{[reverse_order_S $vivec, $ivp]}
+    ret
+
+.Lcbc_dec_single:
+    # Load input data0
+    @{[vle32_v $vdata0, $in]}
+    addi $in, $in, $BLOCK_SIZE
+
+    @{[vrev8_v $vdata0, $vdata0]}
+    # Decrypt with all keys
+    @{[dec_blk $vdata0]}
+    @{[vrev8_v $vdata0, $vdata0]}
+
+    # XOR with IV
+    @{[vxor_vv $vdata0, $vdata0, $vivec]}
+
+    # Update IV to the previous ciphertext block (in reverse element order)
+    li $tmp_stride, $STRIDE
+    addi $base, $in, -$BLOCK_SIZE
+    @{[reverse_order_L $vivec, $base]}
+    # Save the plaintext (in reverse element order)
+    @{[reverse_order_S $vdata0, $out]}
+    addi $out, $out, $BLOCK_SIZE
+    addi $len, $len, -$BLOCK_SIZE
+
+    li $tmp, $BLOCK_SIZE
+    bgeu $len, $tmp, .Lcbc_dec_single
+    # Save the final IV (in reverse element order)
+    @{[reverse_order_S $vivec, $ivp]}
+.Lcbc_dec_end:
+    ret
+.size rv64i_zvksed_sm4_cbc_decrypt,.-rv64i_zvksed_sm4_cbc_decrypt
+___
+}
+
 ####
 # int rv64i_zvksed_sm4_set_encrypt_key(const unsigned char *userKey,
 #                                      SM4_KEY *key);
@@ -94,19 +636,19 @@ rv64i_zvksed_sm4_set_encrypt_key:
 
     # Store round keys
     @{[vse32_v $vk0, $keys]} # rk[0:3]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
     @{[vse32_v $vk1, $keys]} # rk[4:7]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
     @{[vse32_v $vk2, $keys]} # rk[8:11]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
     @{[vse32_v $vk3, $keys]} # rk[12:15]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
    @{[vse32_v $vk4, $keys]} # rk[16:19]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
    @{[vse32_v $vk5, $keys]} # rk[20:23]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
    @{[vse32_v $vk6, $keys]} # rk[24:27]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
    @{[vse32_v $vk7, $keys]} # rk[28:31]
 
     li a0, 1
@@ -150,21 +692,21 @@ rv64i_zvksed_sm4_set_decrypt_key:
 
     # Store round keys in reverse order
     addi $keys, $keys, 12
-    li $stride, -4
+    li $stride, $STRIDE
     @{[vsse32_v $vk7, $keys, $stride]} # rk[31:28]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
     @{[vsse32_v $vk6, $keys, $stride]} # rk[27:24]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
     @{[vsse32_v $vk5, $keys, $stride]} # rk[23:20]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
     @{[vsse32_v $vk4, $keys, $stride]} # rk[19:16]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
     @{[vsse32_v $vk3, $keys, $stride]} # rk[15:12]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
     @{[vsse32_v $vk2, $keys, $stride]} # rk[11:8]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
     @{[vsse32_v $vk1, $keys, $stride]} # rk[7:4]
-    addi $keys, $keys, 16
+    addi $keys, $keys, $BLOCK_SIZE
     @{[vsse32_v $vk0, $keys, $stride]} # rk[3:0]
 
     li a0, 1
@@ -178,8 +720,8 @@ ___
 #                               const SM4_KEY *key);
 #
 {
-my ($in,$out,$keys,$stride)=("a0","a1","a2","t0");
-my ($vdata,$vk0,$vk1,$vk2,$vk3,$vk4,$vk5,$vk6,$vk7,$vgen)=("v1","v2","v3","v4","v5","v6","v7","v8","v9","v10");
+my ($in,$out,$keys)=("a0","a1","a2");
+my ($vdata)=("v1");
 
 $code .= <<___;
 .p2align 3
 .globl rv64i_zvksed_sm4_encrypt
@@ -187,42 +729,19 @@ $code .= <<___;
 rv64i_zvksed_sm4_encrypt:
     @{[vsetivli__x0_4_e32_m1_tu_mu]}
 
-    # Order of elements was adjusted in set_encrypt_key()
-    @{[vle32_v $vk0, $keys]} # rk[0:3]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk1, $keys]} # rk[4:7]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk2, $keys]} # rk[8:11]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk3, $keys]} # rk[12:15]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk4, $keys]} # rk[16:19]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk5, $keys]} # rk[20:23]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk6, $keys]} # rk[24:27]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk7, $keys]} # rk[28:31]
+    @{[enc_load_key $keys]}
 
     # Load input data
     @{[vle32_v $vdata, $in]}
     @{[vrev8_v $vdata, $vdata]}
 
     # Encrypt with all keys
-    @{[vsm4r_vs $vdata, $vk0]}
-    @{[vsm4r_vs $vdata, $vk1]}
-    @{[vsm4r_vs $vdata, $vk2]}
-    @{[vsm4r_vs $vdata, $vk3]}
-    @{[vsm4r_vs $vdata, $vk4]}
-    @{[vsm4r_vs $vdata, $vk5]}
-    @{[vsm4r_vs $vdata, $vk6]}
-    @{[vsm4r_vs $vdata, $vk7]}
+    @{[enc_blk $vdata]}
 
     # Save the ciphertext (in reverse element order)
     @{[vrev8_v $vdata, $vdata]}
-    li $stride, -4
-    addi $out, $out, 12
-    @{[vsse32_v $vdata, $out, $stride]}
+    li $tmp_stride, $STRIDE
+    @{[reverse_order_S $vdata, $out]}
 
     ret
 .size rv64i_zvksed_sm4_encrypt,.-rv64i_zvksed_sm4_encrypt
@@ -234,8 +753,8 @@ ___
 #                               const SM4_KEY *key);
 #
 {
-my ($in,$out,$keys,$stride)=("a0","a1","a2","t0");
-my ($vdata,$vk0,$vk1,$vk2,$vk3,$vk4,$vk5,$vk6,$vk7,$vgen)=("v1","v2","v3","v4","v5","v6","v7","v8","v9","v10");
+my ($in,$out,$keys)=("a0","a1","a2");
+my ($vdata)=("v1");
 
 $code .= <<___;
 .p2align 3
 .globl rv64i_zvksed_sm4_decrypt
@@ -243,42 +762,19 @@ $code .= <<___;
 rv64i_zvksed_sm4_decrypt:
     @{[vsetivli__x0_4_e32_m1_tu_mu]}
 
-    # Order of elements was adjusted in set_decrypt_key()
-    @{[vle32_v $vk7, $keys]} # rk[31:28]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk6, $keys]} # rk[27:24]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk5, $keys]} # rk[23:20]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk4, $keys]} # rk[19:16]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk3, $keys]} # rk[15:12]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk2, $keys]} # rk[11:8]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk1, $keys]} # rk[7:4]
-    addi $keys, $keys, 16
-    @{[vle32_v $vk0, $keys]} # rk[3:0]
+    @{[dec_load_key $keys]}
 
     # Load input data
     @{[vle32_v $vdata, $in]}
     @{[vrev8_v $vdata, $vdata]}
 
     # Decrypt with all keys
-    @{[vsm4r_vs $vdata, $vk7]}
-    @{[vsm4r_vs $vdata, $vk6]}
-    @{[vsm4r_vs $vdata, $vk5]}
-    @{[vsm4r_vs $vdata, $vk4]}
-    @{[vsm4r_vs $vdata, $vk3]}
-    @{[vsm4r_vs $vdata, $vk2]}
-    @{[vsm4r_vs $vdata, $vk1]}
-    @{[vsm4r_vs $vdata, $vk0]}
+    @{[dec_blk $vdata]}
 
     # Save the plaintext (in reverse element order)
     @{[vrev8_v $vdata, $vdata]}
-    li $stride, -4
-    addi $out, $out, 12
-    @{[vsse32_v $vdata, $out, $stride]}
+    li $tmp_stride, $STRIDE
+    @{[reverse_order_S $vdata, $out]}
 
     ret
 .size rv64i_zvksed_sm4_decrypt,.-rv64i_zvksed_sm4_decrypt
diff --git a/include/crypto/sm4_platform.h b/include/crypto/sm4_platform.h
index 0e9e2e1d397..f7b436255c2 100644
--- a/include/crypto/sm4_platform.h
+++ b/include/crypto/sm4_platform.h
@@ -50,6 +50,12 @@ void rv64i_zvksed_sm4_encrypt(const unsigned char *in, unsigned char *out,
                               const SM4_KEY *key);
 void rv64i_zvksed_sm4_decrypt(const unsigned char *in, unsigned char *out,
                               const SM4_KEY *key);
+void rv64i_zvksed_sm4_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                                  size_t len, const SM4_KEY *key,
+                                  unsigned char *iv, int enc);
+void rv64i_zvksed_sm4_cbc_decrypt(const unsigned char *in, unsigned char *out,
+                                  size_t len, const SM4_KEY *key,
+                                  unsigned char *iv, int enc);
 # elif (defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64))
 /* Intel x86_64 support */
 # include "internal/cryptlib.h"
diff --git a/providers/implementations/ciphers/cipher_sm4_hw_rv64i.inc b/providers/implementations/ciphers/cipher_sm4_hw_rv64i.inc
index 763d9d09dd9..683b2b25a1f 100644
--- a/providers/implementations/ciphers/cipher_sm4_hw_rv64i.inc
+++ b/providers/implementations/ciphers/cipher_sm4_hw_rv64i.inc
@@ -26,6 +26,7 @@ static int cipher_hw_rv64i_zvksed_sm4_initkey(PROV_CIPHER_CTX *ctx,
     SM4_KEY *ks = &sctx->ks.ks;
 
     ctx->ks = ks;
+
     if (ctx->enc
             || (ctx->mode != EVP_CIPH_ECB_MODE
                 && ctx->mode != EVP_CIPH_CBC_MODE)) {
@@ -38,6 +39,14 @@ static int cipher_hw_rv64i_zvksed_sm4_initkey(PROV_CIPHER_CTX *ctx,
         ctx->stream.cbc = NULL;
     }
 
+    if (ctx->mode == EVP_CIPH_CBC_MODE) {
+        if (ctx->enc) {
+            ctx->stream.cbc = (cbc128_f) rv64i_zvksed_sm4_cbc_encrypt;
+        } else {
+            ctx->stream.cbc = (cbc128_f) rv64i_zvksed_sm4_cbc_decrypt;
+        }
+    }
+
     return 1;
 }
diff --git a/test/recipes/30-test_evp_data/evpciph_sm4.txt b/test/recipes/30-test_evp_data/evpciph_sm4.txt
index 993cf7b51e5..f23129cd8d0 100644
--- a/test/recipes/30-test_evp_data/evpciph_sm4.txt
+++ b/test/recipes/30-test_evp_data/evpciph_sm4.txt
@@ -13,6 +13,12 @@ Key = 0123456789ABCDEFFEDCBA9876543210
 Plaintext = 0123456789ABCDEFFEDCBA9876543210
 Ciphertext = 681EDF34D206965E86B3E94F536E4246
 
+Cipher = SM4-CBC
+Key = 0123456789ABCDEFFEDCBA9876543210
+IV = 0123456789ABCDEFFEDCBA9876543210
+Plaintext = 0123456789ABCDEFFEDCBA9876543210
+Ciphertext = 2677F46B09C122CC975533105BD4A22A
+
 Cipher = SM4-CBC
 Key = 0123456789ABCDEFFEDCBA9876543210
 IV = 0123456789ABCDEFFEDCBA9876543210
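
As a usage sketch (not part of the merged patch): the first SM4-CBC vector added to evpciph_sm4.txt above doubles as a convenient one-block smoke test for the new assembly entry points, since the key, IV and plaintext all share the same 16 bytes. The C program below is hypothetical; it assumes an OpenSSL source tree (for the internal crypto/sm4.h and crypto/sm4_platform.h headers) and a RISC-V target whose CPU implements the Zvksed extension, and it passes the trailing enc argument only because the cbc128_f prototype requires one.

#include <stdio.h>
#include <string.h>
#include "crypto/sm4.h"
#include "crypto/sm4_platform.h"

int main(void)
{
    /* Key, IV and plaintext all share these 16 bytes in the test vector */
    static const unsigned char vec[16] = {
        0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
        0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10
    };
    static const unsigned char expect[16] = {
        0x26, 0x77, 0xF4, 0x6B, 0x09, 0xC1, 0x22, 0xCC,
        0x97, 0x55, 0x33, 0x10, 0x5B, 0xD4, 0xA2, 0x2A
    };
    unsigned char iv[16], ct[16], pt[16];
    SM4_KEY eks, dks;

    /* Encrypt one block; the routine updates the IV buffer in place */
    memcpy(iv, vec, sizeof(iv));
    rv64i_zvksed_sm4_set_encrypt_key(vec, &eks);
    rv64i_zvksed_sm4_cbc_encrypt(vec, ct, sizeof(ct), &eks, iv, 1);
    printf("encrypt: %s\n", memcmp(ct, expect, sizeof(ct)) == 0 ? "ok" : "FAIL");

    /*
     * Decryption must pair with set_decrypt_key: dec_load_key expects
     * the reversed round-key order that set_decrypt_key stores.
     */
    memcpy(iv, vec, sizeof(iv));
    rv64i_zvksed_sm4_set_decrypt_key(vec, &dks);
    rv64i_zvksed_sm4_cbc_decrypt(ct, pt, sizeof(pt), &dks, iv, 0);
    printf("decrypt: %s\n", memcmp(pt, vec, sizeof(pt)) == 0 ? "ok" : "FAIL");
    return 0;
}

In-tree, the provider hook above reaches the same code through ctx->stream.cbc, so the EVP test vectors in evpciph_sm4.txt exercise both directions automatically.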