# Register names interpolated into the assembly heredoc that follows.
#
#   $tmp              - scalar scratch register (t0); in the code below it
#                       receives the address of .Lreverse_index.
#   $base             - scalar register (t2); not referenced in the part of
#                       the routine visible next to these declarations.
#   $vdata0..$vdata7  - vector registers for data blocks.  The visible code
#                       passes $vdata0..$vdata3 to enc_blk, byte-swaps them
#                       with vrev8_v and writes them to $out.
#   $vivec            - vector register for the CBC chaining value: loaded
#                       from $ivp, XORed into the next block before enc_blk,
#                       then refreshed from the block just encrypted.
#   $vindex           - vector register loaded from .Lreverse_index (the
#                       words 3, 2, 1, 0) and passed to vrgather_vv so the
#                       chaining value can be rebuilt from a data register
#                       instead of being reloaded from $out.
#
# NOTE(review): v0 is also the RVV mask register; confirm that nothing
# emitted while $vindex is live (e.g. inside enc_blk) relies on masking or
# otherwise clobbers v0.
my ($tmp,$base)=("t0","t2");
my ($vdata0,$vdata1,$vdata2,$vdata3,$vdata4,$vdata5,$vdata6,$vdata7)=("v1","v2","v3","v4","v5","v6","v7","v24");
my ($vivec)=("v8");
+my ($vindex)=("v0");
$code .= <<___;
+.section .rodata
+.align 4
+.Lreverse_index:
+ .word 3, 2, 1, 0
+.text
.p2align 3
.globl rv64i_zvksed_sm4_cbc_encrypt
.type rv64i_zvksed_sm4_cbc_encrypt,\@function
# Load IV
@{[vle32_v $vivec, $ivp]}
+
+ # Load the reverse index (for IV updates)
+ la $tmp, .Lreverse_index
+ @{[vle32_v $vindex, $tmp]}
# =====================================================
# If data length >= 64 bytes, process 4 blocks in batch:
# 4-block CBC encryption pipeline:
@{[enc_blk $vdata0]}
@{[vrev8_v $vdata0, $vdata0]}
- # Save the ciphertext (in reverse element order)
- li $tmp_stride, $STRIDE
- @{[reverse_order_S $vdata0, $out]}
# Update IV to ciphertext block 0
- @{[vle32_v $vivec, $out]}
- addi $out, $out, $BLOCK_SIZE
+ @{[vrgather_vv $vivec, $vdata0, $vindex]}
@{[vxor_vv $vdata1, $vdata1, $vivec]}
@{[enc_blk $vdata1]}
@{[vrev8_v $vdata1, $vdata1]}
- @{[reverse_order_S $vdata1, $out]}
-
# Update IV to ciphertext block 1
- @{[vle32_v $vivec, $out]}
- addi $out, $out, $BLOCK_SIZE
+ @{[vrgather_vv $vivec, $vdata1, $vindex]}
@{[vxor_vv $vdata2, $vdata2, $vivec]}
@{[enc_blk $vdata2]}
@{[vrev8_v $vdata2, $vdata2]}
- @{[reverse_order_S $vdata2, $out]}
# Update IV to ciphertext block 2
- @{[vle32_v $vivec, $out]}
- addi $out, $out, $BLOCK_SIZE
+ @{[vrgather_vv $vivec, $vdata2, $vindex]}
@{[vxor_vv $vdata3, $vdata3, $vivec]}
@{[enc_blk $vdata3]}
@{[vrev8_v $vdata3, $vdata3]}
- @{[reverse_order_S $vdata3, $out]}
# Update IV to ciphertext block 3
- @{[vle32_v $vivec, $out]}
+ @{[vrgather_vv $vivec, $vdata3, $vindex]}
+
+ # Save the ciphertext (in reverse element order)
+ li $tmp_stride, $STRIDE
+ @{[reverse_order_S $vdata0, $out]}
+ addi $out, $out, $BLOCK_SIZE
+ @{[reverse_order_S $vdata1, $out]}
+ addi $out, $out, $BLOCK_SIZE
+ @{[reverse_order_S $vdata2, $out]}
+ addi $out, $out, $BLOCK_SIZE
+ @{[reverse_order_S $vdata3, $out]}
addi $out, $out, $BLOCK_SIZE
addi $len, $len, -$FOUR_BLOCKS
@{[enc_blk $vdata0]}
@{[vrev8_v $vdata0, $vdata0]}
+ # Update IV to ciphertext block 0
+ @{[vrgather_vv $vivec, $vdata0, $vindex]}
+
# Save the ciphertext (in reverse element order)
li $tmp_stride, $STRIDE
@{[reverse_order_S $vdata0, $out]}
-
- # Update IV to ciphertext block 0
- @{[vle32_v $vivec, $out]}
addi $out, $out, $BLOCK_SIZE
addi $len, $len, -$BLOCK_SIZE