$code .= <<___;
.p2align 3
L_enc_128:
- @{[vsetivli__x0_4_e32_m1_tu_mu]}
+ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} # vl = 4 elements of 32 bits
+ # Input is loaded first so each round key is consumed right after its load.
+ @{[vle32_v $v1, ($INP)]} # 4 words read from INP
@{[vle32_v $v10, ($KEYP)]}
+ @{[vaesz_vs $v1, $v10]} # with round key w[ 0, 3]
addi $KEYP, $KEYP, 16
@{[vle32_v $v11, ($KEYP)]}
+ @{[vaesem_vs $v1, $v11]} # with round key w[ 4, 7]
addi $KEYP, $KEYP, 16
@{[vle32_v $v12, ($KEYP)]}
+ @{[vaesem_vs $v1, $v12]} # with round key w[ 8,11]
addi $KEYP, $KEYP, 16
@{[vle32_v $v13, ($KEYP)]}
+ @{[vaesem_vs $v1, $v13]} # with round key w[12,15]
addi $KEYP, $KEYP, 16
@{[vle32_v $v14, ($KEYP)]}
+ @{[vaesem_vs $v1, $v14]} # with round key w[16,19]
addi $KEYP, $KEYP, 16
@{[vle32_v $v15, ($KEYP)]}
+ @{[vaesem_vs $v1, $v15]} # with round key w[20,23]
addi $KEYP, $KEYP, 16
@{[vle32_v $v16, ($KEYP)]}
+ @{[vaesem_vs $v1, $v16]} # with round key w[24,27]
addi $KEYP, $KEYP, 16
@{[vle32_v $v17, ($KEYP)]}
+ @{[vaesem_vs $v1, $v17]} # with round key w[28,31]
addi $KEYP, $KEYP, 16
@{[vle32_v $v18, ($KEYP)]}
+ @{[vaesem_vs $v1, $v18]} # with round key w[32,35]
addi $KEYP, $KEYP, 16
@{[vle32_v $v19, ($KEYP)]}
+ @{[vaesem_vs $v1, $v19]} # with round key w[36,39]
addi $KEYP, $KEYP, 16
@{[vle32_v $v20, ($KEYP)]}
-
- @{[vle32_v $v1, ($INP)]}
-
- @{[vaesz_vs $v1, $v10]} # with round key w[ 0, 3]
- @{[vaesem_vs $v1, $v11]} # with round key w[ 4, 7]
- @{[vaesem_vs $v1, $v12]} # with round key w[ 8,11]
- @{[vaesem_vs $v1, $v13]} # with round key w[12,15]
- @{[vaesem_vs $v1, $v14]} # with round key w[16,19]
- @{[vaesem_vs $v1, $v15]} # with round key w[20,23]
- @{[vaesem_vs $v1, $v16]} # with round key w[24,27]
- @{[vaesem_vs $v1, $v17]} # with round key w[28,31]
- @{[vaesem_vs $v1, $v18]} # with round key w[32,35]
- @{[vaesem_vs $v1, $v19]} # with round key w[36,39]
@{[vaesef_vs $v1, $v20]} # with round key w[40,43]
@{[vse32_v $v1, ($OUTP)]}
$code .= <<___;
.p2align 3
L_enc_192:
- @{[vsetivli__x0_4_e32_m1_tu_mu]}
+ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} # vl = 4 elements of 32 bits
+ # Input is loaded first so each round key is consumed right after its load.
+ @{[vle32_v $v1, ($INP)]} # 4 words read from INP
@{[vle32_v $v10, ($KEYP)]}
+ @{[vaesz_vs $v1, $v10]} # with round key w[ 0, 3]
addi $KEYP, $KEYP, 16
@{[vle32_v $v11, ($KEYP)]}
+ @{[vaesem_vs $v1, $v11]} # with round key w[ 4, 7]
addi $KEYP, $KEYP, 16
@{[vle32_v $v12, ($KEYP)]}
+ @{[vaesem_vs $v1, $v12]} # with round key w[ 8,11]
addi $KEYP, $KEYP, 16
@{[vle32_v $v13, ($KEYP)]}
+ @{[vaesem_vs $v1, $v13]} # with round key w[12,15]
addi $KEYP, $KEYP, 16
@{[vle32_v $v14, ($KEYP)]}
+ @{[vaesem_vs $v1, $v14]} # with round key w[16,19]
addi $KEYP, $KEYP, 16
@{[vle32_v $v15, ($KEYP)]}
+ @{[vaesem_vs $v1, $v15]} # with round key w[20,23]
addi $KEYP, $KEYP, 16
@{[vle32_v $v16, ($KEYP)]}
+ @{[vaesem_vs $v1, $v16]} # with round key w[24,27]
addi $KEYP, $KEYP, 16
@{[vle32_v $v17, ($KEYP)]}
+ @{[vaesem_vs $v1, $v17]} # with round key w[28,31]
addi $KEYP, $KEYP, 16
@{[vle32_v $v18, ($KEYP)]}
+ @{[vaesem_vs $v1, $v18]} # with round key w[32,35]
addi $KEYP, $KEYP, 16
@{[vle32_v $v19, ($KEYP)]}
+ @{[vaesem_vs $v1, $v19]} # with round key w[36,39]
addi $KEYP, $KEYP, 16
@{[vle32_v $v20, ($KEYP)]}
+ @{[vaesem_vs $v1, $v20]} # with round key w[40,43]
addi $KEYP, $KEYP, 16
@{[vle32_v $v21, ($KEYP)]}
+ @{[vaesem_vs $v1, $v21]} # with round key w[44,47]
addi $KEYP, $KEYP, 16
@{[vle32_v $v22, ($KEYP)]}
-
- @{[vle32_v $v1, ($INP)]}
-
- @{[vaesz_vs $v1, $v10]} # with round key w[ 0, 3]
- @{[vaesem_vs $v1, $v11]}
- @{[vaesem_vs $v1, $v12]}
- @{[vaesem_vs $v1, $v13]}
- @{[vaesem_vs $v1, $v14]}
- @{[vaesem_vs $v1, $v15]}
- @{[vaesem_vs $v1, $v16]}
- @{[vaesem_vs $v1, $v17]}
- @{[vaesem_vs $v1, $v18]}
- @{[vaesem_vs $v1, $v19]}
- @{[vaesem_vs $v1, $v20]}
- @{[vaesem_vs $v1, $v21]}
@{[vaesef_vs $v1, $v22]}
@{[vse32_v $v1, ($OUTP)]}
$code .= <<___;
.p2align 3
L_enc_256:
- @{[vsetivli__x0_4_e32_m1_tu_mu]}
+ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} # vl = 4 elements of 32 bits
+ # Input is loaded first so each round key is consumed right after its load.
+ @{[vle32_v $v1, ($INP)]} # 4 words read from INP
@{[vle32_v $v10, ($KEYP)]}
+ @{[vaesz_vs $v1, $v10]} # with round key w[ 0, 3]
addi $KEYP, $KEYP, 16
@{[vle32_v $v11, ($KEYP)]}
+ @{[vaesem_vs $v1, $v11]} # with round key w[ 4, 7]
addi $KEYP, $KEYP, 16
@{[vle32_v $v12, ($KEYP)]}
+ @{[vaesem_vs $v1, $v12]} # with round key w[ 8,11]
addi $KEYP, $KEYP, 16
@{[vle32_v $v13, ($KEYP)]}
+ @{[vaesem_vs $v1, $v13]} # with round key w[12,15]
addi $KEYP, $KEYP, 16
@{[vle32_v $v14, ($KEYP)]}
+ @{[vaesem_vs $v1, $v14]} # with round key w[16,19]
addi $KEYP, $KEYP, 16
@{[vle32_v $v15, ($KEYP)]}
+ @{[vaesem_vs $v1, $v15]} # with round key w[20,23]
addi $KEYP, $KEYP, 16
@{[vle32_v $v16, ($KEYP)]}
+ @{[vaesem_vs $v1, $v16]} # with round key w[24,27]
addi $KEYP, $KEYP, 16
@{[vle32_v $v17, ($KEYP)]}
+ @{[vaesem_vs $v1, $v17]} # with round key w[28,31]
addi $KEYP, $KEYP, 16
@{[vle32_v $v18, ($KEYP)]}
+ @{[vaesem_vs $v1, $v18]} # with round key w[32,35]
addi $KEYP, $KEYP, 16
@{[vle32_v $v19, ($KEYP)]}
+ @{[vaesem_vs $v1, $v19]} # with round key w[36,39]
addi $KEYP, $KEYP, 16
@{[vle32_v $v20, ($KEYP)]}
+ @{[vaesem_vs $v1, $v20]} # with round key w[40,43]
addi $KEYP, $KEYP, 16
@{[vle32_v $v21, ($KEYP)]}
+ @{[vaesem_vs $v1, $v21]} # with round key w[44,47]
addi $KEYP, $KEYP, 16
@{[vle32_v $v22, ($KEYP)]}
+ @{[vaesem_vs $v1, $v22]} # with round key w[48,51]
addi $KEYP, $KEYP, 16
@{[vle32_v $v23, ($KEYP)]}
+ @{[vaesem_vs $v1, $v23]} # with round key w[52,55]
addi $KEYP, $KEYP, 16
@{[vle32_v $v24, ($KEYP)]}
-
- @{[vle32_v $v1, ($INP)]}
-
- @{[vaesz_vs $v1, $v10]} # with round key w[ 0, 3]
- @{[vaesem_vs $v1, $v11]}
- @{[vaesem_vs $v1, $v12]}
- @{[vaesem_vs $v1, $v13]}
- @{[vaesem_vs $v1, $v14]}
- @{[vaesem_vs $v1, $v15]}
- @{[vaesem_vs $v1, $v16]}
- @{[vaesem_vs $v1, $v17]}
- @{[vaesem_vs $v1, $v18]}
- @{[vaesem_vs $v1, $v19]}
- @{[vaesem_vs $v1, $v20]}
- @{[vaesem_vs $v1, $v21]}
- @{[vaesem_vs $v1, $v22]}
- @{[vaesem_vs $v1, $v23]}
@{[vaesef_vs $v1, $v24]}
@{[vse32_v $v1, ($OUTP)]}