--- a/ChangeLog
+++ b/ChangeLog
+2021-04-13  Niels Möller  <nisse@lysator.liu.se>
+
+	* powerpc64/p8/aes-encrypt-internal.asm (SWAP_MASK): Change macro
+	name to use all uppercase.
+	* powerpc64/p8/aes-decrypt-internal.asm (SWAP_MASK): Likewise.
+
2021-04-11  Niels Möller  <nisse@lysator.liu.se>

	* config.guess: Update to 2021-01-25 version, from savannah's
--- a/powerpc64/p8/aes-decrypt-internal.asm
+++ b/powerpc64/p8/aes-decrypt-internal.asm
define(`DST', `r7')
define(`SRC', `r8')
-define(`swap_mask', `v0')
+define(`SWAP_MASK', `v0')
define(`K', `v1')
define(`S0', `v2')
PROLOGUE(_nettle_aes_decrypt)
vxor ZERO,ZERO,ZERO
- DATA_LOAD_VEC(swap_mask,.swap_mask,r5)
+ DATA_LOAD_VEC(SWAP_MASK,.swap_mask,r5)
subi ROUNDS,ROUNDS,1
srdi LENGTH,LENGTH,4
.align 5
Lx8_loop:
lxvd2x VSR(K),0,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
lxvd2x VSR(S0),0,SRC
lxvd2x VSR(S1),r25,SRC
lxvd2x VSR(S6),r30,SRC
lxvd2x VSR(S7),r31,SRC
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask
- vperm S2,S2,S2,swap_mask
- vperm S3,S3,S3,swap_mask
- vperm S4,S4,S4,swap_mask
- vperm S5,S5,S5,swap_mask
- vperm S6,S6,S6,swap_mask
- vperm S7,S7,S7,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK
+ vperm S2,S2,S2,SWAP_MASK
+ vperm S3,S3,S3,SWAP_MASK
+ vperm S4,S4,S4,SWAP_MASK
+ vperm S5,S5,S5,SWAP_MASK
+ vperm S6,S6,S6,SWAP_MASK
+ vperm S7,S7,S7,SWAP_MASK')
vxor S0,S0,K
vxor S1,S1,K
.align 5
L8x_round_loop:
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vncipher S0,S0,ZERO
vncipher S1,S1,ZERO
vncipher S2,S2,ZERO
bdnz L8x_round_loop
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vncipherlast S0,S0,K
vncipherlast S1,S1,K
vncipherlast S2,S2,K
vncipherlast S6,S6,K
vncipherlast S7,S7,K
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask
- vperm S2,S2,S2,swap_mask
- vperm S3,S3,S3,swap_mask
- vperm S4,S4,S4,swap_mask
- vperm S5,S5,S5,swap_mask
- vperm S6,S6,S6,swap_mask
- vperm S7,S7,S7,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK
+ vperm S2,S2,S2,SWAP_MASK
+ vperm S3,S3,S3,SWAP_MASK
+ vperm S4,S4,S4,SWAP_MASK
+ vperm S5,S5,S5,SWAP_MASK
+ vperm S6,S6,S6,SWAP_MASK
+ vperm S7,S7,S7,SWAP_MASK')
stxvd2x VSR(S0),0,DST
stxvd2x VSR(S1),r25,DST
beq L2x
lxvd2x VSR(K),0,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
lxvd2x VSR(S0),0,SRC
li r9,0x10
addi r9,r9,0x10
lxvd2x VSR(S3),r9,SRC
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask
- vperm S2,S2,S2,swap_mask
- vperm S3,S3,S3,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK
+ vperm S2,S2,S2,SWAP_MASK
+ vperm S3,S3,S3,SWAP_MASK')
vxor S0,S0,K
vxor S1,S1,K
.align 5
L4x_round_loop:
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vncipher S0,S0,ZERO
vncipher S1,S1,ZERO
vncipher S2,S2,ZERO
bdnz L4x_round_loop
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vncipherlast S0,S0,K
vncipherlast S1,S1,K
vncipherlast S2,S2,K
vncipherlast S3,S3,K
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask
- vperm S2,S2,S2,swap_mask
- vperm S3,S3,S3,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK
+ vperm S2,S2,S2,SWAP_MASK
+ vperm S3,S3,S3,SWAP_MASK')
stxvd2x VSR(S0),0,DST
li r9,0x10
beq L1x
lxvd2x VSR(K),0,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
lxvd2x VSR(S0),0,SRC
li r9,0x10
lxvd2x VSR(S1),r9,SRC
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK')
vxor S0,S0,K
vxor S1,S1,K
.align 5
L2x_round_loop:
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vncipher S0,S0,ZERO
vncipher S1,S1,ZERO
vxor S0,S0,K
bdnz L2x_round_loop
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vncipherlast S0,S0,K
vncipherlast S1,S1,K
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK')
stxvd2x VSR(S0),0,DST
li r9,0x10
beq Ldone
lxvd2x VSR(K),0,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
lxvd2x VSR(S0),0,SRC
-IF_LE(`vperm S0,S0,S0,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK')
vxor S0,S0,K
.align 5
L1x_round_loop:
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vncipher S0,S0,ZERO
vxor S0,S0,K
addi r10,r10,0x10
bdnz L1x_round_loop
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vncipherlast S0,S0,K
-IF_LE(`vperm S0,S0,S0,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK')
stxvd2x VSR(S0),0,DST
--- a/powerpc64/p8/aes-encrypt-internal.asm
+++ b/powerpc64/p8/aes-encrypt-internal.asm
define(`DST', `r7')
define(`SRC', `r8')
-define(`swap_mask', `v0')
+define(`SWAP_MASK', `v0')
define(`K', `v1')
define(`S0', `v2')
define(`FUNC_ALIGN', `5')
PROLOGUE(_nettle_aes_encrypt)
- DATA_LOAD_VEC(swap_mask,.swap_mask,r5)
+ DATA_LOAD_VEC(SWAP_MASK,.swap_mask,r5)
subi ROUNDS,ROUNDS,1
srdi LENGTH,LENGTH,4
.align 5
Lx8_loop:
lxvd2x VSR(K),0,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
lxvd2x VSR(S0),0,SRC
lxvd2x VSR(S1),r25,SRC
lxvd2x VSR(S6),r30,SRC
lxvd2x VSR(S7),r31,SRC
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask
- vperm S2,S2,S2,swap_mask
- vperm S3,S3,S3,swap_mask
- vperm S4,S4,S4,swap_mask
- vperm S5,S5,S5,swap_mask
- vperm S6,S6,S6,swap_mask
- vperm S7,S7,S7,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK
+ vperm S2,S2,S2,SWAP_MASK
+ vperm S3,S3,S3,SWAP_MASK
+ vperm S4,S4,S4,SWAP_MASK
+ vperm S5,S5,S5,SWAP_MASK
+ vperm S6,S6,S6,SWAP_MASK
+ vperm S7,S7,S7,SWAP_MASK')
vxor S0,S0,K
vxor S1,S1,K
.align 5
L8x_round_loop:
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vcipher S0,S0,K
vcipher S1,S1,K
vcipher S2,S2,K
bdnz L8x_round_loop
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vcipherlast S0,S0,K
vcipherlast S1,S1,K
vcipherlast S2,S2,K
vcipherlast S6,S6,K
vcipherlast S7,S7,K
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask
- vperm S2,S2,S2,swap_mask
- vperm S3,S3,S3,swap_mask
- vperm S4,S4,S4,swap_mask
- vperm S5,S5,S5,swap_mask
- vperm S6,S6,S6,swap_mask
- vperm S7,S7,S7,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK
+ vperm S2,S2,S2,SWAP_MASK
+ vperm S3,S3,S3,SWAP_MASK
+ vperm S4,S4,S4,SWAP_MASK
+ vperm S5,S5,S5,SWAP_MASK
+ vperm S6,S6,S6,SWAP_MASK
+ vperm S7,S7,S7,SWAP_MASK')
stxvd2x VSR(S0),0,DST
stxvd2x VSR(S1),r25,DST
beq L2x
lxvd2x VSR(K),0,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
lxvd2x VSR(S0),0,SRC
li r9,0x10
addi r9,r9,0x10
lxvd2x VSR(S3),r9,SRC
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask
- vperm S2,S2,S2,swap_mask
- vperm S3,S3,S3,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK
+ vperm S2,S2,S2,SWAP_MASK
+ vperm S3,S3,S3,SWAP_MASK')
vxor S0,S0,K
vxor S1,S1,K
.align 5
L4x_round_loop:
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vcipher S0,S0,K
vcipher S1,S1,K
vcipher S2,S2,K
bdnz L4x_round_loop
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vcipherlast S0,S0,K
vcipherlast S1,S1,K
vcipherlast S2,S2,K
vcipherlast S3,S3,K
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask
- vperm S2,S2,S2,swap_mask
- vperm S3,S3,S3,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK
+ vperm S2,S2,S2,SWAP_MASK
+ vperm S3,S3,S3,SWAP_MASK')
stxvd2x VSR(S0),0,DST
li r9,0x10
beq L1x
lxvd2x VSR(K),0,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
lxvd2x VSR(S0),0,SRC
li r9,0x10
lxvd2x VSR(S1),r9,SRC
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK')
vxor S0,S0,K
vxor S1,S1,K
.align 5
L2x_round_loop:
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vcipher S0,S0,K
vcipher S1,S1,K
addi r10,r10,0x10
bdnz L2x_round_loop
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vcipherlast S0,S0,K
vcipherlast S1,S1,K
-IF_LE(`vperm S0,S0,S0,swap_mask
- vperm S1,S1,S1,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK
+ vperm S1,S1,S1,SWAP_MASK')
stxvd2x VSR(S0),0,DST
li r9,0x10
beq Ldone
lxvd2x VSR(K),0,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
lxvd2x VSR(S0),0,SRC
-IF_LE(`vperm S0,S0,S0,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK')
vxor S0,S0,K
.align 5
L1x_round_loop:
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vcipher S0,S0,K
addi r10,r10,0x10
bdnz L1x_round_loop
lxvd2x VSR(K),r10,KEYS
- vperm K,K,K,swap_mask
+ vperm K,K,K,SWAP_MASK
vcipherlast S0,S0,K
-IF_LE(`vperm S0,S0,S0,swap_mask')
+IF_LE(`vperm S0,S0,S0,SWAP_MASK')
stxvd2x VSR(S0),0,DST
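
For context on what the rename does: Nettle's .asm sources are preprocessed with GNU m4, so define(`SWAP_MASK', `v0') makes every later occurrence of SWAP_MASK expand to the vector register v0. The patch therefore changes only the macro's spelling; the generated assembly is identical. A minimal standalone sketch of the mechanism (hypothetical file, not part of this patch):

    dnl sketch.m4 -- run as: m4 sketch.m4
    dnl Mirrors the define in aes-{en,de}crypt-internal.asm.
    define(`SWAP_MASK', `v0')dnl
    vperm K,K,K,SWAP_MASK

m4 expands the last line to "vperm K,K,K,v0", which is exactly what the assembler sees. The uppercase spelling matches the files' existing convention for register macros (K, S0, ZERO, VSR), keeping macro names visually distinct from assembler mnemonics and literal register names.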