+2024-01-28 Niels Möller <nisse@lysator.liu.se>
+
+ * powerpc64/p8/aes-encrypt-internal.asm: Use r10-r12 consistently
+ for indexing, reducing the number of callee-save registers used.
+ * powerpc64/p8/aes-decrypt-internal.asm: Likewise.
+
+ 2024-01-27 Niels Möller <nisse@lysator.liu.se>
+
+ * aes-invert-internal.c (_nettle_aes_invert): Don't reorder the subkeys.
+ * aes-decrypt-internal.c (_nettle_aes_decrypt): Updated to process
+ subkeys starting from the end, and let subkeys pointer point at
+ the subkey for the first decrypt round, located at the end of the
+ array.
+ * aes128-decrypt.c (nettle_aes128_decrypt): Updated accordingly.
+ * aes192-decrypt.c (nettle_aes192_decrypt): Likewise.
+ * aes256-decrypt.c (nettle_aes256_decrypt): Likewise.
+ * arm/aes.m4 (AES_LOAD_INCR): New macro, specifying desired
+ increment of key pointer.
+ * arm/aes-decrypt-internal.asm: Updated for new conventions.
+ * arm/v6/aes-decrypt-internal.asm: Likewise.
+ * arm64/crypto/aes128-decrypt.asm: Likewise.
+ * arm64/crypto/aes192-decrypt.asm: Likewise.
+ * arm64/crypto/aes256-decrypt.asm: Likewise.
+ * powerpc64/p8/aes-decrypt-internal.asm: Likewise.
+ * sparc64/aes-decrypt-internal.asm: Likewise.
+ * x86/aes-decrypt-internal.asm: Likewise.
+ * x86_64/aes-decrypt-internal.asm: Likewise.
+ * x86_64/aesni/aes128-decrypt.asm: Likewise.
+ * x86_64/aesni/aes192-decrypt.asm: Likewise.
+ * x86_64/aesni/aes256-decrypt.asm: Likewise.
+
2024-01-26 Niels Möller <nisse@lysator.liu.se>

 Delete all sparc32 assembly.
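For illustration of the subkey convention described in the 2024-01-27 entry above (subkeys kept in encryption order and consumed from the end of the array), here is a minimal C sketch. The function name, parameter names and layout are assumptions made for this note, and the inverse round transformation is elided; only the subkey indexing is shown, it is not the actual nettle code.

#include <stdint.h>

/* subkeys points at the 4-word subkey for the first decrypt round,
   i.e. at the end of the expanded key array; earlier subkeys are
   reached by stepping the pointer backwards, 16 bytes per round. */
void
decrypt_block_sketch (unsigned rounds, const uint32_t *subkeys,
                      uint32_t block[4])
{
  unsigned i, j;

  /* Initial AddRoundKey with the subkey the pointer refers to. */
  for (j = 0; j < 4; j++)
    block[j] ^= subkeys[j];

  for (i = 1; i <= rounds; i++)
    {
      subkeys -= 4;  /* step back to the previous subkey */
      /* ... inverse round transformation on block elided ... */
      for (j = 0; j < 4; j++)
        block[j] ^= subkeys[j];
    }
}

Under this convention, a caller passes a pointer to the last subkey of the expanded key (something like keys + 4*rounds) rather than to the start of the array.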
OPN_XXY(vxor, K, S0, S1, S2, S3, S4, S5, S6, S7)
mtctr ROUNDS
- li r9,0x10
- li r10,-0x10
+ li r9,-0x10
+
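+ C Subkeys are read from the end of the array: KEYS points at the
+ C subkey for the first decrypt round, and r9 steps back 0x10 per round.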
.align 5
L8x_round_loop:
- lxvd2x VSR(K),r10,KEYS
+ lxvd2x VSR(K),r9,KEYS
vperm K,K,K,SWAP_MASK
OPN_XXY(vncipher, ZERO, S0, S1, S2, S3, S4, S5, S6, S7)
OPN_XXY(vxor, K, S0, S1, S2, S3, S4, S5, S6, S7)
- addi r9,r9,0x10
- subi r10,r10,0x10
+ subi r9,r9,0x10
bdnz L8x_round_loop
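+ C After the loop, r9 indexes the subkey for the final (vncipherlast) round.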
- lxvd2x VSR(K),r10,KEYS
+ lxvd2x VSR(K),r9,KEYS
vperm K,K,K,SWAP_MASK
OPN_XXY(vncipherlast, K, S0, S1, S2, S3, S4, S5, S6, S7)
OPN_XXY(vxor, K, S0, S1, S2, S3)
mtctr ROUNDS
- li r9,0x10
- li r10,-0x10
+ li r9,-0x10
.align 5
L4x_round_loop:
- lxvd2x VSR(K),r10,KEYS
+ lxvd2x VSR(K),r9,KEYS
vperm K,K,K,SWAP_MASK
OPN_XXY(vncipher, ZERO, S0, S1, S2, S3)
OPN_XXY(vxor, K, S0, S1, S2, S3)
- addi r9,r9,0x10
- subi r10,r10,0x10
+ subi r9,r9,0x10
bdnz L4x_round_loop
- lxvd2x VSR(K),r10,KEYS
+ lxvd2x VSR(K),r9,KEYS
vperm K,K,K,SWAP_MASK
OPN_XXY(vncipherlast, K, S0, S1, S2, S3)
vxor S1,S1,K
mtctr ROUNDS
- li r9,0x10
- li r10,-0x10
+ li r9,-0x10
.align 5
L2x_round_loop:
- lxvd2x VSR(K),r10,KEYS
+ lxvd2x VSR(K),r9,KEYS
vperm K,K,K,SWAP_MASK
vncipher S0,S0,ZERO
vncipher S1,S1,ZERO
vxor S0,S0,K
vxor S1,S1,K
- addi r9,r9,0x10
- subi r10,r10,0x10
+ subi r9,r9,0x10
bdnz L2x_round_loop
- lxvd2x VSR(K),r10,KEYS
+ lxvd2x VSR(K),r9,KEYS
vperm K,K,K,SWAP_MASK
vncipherlast S0,S0,K
vncipherlast S1,S1,K
vxor S0,S0,K
mtctr ROUNDS
- li r9,0x10
- li r10,-0x10
+ li r9,-0x10
.align 5
L1x_round_loop:
- lxvd2x VSR(K),r10,KEYS
+ lxvd2x VSR(K),r9,KEYS
vperm K,K,K,SWAP_MASK
vncipher S0,S0,ZERO
vxor S0,S0,K
- addi r9,r9,0x10
- subi r10,r10,0x10
+ subi r9,r9,0x10
bdnz L1x_round_loop
- lxvd2x VSR(K),r10,KEYS
+ lxvd2x VSR(K),r9,KEYS
vperm K,K,K,SWAP_MASK
vncipherlast S0,S0,K