From 33a4a27bfa096a6293be74a4f480b904916eedb4 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Niels=20M=C3=B6ller?=
Date: Tue, 9 Jan 2024 22:00:04 +0100
Subject: [PATCH] Add macros OP_YXX and OP_YXXX.

---
 powerpc64/machine.m4                  | 24 +++++----
 powerpc64/p8/aes-decrypt-internal.asm | 62 +++++++----------------
 powerpc64/p8/aes-encrypt-internal.asm | 71 +++++++++------------------
 3 files changed, 58 insertions(+), 99 deletions(-)

diff --git a/powerpc64/machine.m4 b/powerpc64/machine.m4
index 7033b93e..a94fd615 100644
--- a/powerpc64/machine.m4
+++ b/powerpc64/machine.m4
@@ -64,12 +64,18 @@ define(`INC_VR',`ifelse(substr($1,0,1),`v',
 ``v'eval($2+substr($1,1,len($1)))',
 `eval($2+$1)')')
 
-C Apply op to 4 separate registers, with the same y,
-C op x_k, x_k, y, for k = 1,2,3,4
-C OP4(op, x1, x2, x3, x4, y)
-define(`OP4', `
-	$1 $2, $2, $6
-	$1 $3, $3, $6
-	$1 $4, $4, $6
-	$1 $5, $5, $6
-')
+C Apply op x, x, y, for each x.
+C OP_YXX(OP, Y, X1, X2, ...)
+define(`OP_YXX',
+`$1 $3, $3, $2
+ifelse(eval($# > 3), 1,
+`OP_YXX($1, $2, shift(shift(shift($@))))dnl
+')')
+
+C Apply op x, x, x, y, for each x.
+C OP_YXXX(OP, Y, X1, X2, ...)
+define(`OP_YXXX',
+`$1 $3, $3, $3, $2
+ifelse(eval($# > 3), 1,
+`OP_YXXX($1, $2, shift(shift(shift($@))))dnl
+')')
diff --git a/powerpc64/p8/aes-decrypt-internal.asm b/powerpc64/p8/aes-decrypt-internal.asm
index 9a49fcdc..12179cbf 100644
--- a/powerpc64/p8/aes-decrypt-internal.asm
+++ b/powerpc64/p8/aes-decrypt-internal.asm
@@ -110,17 +110,9 @@ Lx8_loop:
 	lxvd2x VSR(S6),r30,SRC
 	lxvd2x VSR(S7),r31,SRC
 
-IF_LE(`vperm S0,S0,S0,SWAP_MASK
-	vperm S1,S1,S1,SWAP_MASK
-	vperm S2,S2,S2,SWAP_MASK
-	vperm S3,S3,S3,SWAP_MASK
-	vperm S4,S4,S4,SWAP_MASK
-	vperm S5,S5,S5,SWAP_MASK
-	vperm S6,S6,S6,SWAP_MASK
-	vperm S7,S7,S7,SWAP_MASK')
+IF_LE(`OP_YXXX(vperm, SWAP_MASK, S0,S1,S2,S3,S4,S5,S6,S7)')
 
-	OP4(vxor, S0, S1, S2, S3, K)
-	OP4(vxor, S4, S5, S6, S7, K)
+	OP_YXX(vxor, K, S0, S1, S2, S3, S4, S5, S6, S7)
 
 	mtctr ROUNDS
 	li r10,0x10
@@ -128,26 +120,16 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 L8x_round_loop:
 	lxvd2x VSR(K),r10,KEYS
 	vperm K,K,K,SWAP_MASK
-	OP4(vncipher, S0, S1, S2, S3, ZERO)
-	OP4(vncipher, S4, S5, S6, S7, ZERO)
-	OP4(vxor, S0, S1, S2, S3, K)
-	OP4(vxor, S4, S5, S6, S7, K)
+	OP_YXX(vncipher, ZERO, S0, S1, S2, S3, S4, S5, S6, S7)
+	OP_YXX(vxor, K, S0, S1, S2, S3, S4, S5, S6, S7)
 	addi r10,r10,0x10
 	bdnz L8x_round_loop
 
 	lxvd2x VSR(K),r10,KEYS
 	vperm K,K,K,SWAP_MASK
-	OP4(vncipherlast, S0, S1, S2, S3, K)
-	OP4(vncipherlast, S4, S5, S6, S7, K)
+	OP_YXX(vncipherlast, K, S0, S1, S2, S3, S4, S5, S6, S7)
 
-IF_LE(`vperm S0,S0,S0,SWAP_MASK
-	vperm S1,S1,S1,SWAP_MASK
-	vperm S2,S2,S2,SWAP_MASK
-	vperm S3,S3,S3,SWAP_MASK
-	vperm S4,S4,S4,SWAP_MASK
-	vperm S5,S5,S5,SWAP_MASK
-	vperm S6,S6,S6,SWAP_MASK
-	vperm S7,S7,S7,SWAP_MASK')
+IF_LE(`OP_YXXX(vperm, SWAP_MASK, S0,S1,S2,S3,S4,S5,S6,S7)')
 
 	stxvd2x VSR(S0),0,DST
 	stxvd2x VSR(S1),r25,DST
@@ -163,13 +145,13 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 	subic. r5,r5,1
 	bne Lx8_loop
 
-	ld r25,-56(SP);
-	ld r26,-48(SP);
-	ld r27,-40(SP);
-	ld r28,-32(SP);
-	ld r29,-24(SP);
-	ld r30,-16(SP);
-	ld r31,-8(SP);
+	ld r25,-56(SP)
+	ld r26,-48(SP)
+	ld r27,-40(SP)
+	ld r28,-32(SP)
+	ld r29,-24(SP)
+	ld r30,-16(SP)
+	ld r31,-8(SP)
 
 	clrldi LENGTH,LENGTH,61
 
@@ -189,12 +171,9 @@ L4x:
 	addi r9,r9,0x10
 	lxvd2x VSR(S3),r9,SRC
 
-IF_LE(`vperm S0,S0,S0,SWAP_MASK
-	vperm S1,S1,S1,SWAP_MASK
-	vperm S2,S2,S2,SWAP_MASK
-	vperm S3,S3,S3,SWAP_MASK')
+IF_LE(`OP_YXXX(vperm, SWAP_MASK, S0,S1,S2,S3)')
 
-	OP4(vxor, S0, S1, S2, S3, K)
+	OP_YXX(vxor, K, S0, S1, S2, S3)
 
 	mtctr ROUNDS
 	li r10,0x10
@@ -202,19 +181,16 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 L4x_round_loop:
 	lxvd2x VSR(K),r10,KEYS
 	vperm K,K,K,SWAP_MASK
-	OP4(vncipher, S0, S1, S2, S3, ZERO)
-	OP4(vxor, S0, S1, S2, S3, K)
+	OP_YXX(vncipher, ZERO, S0, S1, S2, S3)
+	OP_YXX(vxor, K, S0, S1, S2, S3)
 	addi r10,r10,0x10
 	bdnz L4x_round_loop
 
 	lxvd2x VSR(K),r10,KEYS
 	vperm K,K,K,SWAP_MASK
-	OP4(vncipherlast, S0, S1, S2, S3, K)
+	OP_YXX(vncipherlast, K, S0, S1, S2, S3)
 
-IF_LE(`vperm S0,S0,S0,SWAP_MASK
-	vperm S1,S1,S1,SWAP_MASK
-	vperm S2,S2,S2,SWAP_MASK
-	vperm S3,S3,S3,SWAP_MASK')
+IF_LE(`OP_YXXX(vperm, SWAP_MASK, S0,S1,S2,S3)')
 
 	stxvd2x VSR(S0),0,DST
 	li r9,0x10
diff --git a/powerpc64/p8/aes-encrypt-internal.asm b/powerpc64/p8/aes-encrypt-internal.asm
index e16eecac..cd959f9e 100644
--- a/powerpc64/p8/aes-encrypt-internal.asm
+++ b/powerpc64/p8/aes-encrypt-internal.asm
@@ -72,13 +72,13 @@ PROLOGUE(_nettle_aes_encrypt)
 	cmpldi r5,0
 	beq L4x
 
-	std r25,-56(SP);
-	std r26,-48(SP);
-	std r27,-40(SP);
-	std r28,-32(SP);
-	std r29,-24(SP);
-	std r30,-16(SP);
-	std r31,-8(SP);
+	std r25,-56(SP)
+	std r26,-48(SP)
+	std r27,-40(SP)
+	std r28,-32(SP)
+	std r29,-24(SP)
+	std r30,-16(SP)
+	std r31,-8(SP)
 
 	li r25,0x10
 	li r26,0x20
@@ -102,17 +102,9 @@ Lx8_loop:
 	lxvd2x VSR(S6),r30,SRC
 	lxvd2x VSR(S7),r31,SRC
 
-IF_LE(`vperm S0,S0,S0,SWAP_MASK
-	vperm S1,S1,S1,SWAP_MASK
-	vperm S2,S2,S2,SWAP_MASK
-	vperm S3,S3,S3,SWAP_MASK
-	vperm S4,S4,S4,SWAP_MASK
-	vperm S5,S5,S5,SWAP_MASK
-	vperm S6,S6,S6,SWAP_MASK
-	vperm S7,S7,S7,SWAP_MASK')
+IF_LE(`OP_YXXX(vperm, SWAP_MASK, S0,S1,S2,S3,S4,S5,S6,S7)')
 
-	OP4(vxor, S0, S1, S2, S3, K)
-	OP4(vxor, S4, S5, S6, S7, K)
+	OP_YXX(vxor, K, S0, S1, S2, S3, S4, S5, S6, S7)
 
 	mtctr ROUNDS
 	li r10,0x10
@@ -120,24 +112,15 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 L8x_round_loop:
 	lxvd2x VSR(K),r10,KEYS
 	vperm K,K,K,SWAP_MASK
-	OP4(vcipher, S0, S1, S2, S3, K)
-	OP4(vcipher, S4, S5, S6, S7, K)
+	OP_YXX(vcipher, K, S0, S1, S2, S3, S4, S5, S6, S7)
 	addi r10,r10,0x10
 	bdnz L8x_round_loop
 
 	lxvd2x VSR(K),r10,KEYS
 	vperm K,K,K,SWAP_MASK
-	OP4(vcipherlast, S0, S1, S2, S3, K)
-	OP4(vcipherlast, S4, S5, S6, S7, K)
+	OP_YXX(vcipherlast, K, S0, S1, S2, S3, S4, S5, S6, S7)
 
-IF_LE(`vperm S0,S0,S0,SWAP_MASK
-	vperm S1,S1,S1,SWAP_MASK
-	vperm S2,S2,S2,SWAP_MASK
-	vperm S3,S3,S3,SWAP_MASK
-	vperm S4,S4,S4,SWAP_MASK
-	vperm S5,S5,S5,SWAP_MASK
-	vperm S6,S6,S6,SWAP_MASK
-	vperm S7,S7,S7,SWAP_MASK')
+IF_LE(`OP_YXXX(vperm, SWAP_MASK, S0,S1,S2,S3,S4,S5,S6,S7)')
 
 	stxvd2x VSR(S0),0,DST
 	stxvd2x VSR(S1),r25,DST
@@ -153,13 +136,13 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 	subic. r5,r5,1
 	bne Lx8_loop
 
-	ld r25,-56(SP);
-	ld r26,-48(SP);
-	ld r27,-40(SP);
-	ld r28,-32(SP);
-	ld r29,-24(SP);
-	ld r30,-16(SP);
-	ld r31,-8(SP);
+	ld r25,-56(SP)
+	ld r26,-48(SP)
+	ld r27,-40(SP)
+	ld r28,-32(SP)
+	ld r29,-24(SP)
+	ld r30,-16(SP)
+	ld r31,-8(SP)
 
 	clrldi LENGTH,LENGTH,61
 
@@ -179,12 +162,9 @@ L4x:
 	addi r9,r9,0x10
 	lxvd2x VSR(S3),r9,SRC
 
-IF_LE(`vperm S0,S0,S0,SWAP_MASK
-	vperm S1,S1,S1,SWAP_MASK
-	vperm S2,S2,S2,SWAP_MASK
-	vperm S3,S3,S3,SWAP_MASK')
+IF_LE(`OP_YXXX(vperm, SWAP_MASK, S0,S1,S2,S3)')
 
-	OP4(vxor, S0, S1, S2, S3, K)
+	OP_YXX(vxor, K, S0, S1, S2, S3)
 
 	mtctr ROUNDS
 	li r10,0x10
@@ -192,18 +172,15 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 L4x_round_loop:
 	lxvd2x VSR(K),r10,KEYS
 	vperm K,K,K,SWAP_MASK
-	OP4(vcipher, S0, S1, S2, S3, K)
+	OP_YXX(vcipher, K, S0, S1, S2, S3)
 	addi r10,r10,0x10
 	bdnz L4x_round_loop
 
 	lxvd2x VSR(K),r10,KEYS
 	vperm K,K,K,SWAP_MASK
-	OP4(vcipherlast, S0, S1, S2, S3, K)
+	OP_YXX(vcipherlast, K, S0, S1, S2, S3)
 
-IF_LE(`vperm S0,S0,S0,SWAP_MASK
-	vperm S1,S1,S1,SWAP_MASK
-	vperm S2,S2,S2,SWAP_MASK
-	vperm S3,S3,S3,SWAP_MASK')
+IF_LE(`OP_YXXX(vperm, SWAP_MASK, S0,S1,S2,S3)')
 
 	stxvd2x VSR(S0),0,DST
 	li r9,0x10
-- 
2.47.2
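
For reviewers who want to check the recursion, here is a minimal standalone
sketch of how the two new macros expand. It copies the definitions from
powerpc64/machine.m4 above and adds sample invocations; plain m4 semantics for
$#, $@, shift and dnl are assumed, and the file name sketch.m4 is only
illustrative (run it as "m4 sketch.m4").

dnl sketch.m4: OP_YXX/OP_YXXX as defined in the patch, plus sample calls.
define(`OP_YXX',
`$1 $3, $3, $2
ifelse(eval($# > 3), 1,
`OP_YXX($1, $2, shift(shift(shift($@))))dnl
')')

define(`OP_YXXX',
`$1 $3, $3, $3, $2
ifelse(eval($# > 3), 1,
`OP_YXXX($1, $2, shift(shift(shift($@))))dnl
')')

dnl Emits: vxor S0, S0, K / vxor S1, S1, K / vxor S2, S2, K / vxor S3, S3, K,
dnl i.e. the same instructions the old OP4(vxor, S0, S1, S2, S3, K) produced.
OP_YXX(vxor, K, S0, S1, S2, S3)

dnl Emits: vperm S0, S0, S0, SWAP_MASK / vperm S1, S1, S1, SWAP_MASK
OP_YXXX(vperm, SWAP_MASK, S0, S1)

Each step emits one instruction for the first X argument, then recurses with
OP, Y and that X shifted away, so any number of X registers can be passed; the
AES loops above use four or eight state registers per call.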