From: Niels Möller
Date: Tue, 9 Jan 2024 20:23:30 +0000 (+0100)
Subject: Add OP4 macro.
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fd6b108cd5939c5b1e00a4889322dde7473bec8a;p=thirdparty%2Fnettle.git

Add OP4 macro.
---

diff --git a/powerpc64/machine.m4 b/powerpc64/machine.m4
index 8f28f295..7033b93e 100644
--- a/powerpc64/machine.m4
+++ b/powerpc64/machine.m4
@@ -63,3 +63,13 @@ C INC_VR(VR, INC)
 define(`INC_VR',`ifelse(substr($1,0,1),`v',
 ``v'eval($2+substr($1,1,len($1)))',
 `eval($2+$1)')')
+
+C Apply op to 4 separate registers, with the same y,
+C op x_k, x_k, y, for k = 1,2,3,4
+C OP4(op, x1, x2, x3, x4, y)
+define(`OP4', `
+	$1	$2, $2, $6
+	$1	$3, $3, $6
+	$1	$4, $4, $6
+	$1	$5, $5, $6
+')
diff --git a/powerpc64/p8/aes-decrypt-internal.asm b/powerpc64/p8/aes-decrypt-internal.asm
index b2c49c63..9a49fcdc 100644
--- a/powerpc64/p8/aes-decrypt-internal.asm
+++ b/powerpc64/p8/aes-decrypt-internal.asm
@@ -80,13 +80,13 @@ PROLOGUE(_nettle_aes_decrypt)
 	cmpldi	r5,0
 	beq	L4x

-	std	r25,-56(SP);
-	std	r26,-48(SP);
-	std	r27,-40(SP);
-	std	r28,-32(SP);
-	std	r29,-24(SP);
-	std	r30,-16(SP);
-	std	r31,-8(SP);
+	std	r25,-56(SP)
+	std	r26,-48(SP)
+	std	r27,-40(SP)
+	std	r28,-32(SP)
+	std	r29,-24(SP)
+	std	r30,-16(SP)
+	std	r31,-8(SP)

 	li	r25,0x10
 	li	r26,0x20
@@ -119,14 +119,8 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 	vperm	S6,S6,S6,SWAP_MASK
 	vperm	S7,S7,S7,SWAP_MASK')

-	vxor	S0,S0,K
-	vxor	S1,S1,K
-	vxor	S2,S2,K
-	vxor	S3,S3,K
-	vxor	S4,S4,K
-	vxor	S5,S5,K
-	vxor	S6,S6,K
-	vxor	S7,S7,K
+	OP4(vxor, S0, S1, S2, S3, K)
+	OP4(vxor, S4, S5, S6, S7, K)

 	mtctr	ROUNDS
 	li	r10,0x10
@@ -134,35 +128,17 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 L8x_round_loop:
 	lxvd2x	VSR(K),r10,KEYS
 	vperm	K,K,K,SWAP_MASK
-	vncipher	S0,S0,ZERO
-	vncipher	S1,S1,ZERO
-	vncipher	S2,S2,ZERO
-	vncipher	S3,S3,ZERO
-	vncipher	S4,S4,ZERO
-	vncipher	S5,S5,ZERO
-	vncipher	S6,S6,ZERO
-	vncipher	S7,S7,ZERO
-	vxor	S0,S0,K
-	vxor	S1,S1,K
-	vxor	S2,S2,K
-	vxor	S3,S3,K
-	vxor	S4,S4,K
-	vxor	S5,S5,K
-	vxor	S6,S6,K
-	vxor	S7,S7,K
+	OP4(vncipher, S0, S1, S2, S3, ZERO)
+	OP4(vncipher, S4, S5, S6, S7, ZERO)
+	OP4(vxor, S0, S1, S2, S3, K)
+	OP4(vxor, S4, S5, S6, S7, K)
 	addi	r10,r10,0x10
 	bdnz	L8x_round_loop

 	lxvd2x	VSR(K),r10,KEYS
 	vperm	K,K,K,SWAP_MASK
-	vncipherlast	S0,S0,K
-	vncipherlast	S1,S1,K
-	vncipherlast	S2,S2,K
-	vncipherlast	S3,S3,K
-	vncipherlast	S4,S4,K
-	vncipherlast	S5,S5,K
-	vncipherlast	S6,S6,K
-	vncipherlast	S7,S7,K
+	OP4(vncipherlast, S0, S1, S2, S3, K)
+	OP4(vncipherlast, S4, S5, S6, S7, K)

 IF_LE(`vperm S0,S0,S0,SWAP_MASK
 	vperm	S1,S1,S1,SWAP_MASK
@@ -218,10 +194,7 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 	vperm	S2,S2,S2,SWAP_MASK
 	vperm	S3,S3,S3,SWAP_MASK')

-	vxor	S0,S0,K
-	vxor	S1,S1,K
-	vxor	S2,S2,K
-	vxor	S3,S3,K
+	OP4(vxor, S0, S1, S2, S3, K)

 	mtctr	ROUNDS
 	li	r10,0x10
@@ -229,23 +202,14 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 L4x_round_loop:
 	lxvd2x	VSR(K),r10,KEYS
 	vperm	K,K,K,SWAP_MASK
-	vncipher	S0,S0,ZERO
-	vncipher	S1,S1,ZERO
-	vncipher	S2,S2,ZERO
-	vncipher	S3,S3,ZERO
-	vxor	S0,S0,K
-	vxor	S1,S1,K
-	vxor	S2,S2,K
-	vxor	S3,S3,K
+	OP4(vncipher, S0, S1, S2, S3, ZERO)
+	OP4(vxor, S0, S1, S2, S3, K)
 	addi	r10,r10,0x10
 	bdnz	L4x_round_loop

 	lxvd2x	VSR(K),r10,KEYS
 	vperm	K,K,K,SWAP_MASK
-	vncipherlast	S0,S0,K
-	vncipherlast	S1,S1,K
-	vncipherlast	S2,S2,K
-	vncipherlast	S3,S3,K
+	OP4(vncipherlast, S0, S1, S2, S3, K)

 IF_LE(`vperm S0,S0,S0,SWAP_MASK
 	vperm	S1,S1,S1,SWAP_MASK
diff --git a/powerpc64/p8/aes-encrypt-internal.asm b/powerpc64/p8/aes-encrypt-internal.asm
index 9957eb1d..e16eecac 100644
--- a/powerpc64/p8/aes-encrypt-internal.asm
+++ b/powerpc64/p8/aes-encrypt-internal.asm
@@ -111,14 +111,8 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 	vperm	S6,S6,S6,SWAP_MASK
 	vperm	S7,S7,S7,SWAP_MASK')

-	vxor	S0,S0,K
-	vxor	S1,S1,K
-	vxor	S2,S2,K
-	vxor	S3,S3,K
-	vxor	S4,S4,K
-	vxor	S5,S5,K
-	vxor	S6,S6,K
-	vxor	S7,S7,K
+	OP4(vxor, S0, S1, S2, S3, K)
+	OP4(vxor, S4, S5, S6, S7, K)

 	mtctr	ROUNDS
 	li	r10,0x10
@@ -126,27 +120,15 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 L8x_round_loop:
 	lxvd2x	VSR(K),r10,KEYS
 	vperm	K,K,K,SWAP_MASK
-	vcipher	S0,S0,K
-	vcipher	S1,S1,K
-	vcipher	S2,S2,K
-	vcipher	S3,S3,K
-	vcipher	S4,S4,K
-	vcipher	S5,S5,K
-	vcipher	S6,S6,K
-	vcipher	S7,S7,K
+	OP4(vcipher, S0, S1, S2, S3, K)
+	OP4(vcipher, S4, S5, S6, S7, K)
 	addi	r10,r10,0x10
 	bdnz	L8x_round_loop

 	lxvd2x	VSR(K),r10,KEYS
 	vperm	K,K,K,SWAP_MASK
-	vcipherlast	S0,S0,K
-	vcipherlast	S1,S1,K
-	vcipherlast	S2,S2,K
-	vcipherlast	S3,S3,K
-	vcipherlast	S4,S4,K
-	vcipherlast	S5,S5,K
-	vcipherlast	S6,S6,K
-	vcipherlast	S7,S7,K
+	OP4(vcipherlast, S0, S1, S2, S3, K)
+	OP4(vcipherlast, S4, S5, S6, S7, K)

 IF_LE(`vperm S0,S0,S0,SWAP_MASK
 	vperm	S1,S1,S1,SWAP_MASK
@@ -202,10 +184,7 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 	vperm	S2,S2,S2,SWAP_MASK
 	vperm	S3,S3,S3,SWAP_MASK')

-	vxor	S0,S0,K
-	vxor	S1,S1,K
-	vxor	S2,S2,K
-	vxor	S3,S3,K
+	OP4(vxor, S0, S1, S2, S3, K)

 	mtctr	ROUNDS
 	li	r10,0x10
@@ -213,19 +192,13 @@ IF_LE(`vperm S0,S0,S0,SWAP_MASK
 L4x_round_loop:
 	lxvd2x	VSR(K),r10,KEYS
 	vperm	K,K,K,SWAP_MASK
-	vcipher	S0,S0,K
-	vcipher	S1,S1,K
-	vcipher	S2,S2,K
-	vcipher	S3,S3,K
+	OP4(vcipher, S0, S1, S2, S3, K)
 	addi	r10,r10,0x10
 	bdnz	L4x_round_loop

 	lxvd2x	VSR(K),r10,KEYS
 	vperm	K,K,K,SWAP_MASK
-	vcipherlast	S0,S0,K
-	vcipherlast	S1,S1,K
-	vcipherlast	S2,S2,K
-	vcipherlast	S3,S3,K
+	OP4(vcipherlast, S0, S1, S2, S3, K)

 IF_LE(`vperm S0,S0,S0,SWAP_MASK
 	vperm	S1,S1,S1,SWAP_MASK
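For reference, a sketch of how one of the new invocations should expand, going by the OP4 definition added to powerpc64/machine.m4 above (exact whitespace in the expansion is illustrative):

	C OP4(vxor, S0, S1, S2, S3, K) expands to
	vxor	S0, S0, K
	vxor	S1, S1, K
	vxor	S2, S2, K
	vxor	S3, S3, K

i.e. the same instruction sequence as the four explicit vxor lines it replaces; two OP4 calls cover the eight-register S0-S7 case.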