From: David Benjamin
Date: Wed, 29 Dec 2021 18:05:12 +0000 (-0500)
Subject: Don't use __ARMEL__/__ARMEB__ in aarch64 assembly
X-Git-Tag: openssl-3.2.0-alpha1~3108
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=40c24d74deaad8a0ad7566a68ea5ea757bc3ccef;p=thirdparty%2Fopenssl.git

Don't use __ARMEL__/__ARMEB__ in aarch64 assembly

GCC's __ARMEL__ and __ARMEB__ defines denote little- and big-endian arm,
respectively. They are not defined on aarch64, which instead uses
__AARCH64EL__ and __AARCH64EB__.

However, OpenSSL's assembly originally used the 32-bit defines on both
platforms and even defined __ARMEL__ and __ARMEB__ in arm_arch.h. This is
less portable and can even interfere with other headers, which use
__ARMEL__ to detect little-endian arm.

Over time, the aarch64 assembly has switched to the correct defines, such
as in 32bbb62ea634239e7cb91d6450ba23517082bab6. This commit finishes the
job: poly1305-armv8.pl needed a fix, and the dual-arch armx.pl files get
one more transform to convert from 32-bit to 64-bit.

(There is an even more official endianness detector, __ARM_BIG_ENDIAN in
the Arm C Language Extensions, but I've stuck with the GCC ones here as
that would be a larger change.)

Reviewed-by: Matt Caswell
Reviewed-by: Tomas Mraz
Reviewed-by: Paul Dale
Reviewed-by: Bernd Edlinger
(Merged from https://github.com/openssl/openssl/pull/17373)
---
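Note: the transform added below is a single Perl substitution applied
while the dual-arch perlasm files emit 64-bit code. A minimal standalone
sketch of its effect (the sample input lines are hypothetical, for
illustration only):

    #!/usr/bin/env perl
    # Sketch of the transform: rewrite GCC's 32-bit arm endianness
    # macros to their aarch64 spellings when emitting 64-bit code.
    use strict;
    use warnings;

    # Hypothetical sample lines, standing in for generated assembly.
    my @lines = ("#ifdef __ARMEB__", "#ifndef __ARMEL__");
    for (@lines) {
        # The substitution from this commit: __ARMEB__ -> __AARCH64EB__
        # and __ARMEL__ -> __AARCH64EL__ in one pattern.
        s/__ARME([BL])__/__AARCH64E$1__/go;
        print "$_\n";  # "#ifdef __AARCH64EB__", "#ifndef __AARCH64EL__"
    }
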
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
index c323179b2b4..da10c44030c 100755
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -3613,6 +3613,9 @@ if ($flavour =~ /64/) {                 ######## 64-bit code
         s/\.[ui]?64//o          and s/\.16b/\.2d/go;
         s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
 
+        # Switch preprocessor checks to aarch64 versions.
+        s/__ARME([BL])__/__AARCH64E$1__/go;
+
         print $_,"\n";
     }
 } else {                                ######## 32-bit code
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
index ca480456708..848f06542c1 100644
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -21,11 +21,6 @@
 #  elif defined(__GNUC__)
 #   if defined(__aarch64__)
 #    define __ARM_ARCH__ 8
-#    if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
-#     define __ARMEB__
-#    else
-#     define __ARMEL__
-#    endif
 /*
  * Why doesn't gcc define __ARM_ARCH__? Instead it defines
  * bunch of below macros. See all_architectures[] table in
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
index 57f893e77c1..a1cfad0ef6f 100644
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -755,6 +755,9 @@ if ($flavour =~ /64/) {                  ######## 64-bit code
         s/\.[uisp]?64//o        and s/\.16b/\.2d/go;
         s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
 
+        # Switch preprocessor checks to aarch64 versions.
+        s/__ARME([BL])__/__AARCH64E$1__/go;
+
         print $_,"\n";
     }
 } else {                                ######## 32-bit code
diff --git a/crypto/poly1305/asm/poly1305-armv8.pl b/crypto/poly1305/asm/poly1305-armv8.pl
index 20816c42831..e2c7f2822c0 100755
--- a/crypto/poly1305/asm/poly1305-armv8.pl
+++ b/crypto/poly1305/asm/poly1305-armv8.pl
@@ -86,7 +86,7 @@ poly1305_init:
         ldp     $r0,$r1,[$inp]          // load key
         mov     $s1,#0xfffffffc0fffffff
         movk    $s1,#0x0fff,lsl#48
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         rev     $r0,$r0                 // flip bytes
         rev     $r1,$r1
 #endif
@@ -136,7 +136,7 @@ poly1305_blocks:
 .Loop:
         ldp     $t0,$t1,[$inp],#16      // load input
         sub     $len,$len,#16
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         rev     $t0,$t0
         rev     $t1,$t1
 #endif
@@ -204,13 +204,13 @@ poly1305_emit:
         csel    $h0,$h0,$d0,eq
         csel    $h1,$h1,$d1,eq
 
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         ror     $t0,$t0,#32             // flip nonce words
         ror     $t1,$t1,#32
 #endif
         adds    $h0,$h0,$t0             // accumulate nonce
         adc     $h1,$h1,$t1
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         rev     $h0,$h0                 // flip output bytes
         rev     $h1,$h1
 #endif
@@ -345,7 +345,7 @@ poly1305_blocks_neon:
         adcs    $h1,$h1,xzr
         adc     $h2,$h2,xzr
 
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         rev     $d0,$d0
         rev     $d1,$d1
 #endif
@@ -391,7 +391,7 @@ poly1305_blocks_neon:
         ldp     $d0,$d1,[$inp],#16      // load input
         sub     $len,$len,#16
         add     $s1,$r1,$r1,lsr#2       // s1 = r1 + (r1 >> 2)
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         rev     $d0,$d0
         rev     $d1,$d1
 #endif
@@ -476,7 +476,7 @@ poly1305_blocks_neon:
         lsl     $padbit,$padbit,#24
         add     x15,$ctx,#48
 
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         rev     x8,x8
         rev     x12,x12
         rev     x9,x9
@@ -512,7 +512,7 @@ poly1305_blocks_neon:
         ld1     {$S2,$R3,$S3,$R4},[x15],#64
         ld1     {$S4},[x15]
 
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         rev     x8,x8
         rev     x12,x12
         rev     x9,x9
@@ -573,7 +573,7 @@ poly1305_blocks_neon:
         umull   $ACC1,$IN23_0,${R1}[2]
         ldp     x9,x13,[$in2],#48
         umull   $ACC0,$IN23_0,${R0}[2]
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         rev     x8,x8
         rev     x12,x12
         rev     x9,x9
@@ -638,7 +638,7 @@ poly1305_blocks_neon:
         umlal   $ACC4,$IN01_2,${R2}[0]
         umlal   $ACC1,$IN01_2,${S4}[0]
         umlal   $ACC2,$IN01_2,${R0}[0]
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         rev     x8,x8
         rev     x12,x12
         rev     x9,x9
@@ -922,13 +922,13 @@ poly1305_emit_neon:
         csel    $h0,$h0,$d0,eq
         csel    $h1,$h1,$d1,eq
 
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         ror     $t0,$t0,#32             // flip nonce words
         ror     $t1,$t1,#32
 #endif
         adds    $h0,$h0,$t0             // accumulate nonce
         adc     $h1,$h1,$t1
-#ifdef  __ARMEB__
+#ifdef  __AARCH64EB__
         rev     $h0,$h0                 // flip output bytes
         rev     $h1,$h1
 #endif
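
Note: a quick way to sanity-check the result (a hypothetical helper
script, not part of this commit) is to scan the emitted 64-bit assembly
for any leftover 32-bit macro:

    #!/usr/bin/env perl
    # Hypothetical check: after generating the aarch64 assembly, e.g.
    #   perl crypto/poly1305/asm/poly1305-armv8.pl linux64 poly1305-armv8.S
    # confirm that no __ARMEL__/__ARMEB__ checks survived the transform.
    use strict;
    use warnings;

    my $file = shift or die "usage: $0 <generated.S>\n";
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    my $bad = 0;
    while (my $line = <$fh>) {
        # $. is the current input line number.
        if ($line =~ /__ARME[BL]__/) {
            warn "$file:$.: leftover 32-bit macro: $line";
            $bad = 1;
        }
    }
    close $fh;
    exit $bad;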