From: Eric Biggers Date: Wed, 1 Apr 2026 00:33:31 +0000 (-0700) Subject: lib/crypto: arm64: Assume a little-endian kernel X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=12b11e47f126d097839fd2f077636e2139b0151b;p=thirdparty%2Fkernel%2Flinux.git lib/crypto: arm64: Assume a little-endian kernel Since support for big-endian arm64 kernels was removed, the CPU_LE() macro now unconditionally emits the code it is passed, and the CPU_BE() macro now unconditionally discards the code it is passed. Simplify the assembly code in lib/crypto/arm64/ accordingly. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20260401003331.144065-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- diff --git a/lib/crypto/arm64/aes-cipher-core.S b/lib/crypto/arm64/aes-cipher-core.S index 651f701c56a86..0b05ec4be65fb 100644 --- a/lib/crypto/arm64/aes-cipher-core.S +++ b/lib/crypto/arm64/aes-cipher-core.S @@ -87,11 +87,6 @@ ldp w8, w9, [rk], #16 ldp w10, w11, [rk, #-8] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) - eor w4, w4, w8 eor w5, w5, w9 eor w6, w6, w10 @@ -112,11 +107,6 @@ CPU_BE( rev w7, w7 ) 3: adr_l tt, \ltab \round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) - stp w4, w5, [out] stp w6, w7, [out, #8] ret diff --git a/lib/crypto/arm64/chacha-neon-core.S b/lib/crypto/arm64/chacha-neon-core.S index 80079586ecc7a..cb18eec968bdf 100644 --- a/lib/crypto/arm64/chacha-neon-core.S +++ b/lib/crypto/arm64/chacha-neon-core.S @@ -531,10 +531,6 @@ SYM_FUNC_START(chacha_4block_xor_neon) add v3.4s, v3.4s, v19.4s add a2, a2, w8 add a3, a3, w9 -CPU_BE( rev a0, a0 ) -CPU_BE( rev a1, a1 ) -CPU_BE( rev a2, a2 ) -CPU_BE( rev a3, a3 ) ld4r {v24.4s-v27.4s}, [x0], #16 ld4r {v28.4s-v31.4s}, [x0] @@ -555,10 +551,6 @@ CPU_BE( rev a3, a3 ) add v7.4s, v7.4s, v23.4s add a6, a6, w8 add a7, a7, w9 -CPU_BE( rev a4, a4 ) -CPU_BE( rev a5, a5 ) -CPU_BE( rev a6, a6 ) -CPU_BE( rev a7, a7 ) // x8[0-3] += s2[0] // x9[0-3] += s2[1] @@ -576,10 +568,6 @@ CPU_BE( rev a7, a7 ) add v11.4s, v11.4s, v27.4s add a10, a10, w8 add a11, a11, w9 -CPU_BE( rev a8, a8 ) -CPU_BE( rev a9, a9 ) -CPU_BE( rev a10, a10 ) -CPU_BE( rev a11, a11 ) // x12[0-3] += s3[0] // x13[0-3] += s3[1] @@ -597,10 +585,6 @@ CPU_BE( rev a11, a11 ) add v15.4s, v15.4s, v31.4s add a14, a14, w8 add a15, a15, w9 -CPU_BE( rev a12, a12 ) -CPU_BE( rev a13, a13 ) -CPU_BE( rev a14, a14 ) -CPU_BE( rev a15, a15 ) // interleave 32-bit words in state n, n+1 ldp w6, w7, [x2], #64 diff --git a/lib/crypto/arm64/ghash-neon-core.S b/lib/crypto/arm64/ghash-neon-core.S index 85b20fcd98fef..4c5799172b49c 100644 --- a/lib/crypto/arm64/ghash-neon-core.S +++ b/lib/crypto/arm64/ghash-neon-core.S @@ -192,7 +192,7 @@ SYM_FUNC_START(pmull_ghash_update_p8) sub x0, x0, #1 /* multiply XL by SHASH in GF(2^128) */ -CPU_LE( rev64 T1.16b, T1.16b ) + rev64 T1.16b, T1.16b ext T2.16b, XL.16b, XL.16b, #8 ext IN1.16b, T1.16b, T1.16b, #8 diff --git a/lib/crypto/arm64/sha1-ce-core.S b/lib/crypto/arm64/sha1-ce-core.S index 59d27fda07144..40f2a6c8d0c56 100644 --- a/lib/crypto/arm64/sha1-ce-core.S +++ b/lib/crypto/arm64/sha1-ce-core.S @@ -80,10 +80,10 @@ SYM_FUNC_START(sha1_ce_transform) 0: ld1 {v8.4s-v11.4s}, [x1], #64 sub x2, x2, #1 -CPU_LE( rev32 v8.16b, v8.16b ) -CPU_LE( rev32 v9.16b, v9.16b ) -CPU_LE( rev32 v10.16b, v10.16b ) -CPU_LE( rev32 v11.16b, v11.16b ) + rev32 v8.16b, v8.16b + rev32 v9.16b, v9.16b + rev32 v10.16b, v10.16b + rev32 v11.16b, v11.16b add t0.4s, v8.4s, k0.4s mov dg0v.16b, dgav.16b diff --git a/lib/crypto/arm64/sha256-ce.S b/lib/crypto/arm64/sha256-ce.S index b54ad977afa33..8fdbf0a9ff894 100644 --- a/lib/crypto/arm64/sha256-ce.S +++ b/lib/crypto/arm64/sha256-ce.S @@ -94,10 +94,10 @@ SYM_FUNC_START(sha256_ce_transform) 0: ld1 {v16.4s-v19.4s}, [x1], #64 sub x2, x2, #1 -CPU_LE( rev32 v16.16b, v16.16b ) -CPU_LE( rev32 v17.16b, v17.16b ) -CPU_LE( rev32 v18.16b, v18.16b ) -CPU_LE( rev32 v19.16b, v19.16b ) + rev32 v16.16b, v16.16b + rev32 v17.16b, v17.16b + rev32 v18.16b, v18.16b + rev32 v19.16b, v19.16b add t0.4s, v16.4s, v0.4s mov dg0v.16b, dgav.16b @@ -289,14 +289,14 @@ SYM_FUNC_START(sha256_ce_finup2x) ld1 {v20.4s-v23.4s}, [data2], #64 .Lfinup2x_loop_have_data: // Convert the words of the data blocks from big endian. -CPU_LE( rev32 v16.16b, v16.16b ) -CPU_LE( rev32 v17.16b, v17.16b ) -CPU_LE( rev32 v18.16b, v18.16b ) -CPU_LE( rev32 v19.16b, v19.16b ) -CPU_LE( rev32 v20.16b, v20.16b ) -CPU_LE( rev32 v21.16b, v21.16b ) -CPU_LE( rev32 v22.16b, v22.16b ) -CPU_LE( rev32 v23.16b, v23.16b ) + rev32 v16.16b, v16.16b + rev32 v17.16b, v17.16b + rev32 v18.16b, v18.16b + rev32 v19.16b, v19.16b + rev32 v20.16b, v20.16b + rev32 v21.16b, v21.16b + rev32 v22.16b, v22.16b + rev32 v23.16b, v23.16b .Lfinup2x_loop_have_bswapped_data: // Save the original state for each block. @@ -336,11 +336,8 @@ CPU_LE( rev32 v23.16b, v23.16b ) sub w8, len, #64 // w8 = len - 64 add data1, data1, w8, sxtw // data1 += len - 64 add data2, data2, w8, sxtw // data2 += len - 64 -CPU_LE( mov x9, #0x80 ) -CPU_LE( fmov d16, x9 ) -CPU_BE( movi v16.16b, #0 ) -CPU_BE( mov x9, #0x8000000000000000 ) -CPU_BE( mov v16.d[1], x9 ) + mov x9, #0x80 + fmov d16, x9 movi v17.16b, #0 stp q16, q17, [sp, #64] stp q17, q17, [sp, #96] @@ -348,7 +345,7 @@ CPU_BE( mov v16.d[1], x9 ) cmp len, #56 b.ge 1f // will count spill into its own block? lsl count, count, #3 -CPU_LE( rev count, count ) + rev count, count str count, [x9, #56] mov final_step, #2 // won't need count-only block b 2f @@ -393,10 +390,10 @@ CPU_LE( rev count, count ) .Lfinup2x_done: // Write the two digests with all bytes in the correct order. -CPU_LE( rev32 state0_a.16b, state0_a.16b ) -CPU_LE( rev32 state1_a.16b, state1_a.16b ) -CPU_LE( rev32 state0_b.16b, state0_b.16b ) -CPU_LE( rev32 state1_b.16b, state1_b.16b ) + rev32 state0_a.16b, state0_a.16b + rev32 state1_a.16b, state1_a.16b + rev32 state0_b.16b, state0_b.16b + rev32 state1_b.16b, state1_b.16b st1 {state0_a.4s-state1_a.4s}, [out1] st1 {state0_b.4s-state1_b.4s}, [out2] add sp, sp, #128 diff --git a/lib/crypto/arm64/sha512-ce-core.S b/lib/crypto/arm64/sha512-ce-core.S index 26834921e8d6f..daa2c63a20c0d 100644 --- a/lib/crypto/arm64/sha512-ce-core.S +++ b/lib/crypto/arm64/sha512-ce-core.S @@ -110,14 +110,14 @@ SYM_FUNC_START(sha512_ce_transform) ld1 {v16.2d-v19.2d}, [x1], #64 sub x2, x2, #1 -CPU_LE( rev64 v12.16b, v12.16b ) -CPU_LE( rev64 v13.16b, v13.16b ) -CPU_LE( rev64 v14.16b, v14.16b ) -CPU_LE( rev64 v15.16b, v15.16b ) -CPU_LE( rev64 v16.16b, v16.16b ) -CPU_LE( rev64 v17.16b, v17.16b ) -CPU_LE( rev64 v18.16b, v18.16b ) -CPU_LE( rev64 v19.16b, v19.16b ) + rev64 v12.16b, v12.16b + rev64 v13.16b, v13.16b + rev64 v14.16b, v14.16b + rev64 v15.16b, v15.16b + rev64 v16.16b, v16.16b + rev64 v17.16b, v17.16b + rev64 v18.16b, v18.16b + rev64 v19.16b, v19.16b mov x4, x3 // rc pointer diff --git a/lib/crypto/arm64/sm3-ce-core.S b/lib/crypto/arm64/sm3-ce-core.S index 9cef7ea7f34f0..ee7f900d7cff7 100644 --- a/lib/crypto/arm64/sm3-ce-core.S +++ b/lib/crypto/arm64/sm3-ce-core.S @@ -91,10 +91,10 @@ SYM_FUNC_START(sm3_ce_transform) mov v15.16b, v8.16b mov v16.16b, v9.16b -CPU_LE( rev32 v0.16b, v0.16b ) -CPU_LE( rev32 v1.16b, v1.16b ) -CPU_LE( rev32 v2.16b, v2.16b ) -CPU_LE( rev32 v3.16b, v3.16b ) + rev32 v0.16b, v0.16b + rev32 v1.16b, v1.16b + rev32 v2.16b, v2.16b + rev32 v3.16b, v3.16b ext v11.16b, v13.16b, v13.16b, #4