git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
lib/crypto: arm64: Assume a little-endian kernel
author: Eric Biggers <ebiggers@kernel.org>
Wed, 1 Apr 2026 00:33:31 +0000 (17:33 -0700)
committer: Eric Biggers <ebiggers@kernel.org>
Wed, 1 Apr 2026 20:02:15 +0000 (13:02 -0700)
Since support for big-endian arm64 kernels was removed, the CPU_LE()
macro now unconditionally emits the code it is passed, and the CPU_BE()
macro now unconditionally discards the code it is passed.

Simplify the assembly code in lib/crypto/arm64/ accordingly.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260401003331.144065-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
lib/crypto/arm64/aes-cipher-core.S
lib/crypto/arm64/chacha-neon-core.S
lib/crypto/arm64/ghash-neon-core.S
lib/crypto/arm64/sha1-ce-core.S
lib/crypto/arm64/sha256-ce.S
lib/crypto/arm64/sha512-ce-core.S
lib/crypto/arm64/sm3-ce-core.S

index 651f701c56a86aff878fee7b7c28612711ce5ecc..0b05ec4be65fbe512507f2524991c7916d5ca6e6 100644 (file)
        ldp             w8, w9, [rk], #16
        ldp             w10, w11, [rk, #-8]
 
-CPU_BE(        rev             w4, w4          )
-CPU_BE(        rev             w5, w5          )
-CPU_BE(        rev             w6, w6          )
-CPU_BE(        rev             w7, w7          )
-
        eor             w4, w4, w8
        eor             w5, w5, w9
        eor             w6, w6, w10
@@ -112,11 +107,6 @@ CPU_BE(    rev             w7, w7          )
 3:     adr_l           tt, \ltab
        \round          w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
 
-CPU_BE(        rev             w4, w4          )
-CPU_BE(        rev             w5, w5          )
-CPU_BE(        rev             w6, w6          )
-CPU_BE(        rev             w7, w7          )
-
        stp             w4, w5, [out]
        stp             w6, w7, [out, #8]
        ret
index 80079586ecc7a22bd54ce0584679267a99181ac6..cb18eec968bdf192462a48452c3e7c309b0befdb 100644 (file)
@@ -531,10 +531,6 @@ SYM_FUNC_START(chacha_4block_xor_neon)
        add             v3.4s, v3.4s, v19.4s
          add           a2, a2, w8
          add           a3, a3, w9
-CPU_BE(          rev           a0, a0          )
-CPU_BE(          rev           a1, a1          )
-CPU_BE(          rev           a2, a2          )
-CPU_BE(          rev           a3, a3          )
 
        ld4r            {v24.4s-v27.4s}, [x0], #16
        ld4r            {v28.4s-v31.4s}, [x0]
@@ -555,10 +551,6 @@ CPU_BE(      rev           a3, a3          )
        add             v7.4s, v7.4s, v23.4s
          add           a6, a6, w8
          add           a7, a7, w9
-CPU_BE(          rev           a4, a4          )
-CPU_BE(          rev           a5, a5          )
-CPU_BE(          rev           a6, a6          )
-CPU_BE(          rev           a7, a7          )
 
        // x8[0-3] += s2[0]
        // x9[0-3] += s2[1]
@@ -576,10 +568,6 @@ CPU_BE(      rev           a7, a7          )
        add             v11.4s, v11.4s, v27.4s
          add           a10, a10, w8
          add           a11, a11, w9
-CPU_BE(          rev           a8, a8          )
-CPU_BE(          rev           a9, a9          )
-CPU_BE(          rev           a10, a10        )
-CPU_BE(          rev           a11, a11        )
 
        // x12[0-3] += s3[0]
        // x13[0-3] += s3[1]
@@ -597,10 +585,6 @@ CPU_BE(      rev           a11, a11        )
        add             v15.4s, v15.4s, v31.4s
          add           a14, a14, w8
          add           a15, a15, w9
-CPU_BE(          rev           a12, a12        )
-CPU_BE(          rev           a13, a13        )
-CPU_BE(          rev           a14, a14        )
-CPU_BE(          rev           a15, a15        )
 
        // interleave 32-bit words in state n, n+1
          ldp           w6, w7, [x2], #64
index 85b20fcd98fef574e7e8f9aef0d1d205caa8e5d0..4c5799172b49c045a823e4615c0e9dd3ee609afe 100644 (file)
@@ -192,7 +192,7 @@ SYM_FUNC_START(pmull_ghash_update_p8)
        sub             x0, x0, #1
 
        /* multiply XL by SHASH in GF(2^128) */
-CPU_LE(        rev64           T1.16b, T1.16b  )
+       rev64           T1.16b, T1.16b
 
        ext             T2.16b, XL.16b, XL.16b, #8
        ext             IN1.16b, T1.16b, T1.16b, #8
index 59d27fda071440ed93a404e8fcf9c2553e106f6f..40f2a6c8d0c563e065d274df7ed6ddd7f9819489 100644 (file)
@@ -80,10 +80,10 @@ SYM_FUNC_START(sha1_ce_transform)
 0:     ld1             {v8.4s-v11.4s}, [x1], #64
        sub             x2, x2, #1
 
-CPU_LE(        rev32           v8.16b, v8.16b          )
-CPU_LE(        rev32           v9.16b, v9.16b          )
-CPU_LE(        rev32           v10.16b, v10.16b        )
-CPU_LE(        rev32           v11.16b, v11.16b        )
+       rev32           v8.16b, v8.16b
+       rev32           v9.16b, v9.16b
+       rev32           v10.16b, v10.16b
+       rev32           v11.16b, v11.16b
 
        add             t0.4s, v8.4s, k0.4s
        mov             dg0v.16b, dgav.16b
index b54ad977afa339c7eb4e3788d6e77ab2eb21d602..8fdbf0a9ff894de7f9c8d77f7b0b2f73b188e711 100644 (file)
@@ -94,10 +94,10 @@ SYM_FUNC_START(sha256_ce_transform)
 0:     ld1             {v16.4s-v19.4s}, [x1], #64
        sub             x2, x2, #1
 
-CPU_LE(        rev32           v16.16b, v16.16b        )
-CPU_LE(        rev32           v17.16b, v17.16b        )
-CPU_LE(        rev32           v18.16b, v18.16b        )
-CPU_LE(        rev32           v19.16b, v19.16b        )
+       rev32           v16.16b, v16.16b
+       rev32           v17.16b, v17.16b
+       rev32           v18.16b, v18.16b
+       rev32           v19.16b, v19.16b
 
        add             t0.4s, v16.4s, v0.4s
        mov             dg0v.16b, dgav.16b
@@ -289,14 +289,14 @@ SYM_FUNC_START(sha256_ce_finup2x)
        ld1             {v20.4s-v23.4s}, [data2], #64
 .Lfinup2x_loop_have_data:
        // Convert the words of the data blocks from big endian.
-CPU_LE(        rev32           v16.16b, v16.16b        )
-CPU_LE(        rev32           v17.16b, v17.16b        )
-CPU_LE(        rev32           v18.16b, v18.16b        )
-CPU_LE(        rev32           v19.16b, v19.16b        )
-CPU_LE(        rev32           v20.16b, v20.16b        )
-CPU_LE(        rev32           v21.16b, v21.16b        )
-CPU_LE(        rev32           v22.16b, v22.16b        )
-CPU_LE(        rev32           v23.16b, v23.16b        )
+       rev32           v16.16b, v16.16b
+       rev32           v17.16b, v17.16b
+       rev32           v18.16b, v18.16b
+       rev32           v19.16b, v19.16b
+       rev32           v20.16b, v20.16b
+       rev32           v21.16b, v21.16b
+       rev32           v22.16b, v22.16b
+       rev32           v23.16b, v23.16b
 .Lfinup2x_loop_have_bswapped_data:
 
        // Save the original state for each block.
@@ -336,11 +336,8 @@ CPU_LE(    rev32           v23.16b, v23.16b        )
        sub             w8, len, #64            // w8 = len - 64
        add             data1, data1, w8, sxtw  // data1 += len - 64
        add             data2, data2, w8, sxtw  // data2 += len - 64
-CPU_LE(        mov             x9, #0x80               )
-CPU_LE(        fmov            d16, x9                 )
-CPU_BE(        movi            v16.16b, #0             )
-CPU_BE(        mov             x9, #0x8000000000000000 )
-CPU_BE(        mov             v16.d[1], x9            )
+       mov             x9, #0x80
+       fmov            d16, x9
        movi            v17.16b, #0
        stp             q16, q17, [sp, #64]
        stp             q17, q17, [sp, #96]
@@ -348,7 +345,7 @@ CPU_BE(     mov             v16.d[1], x9            )
        cmp             len, #56
        b.ge            1f              // will count spill into its own block?
        lsl             count, count, #3
-CPU_LE(        rev             count, count            )
+       rev             count, count
        str             count, [x9, #56]
        mov             final_step, #2  // won't need count-only block
        b               2f
@@ -393,10 +390,10 @@ CPU_LE(   rev             count, count            )
 
 .Lfinup2x_done:
        // Write the two digests with all bytes in the correct order.
-CPU_LE(        rev32           state0_a.16b, state0_a.16b      )
-CPU_LE(        rev32           state1_a.16b, state1_a.16b      )
-CPU_LE(        rev32           state0_b.16b, state0_b.16b      )
-CPU_LE(        rev32           state1_b.16b, state1_b.16b      )
+       rev32           state0_a.16b, state0_a.16b
+       rev32           state1_a.16b, state1_a.16b
+       rev32           state0_b.16b, state0_b.16b
+       rev32           state1_b.16b, state1_b.16b
        st1             {state0_a.4s-state1_a.4s}, [out1]
        st1             {state0_b.4s-state1_b.4s}, [out2]
        add             sp, sp, #128
index 26834921e8d6f8801a11df6ad73ff65a2bdd350c..daa2c63a20c0d3ea4edd25d05bd531c37603c659 100644 (file)
@@ -110,14 +110,14 @@ SYM_FUNC_START(sha512_ce_transform)
        ld1             {v16.2d-v19.2d}, [x1], #64
        sub             x2, x2, #1
 
-CPU_LE(        rev64           v12.16b, v12.16b        )
-CPU_LE(        rev64           v13.16b, v13.16b        )
-CPU_LE(        rev64           v14.16b, v14.16b        )
-CPU_LE(        rev64           v15.16b, v15.16b        )
-CPU_LE(        rev64           v16.16b, v16.16b        )
-CPU_LE(        rev64           v17.16b, v17.16b        )
-CPU_LE(        rev64           v18.16b, v18.16b        )
-CPU_LE(        rev64           v19.16b, v19.16b        )
+       rev64           v12.16b, v12.16b
+       rev64           v13.16b, v13.16b
+       rev64           v14.16b, v14.16b
+       rev64           v15.16b, v15.16b
+       rev64           v16.16b, v16.16b
+       rev64           v17.16b, v17.16b
+       rev64           v18.16b, v18.16b
+       rev64           v19.16b, v19.16b
 
        mov             x4, x3                          // rc pointer
 
index 9cef7ea7f34f07419436b73ec7adaba710a5ad9a..ee7f900d7cff7d62903a16444a599f250a61f258 100644 (file)
@@ -91,10 +91,10 @@ SYM_FUNC_START(sm3_ce_transform)
        mov             v15.16b, v8.16b
        mov             v16.16b, v9.16b
 
-CPU_LE(        rev32           v0.16b, v0.16b          )
-CPU_LE(        rev32           v1.16b, v1.16b          )
-CPU_LE(        rev32           v2.16b, v2.16b          )
-CPU_LE(        rev32           v3.16b, v3.16b          )
+       rev32           v0.16b, v0.16b
+       rev32           v1.16b, v1.16b
+       rev32           v2.16b, v2.16b
+       rev32           v3.16b, v3.16b
 
        ext             v11.16b, v13.16b, v13.16b, #4