// NOTE(review): patch hunk, not plain asm — '-' lines drop the CPU_LE()
// conditional wrapper and '+' lines make the big-endian->host byte-swap
// unconditional. Correct only if CPU_BE (big-endian kernel) support is being
// removed from this file in the same series — TODO confirm.
0: ld1 {v16.4s-v19.4s}, [x1], #64
sub x2, x2, #1
// Convert the 16 message words from big endian (rev32 = byte-swap within
// each 32-bit lane).
-CPU_LE( rev32 v16.16b, v16.16b )
-CPU_LE( rev32 v17.16b, v17.16b )
-CPU_LE( rev32 v18.16b, v18.16b )
-CPU_LE( rev32 v19.16b, v19.16b )
+ rev32 v16.16b, v16.16b
+ rev32 v17.16b, v17.16b
+ rev32 v18.16b, v18.16b
+ rev32 v19.16b, v19.16b
// t0 = first message words + first round constants; snapshot digest state.
add t0.4s, v16.4s, v0.4s
mov dg0v.16b, dgav.16b
// Load 64 bytes of the second message stream (two-buffer "finup2x" variant).
ld1 {v20.4s-v23.4s}, [data2], #64
.Lfinup2x_loop_have_data:
// Convert the words of the data blocks from big endian.
// NOTE(review): hunk makes the rev32 of both blocks (v16-v19 = block 1,
// v20-v23 = block 2) unconditional instead of CPU_LE-only; on a big-endian
// kernel this would double-swap — valid only if BE support is dropped.
-CPU_LE( rev32 v16.16b, v16.16b )
-CPU_LE( rev32 v17.16b, v17.16b )
-CPU_LE( rev32 v18.16b, v18.16b )
-CPU_LE( rev32 v19.16b, v19.16b )
-CPU_LE( rev32 v20.16b, v20.16b )
-CPU_LE( rev32 v21.16b, v21.16b )
-CPU_LE( rev32 v22.16b, v22.16b )
-CPU_LE( rev32 v23.16b, v23.16b )
+ rev32 v16.16b, v16.16b
+ rev32 v17.16b, v17.16b
+ rev32 v18.16b, v18.16b
+ rev32 v19.16b, v19.16b
+ rev32 v20.16b, v20.16b
+ rev32 v21.16b, v21.16b
+ rev32 v22.16b, v22.16b
+ rev32 v23.16b, v23.16b
.Lfinup2x_loop_have_bswapped_data:
// Save the original state for each block.
sub w8, len, #64 // w8 = len - 64
add data1, data1, w8, sxtw // data1 += len - 64
add data2, data2, w8, sxtw // data2 += len - 64
// Build the SHA padding block: 0x80 terminator byte followed by zeroes.
// NOTE(review): the CPU_BE alternative (0x80 in the top byte via v16.d[1])
// is deleted with NO replacement, keeping only the little-endian layout
// (0x80 in the lowest byte of d16). Only correct if this file no longer
// supports big-endian builds — TODO confirm.
-CPU_LE( mov x9, #0x80 )
-CPU_LE( fmov d16, x9 )
-CPU_BE( movi v16.16b, #0 )
-CPU_BE( mov x9, #0x8000000000000000 )
-CPU_BE( mov v16.d[1], x9 )
+ mov x9, #0x80
+ fmov d16, x9
movi v17.16b, #0
// Store padding (q16 = 0x80 block, q17 = zeroes) into the stack buffer.
stp q16, q17, [sp, #64]
stp q17, q17, [sp, #96]
cmp len, #56
b.ge 1f // will count spill into its own block?
// Bit count goes at the end of the padding, big-endian on LE hosts.
lsl count, count, #3
-CPU_LE( rev count, count )
+ rev count, count
// NOTE(review): x9 was just loaded with the immediate #0x80 above, so
// [x9, #56] cannot be a valid base address here — this store looks like an
// sp-relative store (e.g. into the stack padding buffer) garbled by the
// patch extraction. Verify against the upstream file before applying.
str count, [x9, #56]
mov final_step, #2 // won't need count-only block
b 2f
.Lfinup2x_done:
// Write the two digests with all bytes in the correct order.
// NOTE(review): rev32 back to big-endian output order is made unconditional
// (was CPU_LE-only); same BE-removal assumption as the other hunks.
-CPU_LE( rev32 state0_a.16b, state0_a.16b )
-CPU_LE( rev32 state1_a.16b, state1_a.16b )
-CPU_LE( rev32 state0_b.16b, state0_b.16b )
-CPU_LE( rev32 state1_b.16b, state1_b.16b )
+ rev32 state0_a.16b, state0_a.16b
+ rev32 state1_a.16b, state1_a.16b
+ rev32 state0_b.16b, state0_b.16b
+ rev32 state1_b.16b, state1_b.16b
// Store digest A to out1 and digest B to out2, then tear down the 128-byte
// stack frame allocated by the (not shown) prologue.
st1 {state0_a.4s-state1_a.4s}, [out1]
st1 {state0_b.4s-state1_b.4s}, [out2]
add sp, sp, #128
// Second routine (64-bit lanes / rev64 — presumably the SHA-512 variant;
// TODO confirm). Load 64 bytes of message and byte-swap each 64-bit word
// from big endian; v12-v15 hold earlier message words loaded outside this
// hunk's view.
ld1 {v16.2d-v19.2d}, [x1], #64
sub x2, x2, #1
// NOTE(review): same pattern as the rev32 hunks — CPU_LE() wrapper dropped,
// swap made unconditional; depends on big-endian support being removed.
-CPU_LE( rev64 v12.16b, v12.16b )
-CPU_LE( rev64 v13.16b, v13.16b )
-CPU_LE( rev64 v14.16b, v14.16b )
-CPU_LE( rev64 v15.16b, v15.16b )
-CPU_LE( rev64 v16.16b, v16.16b )
-CPU_LE( rev64 v17.16b, v17.16b )
-CPU_LE( rev64 v18.16b, v18.16b )
-CPU_LE( rev64 v19.16b, v19.16b )
+ rev64 v12.16b, v12.16b
+ rev64 v13.16b, v13.16b
+ rev64 v14.16b, v14.16b
+ rev64 v15.16b, v15.16b
+ rev64 v16.16b, v16.16b
+ rev64 v17.16b, v17.16b
+ rev64 v18.16b, v18.16b
+ rev64 v19.16b, v19.16b
mov x4, x3 // rc pointer