crypto: arm64/aes-blk - improve XTS mask handling
author     Ard Biesheuvel <ard.biesheuvel@linaro.org>
           Mon, 10 Sep 2018 14:41:15 +0000 (16:41 +0200)
committer  Herbert Xu <herbert@gondor.apana.org.au>
           Fri, 21 Sep 2018 05:24:50 +0000 (13:24 +0800)
The Crypto Extension instantiation of the aes-modes.S collection of
skciphers uses only 15 NEON registers for the round key array, whereas
the pure NEON flavor uses 16 NEON registers for the AES S-box.

This means we have a spare register available that we can use to hold
the XTS mask vector, removing the need to reload it at every iteration
of the inner loop.
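
For reference, the tweak update performed by the next_tweak macro is the
usual multiplication by x in GF(2^128) with reduction polynomial
x^128 + x^7 + x^2 + x + 1; the mask vector holds the per-lane constants
{1, 0x87} used for that reduction. A minimal C sketch of the same update
(illustration only, not part of this patch; the helper name and the
convention that element 0 holds the low 64 bits of the tweak are assumptions):

  #include <stdint.h>

  /* Double the XTS tweak: shift left by one bit, reduce with 0x87. */
  static void next_tweak_c(uint64_t out[2], const uint64_t in[2])
  {
          uint64_t carry_lo = in[0] >> 63;  /* bit moving into the high half */
          uint64_t carry_hi = in[1] >> 63;  /* bit falling out of bit 127    */

          out[1] = (in[1] << 1) ^ carry_lo;
          out[0] = (in[0] << 1) ^ (carry_hi ? 0x87 : 0);
  }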

Since the pure NEON version does not permit this optimization, tweak
the macros so we can factor out this functionality. Also, replace the
literal load with a short sequence to compose the mask vector.
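
The composed value is identical to the old .Lxts_mul_x literal: the two movi
instructions write {1, 1} and {0x87, 0x87} into the low 32-bit lanes of two
registers (zeroing the upper halves), and uzp1 gathers their even lanes into
{1, 0, 0x87, 0}, i.e. the 64-bit pair {0x1, 0x87}. A small C sketch of that
lane shuffle (illustration only; the array names are made up):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          uint32_t a[4] = { 0x01, 0x01, 0, 0 };  /* movi xtsmask.2s, #0x1 */
          uint32_t b[4] = { 0x87, 0x87, 0, 0 };  /* movi tmp.2s, #0x87    */
          /* uzp1 xtsmask.4s, xtsmask.4s, tmp.4s: even lanes of a, then of b */
          uint32_t m[4] = { a[0], a[2], b[0], b[2] };

          printf("mask = { 0x%016llx, 0x%016llx }\n",
                 (unsigned long long)((uint64_t)m[1] << 32 | m[0]),
                 (unsigned long long)((uint64_t)m[3] << 32 | m[2]));
          return 0;
  }

which prints mask = { 0x0000000000000001, 0x0000000000000087 }, matching
CPU_LE( .quad 1, 0x87 ).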

On Cortex-A53, this results in a ~4% speedup.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm64/crypto/aes-ce.S
arch/arm64/crypto/aes-modes.S
arch/arm64/crypto/aes-neon.S

diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 623e74ed1c67f1d634e5762d35ddeaf39b73a498..143070510809ac1df415f785c315c110b8df7ada 100644
 
        .arch           armv8-a+crypto
 
+       xtsmask         .req    v16
+
+       .macro          xts_reload_mask, tmp
+       .endm
+
        /* preload all round keys */
        .macro          load_round_keys, rounds, rk
        cmp             \rounds, #12
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 9697eda3b4d1bad51df070df6c877d2766671eb5..039738ae23f662f7ad656815e0a922485bc54428 100644
@@ -340,17 +340,19 @@ AES_ENDPROC(aes_ctr_encrypt)
         *                 int blocks, u8 const rk2[], u8 iv[], int first)
         */
 
-       .macro          next_tweak, out, in, const, tmp
+       .macro          next_tweak, out, in, tmp
        sshr            \tmp\().2d,  \in\().2d,   #63
-       and             \tmp\().16b, \tmp\().16b, \const\().16b
+       and             \tmp\().16b, \tmp\().16b, xtsmask.16b
        add             \out\().2d,  \in\().2d,   \in\().2d
        ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
        eor             \out\().16b, \out\().16b, \tmp\().16b
        .endm
 
-.Lxts_mul_x:
-CPU_LE(        .quad           1, 0x87         )
-CPU_BE(        .quad           0x87, 1         )
+       .macro          xts_load_mask, tmp
+       movi            xtsmask.2s, #0x1
+       movi            \tmp\().2s, #0x87
+       uzp1            xtsmask.4s, xtsmask.4s, \tmp\().4s
+       .endm
 
 AES_ENTRY(aes_xts_encrypt)
        stp             x29, x30, [sp, #-16]!
@@ -362,24 +364,24 @@ AES_ENTRY(aes_xts_encrypt)
        enc_prepare     w3, x5, x8
        encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
        enc_switch_key  w3, x2, x8
-       ldr             q7, .Lxts_mul_x
+       xts_load_mask   v8
        b               .LxtsencNx
 
 .Lxtsencnotfirst:
        enc_prepare     w3, x2, x8
 .LxtsencloopNx:
-       ldr             q7, .Lxts_mul_x
-       next_tweak      v4, v4, v7, v8
+       xts_reload_mask v8
+       next_tweak      v4, v4, v8
 .LxtsencNx:
        subs            w4, w4, #4
        bmi             .Lxtsenc1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
-       next_tweak      v5, v4, v7, v8
+       next_tweak      v5, v4, v8
        eor             v0.16b, v0.16b, v4.16b
-       next_tweak      v6, v5, v7, v8
+       next_tweak      v6, v5, v8
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
-       next_tweak      v7, v6, v7, v8
+       next_tweak      v7, v6, v8
        eor             v3.16b, v3.16b, v7.16b
        bl              aes_encrypt_block4x
        eor             v3.16b, v3.16b, v7.16b
@@ -401,7 +403,7 @@ AES_ENTRY(aes_xts_encrypt)
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        beq             .Lxtsencout
-       next_tweak      v4, v4, v7, v8
+       next_tweak      v4, v4, v8
        b               .Lxtsencloop
 .Lxtsencout:
        st1             {v4.16b}, [x6]
@@ -420,24 +422,24 @@ AES_ENTRY(aes_xts_decrypt)
        enc_prepare     w3, x5, x8
        encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
        dec_prepare     w3, x2, x8
-       ldr             q7, .Lxts_mul_x
+       xts_load_mask   v8
        b               .LxtsdecNx
 
 .Lxtsdecnotfirst:
        dec_prepare     w3, x2, x8
 .LxtsdecloopNx:
-       ldr             q7, .Lxts_mul_x
-       next_tweak      v4, v4, v7, v8
+       xts_reload_mask v8
+       next_tweak      v4, v4, v8
 .LxtsdecNx:
        subs            w4, w4, #4
        bmi             .Lxtsdec1x
        ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
-       next_tweak      v5, v4, v7, v8
+       next_tweak      v5, v4, v8
        eor             v0.16b, v0.16b, v4.16b
-       next_tweak      v6, v5, v7, v8
+       next_tweak      v6, v5, v8
        eor             v1.16b, v1.16b, v5.16b
        eor             v2.16b, v2.16b, v6.16b
-       next_tweak      v7, v6, v7, v8
+       next_tweak      v7, v6, v8
        eor             v3.16b, v3.16b, v7.16b
        bl              aes_decrypt_block4x
        eor             v3.16b, v3.16b, v7.16b
@@ -459,7 +461,7 @@ AES_ENTRY(aes_xts_decrypt)
        st1             {v0.16b}, [x0], #16
        subs            w4, w4, #1
        beq             .Lxtsdecout
-       next_tweak      v4, v4, v7, v8
+       next_tweak      v4, v4, v8
        b               .Lxtsdecloop
 .Lxtsdecout:
        st1             {v4.16b}, [x6]
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index 1c7b45b7268e4677fe589830830b08732d6cf138..29100f692e8a03034f100eae9fc6b2bba64370c8 100644
 #define AES_ENTRY(func)                ENTRY(neon_ ## func)
 #define AES_ENDPROC(func)      ENDPROC(neon_ ## func)
 
+       xtsmask         .req    v7
+
+       .macro          xts_reload_mask, tmp
+       xts_load_mask   \tmp
+       .endm
+
        /* multiply by polynomial 'x' in GF(2^8) */
        .macro          mul_by_x, out, in, temp, const
        sshr            \temp, \in, #7