git.ipfire.org Git - thirdparty/linux.git/commitdiff
crypto: x86/aes-gcm - code size optimization
author Eric Biggers <ebiggers@google.com>
Thu, 12 Dec 2024 21:28:38 +0000 (13:28 -0800)
committer Herbert Xu <herbert@gondor.apana.org.au>
Sat, 21 Dec 2024 14:46:24 +0000 (22:46 +0800)
Prefer an immediate of -128 over 128, since the former fits in a signed
byte, saving 3 bytes per instruction.  Also replace a vpand and vpxor
with a vpternlogd.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/x86/crypto/aes-gcm-avx10-x86_64.S

index 97e0ee515fc5fe23f22524a4d159959b6d0b54c5..8989bf9b8384ddc7911b5f97a50bda65b25e944e 100644 (file)
        vpshufd         $0xd3, H_CUR_XMM, %xmm0
        vpsrad          $31, %xmm0, %xmm0
        vpaddq          H_CUR_XMM, H_CUR_XMM, H_CUR_XMM
-       vpand           .Lgfpoly_and_internal_carrybit(%rip), %xmm0, %xmm0
-       vpxor           %xmm0, H_CUR_XMM, H_CUR_XMM
+       // H_CUR_XMM ^= xmm0 & gfpoly_and_internal_carrybit
+       vpternlogd      $0x78, .Lgfpoly_and_internal_carrybit(%rip), %xmm0, H_CUR_XMM
 
        // Load the gfpoly constant.
        vbroadcasti32x4 .Lgfpoly(%rip), GFPOLY
        // Pre-subtracting 4*VL from DATALEN saves an instruction from the main
        // loop and also ensures that at least one write always occurs to
        // DATALEN, zero-extending it and allowing DATALEN64 to be used later.
-       sub             $4*VL, DATALEN
+       add             $-4*VL, DATALEN  // shorter than 'sub 4*VL' when VL=32
        jl              .Lcrypt_loop_4x_done\@
 
        // Load powers of the hash key.
        vmovdqu8        GHASHDATA1, 1*VL(DST)
        vmovdqu8        GHASHDATA2, 2*VL(DST)
        vmovdqu8        GHASHDATA3, 3*VL(DST)
-       add             $4*VL, SRC
-       add             $4*VL, DST
-       sub             $4*VL, DATALEN
+       sub             $-4*VL, SRC  // shorter than 'add 4*VL' when VL=32
+       sub             $-4*VL, DST
+       add             $-4*VL, DATALEN
        jl              .Lghash_last_ciphertext_4x\@
 .endif
 
        vmovdqu8        GHASHDATA2, 2*VL(DST)
        vmovdqu8        GHASHDATA3, 3*VL(DST)
 
-       add             $4*VL, SRC
-       add             $4*VL, DST
-       sub             $4*VL, DATALEN
+       sub             $-4*VL, SRC  // shorter than 'add 4*VL' when VL=32
+       sub             $-4*VL, DST
+       add             $-4*VL, DATALEN
        jge             .Lcrypt_loop_4x\@
 
 .if \enc
 .Lcrypt_loop_4x_done\@:
 
        // Undo the extra subtraction by 4*VL and check whether data remains.
-       add             $4*VL, DATALEN
+       sub             $-4*VL, DATALEN  // shorter than 'add 4*VL' when VL=32
        jz              .Ldone\@
 
        // The data length isn't a multiple of 4*VL.  Process the remaining data