rdrand_asm.S: On x86-64 we have enough registers, avoid repeated loads
author	H. Peter Anvin <hpa@linux.intel.com>	Tue, 25 Feb 2014 05:49:58 +0000 (21:49 -0800)
committer	H. Peter Anvin <hpa@linux.intel.com>	Tue, 4 Mar 2014 22:45:52 +0000 (14:45 -0800)
On x86-64 there are enough registers that there really is no point in
using a repeated memory operand for the key material.  Load it into a
register instead; hopefully it will be slightly faster.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
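
A minimal C-with-intrinsics sketch of the idea behind the patch: load each
round key once and reuse it as a register operand for all eight data blocks,
instead of re-reading it from memory for every aesenc.  The function and
variable names here are made up for illustration and are not the rng-tools
code; the ten aesenc rounds followed by one aesenclast simply mirror the
structure of the assembly below.  Compile with -maes.

	/* Sketch only: mirrors the structure of x86_aes_mangle, not a drop-in. */
	#include <wmmintrin.h>		/* AES-NI intrinsics */

	static void mangle_sketch(__m128i blk[8], const __m128i key[11])
	{
		for (int r = 0; r < 10; r++) {
			__m128i k = key[r];	/* one load per round ...          */
			for (int i = 0; i < 8; i++)
				blk[i] = _mm_aesenc_si128(blk[i], k);	/* ... reused for all 8 blocks */
		}
		__m128i last = key[10];		/* final round key */
		for (int i = 0; i < 8; i++)
			blk[i] = _mm_aesenclast_si128(blk[i], last);
	}

On 32-bit x86 only %xmm0..%xmm7 exist and all eight already hold data blocks,
so the memory-operand form is kept there; on x86-64 the extra register %xmm8
is free to hold the round key, which is what the #ifdef __x86_64__ branch in
the diff does.  The aesenc/aesenclast instructions are emitted as .byte
sequences, presumably so the file still assembles with toolchains that lack
the AES-NI mnemonics; the comments give the equivalent mnemonic form.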
rdrand_asm.S

index 0bd4b041a40862c652beee2c1853c54caafc3f68..4b8fdc5eccf3aa533b13b2ec5db4df24f3ace04d 100644 (file)
@@ -122,7 +122,20 @@ ENTRY(x86_aes_mangle)
        pxor    (6*16)(PTR1), %xmm6
        pxor    (7*16)(PTR1), %xmm7
 
+offset = 0
        .rept 10
+#ifdef __x86_64__
+       movdqa  offset(PTR2), %xmm8
+offset = offset + 16
+       .byte   0x66,0x41,0x0f,0x38,0xdc,0xc0   /* aesenc %xmm8, %xmm0 */
+       .byte   0x66,0x41,0x0f,0x38,0xdc,0xc8   /* aesenc %xmm8, %xmm1 */
+       .byte   0x66,0x41,0x0f,0x38,0xdc,0xd0   /* aesenc %xmm8, %xmm2 */
+       .byte   0x66,0x41,0x0f,0x38,0xdc,0xd8   /* aesenc %xmm8, %xmm3 */
+       .byte   0x66,0x41,0x0f,0x38,0xdc,0xe0   /* aesenc %xmm8, %xmm4 */
+       .byte   0x66,0x41,0x0f,0x38,0xdc,0xe8   /* aesenc %xmm8, %xmm5 */
+       .byte   0x66,0x41,0x0f,0x38,0xdc,0xf0   /* aesenc %xmm8, %xmm6 */
+       .byte   0x66,0x41,0x0f,0x38,0xdc,0xf8   /* aesenc %xmm8, %xmm7 */
+#else
        .byte   0x66,0x0f,0x38,0xdc,0x00+NPTR2  /* aesenc (PTR2), %xmm0 */
        .byte   0x66,0x0f,0x38,0xdc,0x08+NPTR2  /* aesenc (PTR2), %xmm1 */
        .byte   0x66,0x0f,0x38,0xdc,0x10+NPTR2  /* aesenc (PTR2), %xmm2 */
@@ -132,8 +145,20 @@ ENTRY(x86_aes_mangle)
        .byte   0x66,0x0f,0x38,0xdc,0x30+NPTR2  /* aesenc (PTR2), %xmm6 */
        .byte   0x66,0x0f,0x38,0xdc,0x38+NPTR2  /* aesenc (PTR2), %xmm7 */
        add     $16, PTR2
+#endif
        .endr
 
+#ifdef __x86_64__
+       movdqa  offset(PTR2), %xmm8
+       .byte   0x66,0x41,0x0f,0x38,0xdd,0xc0   /* aesenclast %xmm8, %xmm0 */
+       .byte   0x66,0x41,0x0f,0x38,0xdd,0xc8   /* aesenclast %xmm8, %xmm1 */
+       .byte   0x66,0x41,0x0f,0x38,0xdd,0xd0   /* aesenclast %xmm8, %xmm2 */
+       .byte   0x66,0x41,0x0f,0x38,0xdd,0xd8   /* aesenclast %xmm8, %xmm3 */
+       .byte   0x66,0x41,0x0f,0x38,0xdd,0xe0   /* aesenclast %xmm8, %xmm4 */
+       .byte   0x66,0x41,0x0f,0x38,0xdd,0xe8   /* aesenclast %xmm8, %xmm5 */
+       .byte   0x66,0x41,0x0f,0x38,0xdd,0xf0   /* aesenclast %xmm8, %xmm6 */
+       .byte   0x66,0x41,0x0f,0x38,0xdd,0xf8   /* aesenclast %xmm8, %xmm7 */
+#else
        .byte   0x66,0x0f,0x38,0xdd,0x00+NPTR2  /* aesenclast (PTR2), %xmm0 */
        .byte   0x66,0x0f,0x38,0xdd,0x08+NPTR2  /* aesenclast (PTR2), %xmm1 */
        .byte   0x66,0x0f,0x38,0xdd,0x10+NPTR2  /* aesenclast (PTR2), %xmm2 */
@@ -142,6 +167,7 @@ ENTRY(x86_aes_mangle)
        .byte   0x66,0x0f,0x38,0xdd,0x28+NPTR2  /* aesenclast (PTR2), %xmm5 */
        .byte   0x66,0x0f,0x38,0xdd,0x30+NPTR2  /* aesenclast (PTR2), %xmm6 */
        .byte   0x66,0x0f,0x38,0xdd,0x38+NPTR2  /* aesenclast (PTR2), %xmm7 */
+#endif
 
        movdqa  %xmm0, (0*16)(PTR0)
        movdqa  %xmm1, (1*16)(PTR0)