/*
 * Copyright (c) 2011-2014, Intel Corporation
 * Authors: Fenghua Yu,
 *          H. Peter Anvin
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#if defined(__i386__) || defined(__x86_64__)

#define ENTRY(x)	  \
	.balign	64	; \
	.globl	x	; \
x:

#define ENDPROC(x)		  \
	.size	x, .-x		; \
	.type	x, @function

#define RDRAND_RETRY_LIMIT	10

#ifdef __x86_64__

/*
 * x86_rdrand_bytes(buf, count): fill buf (%rdi) with count (%esi) bytes
 * from RDRAND, retrying each word up to RDRAND_RETRY_LIMIT times.
 * Returns the number of bytes actually generated in %eax; count is
 * expected to be a multiple of the word size.
 */
ENTRY(x86_rdrand_bytes)
	mov	%esi, %eax
1:
	mov	$RDRAND_RETRY_LIMIT, %ecx
2:
	.byte	0x48,0x0f,0xc7,0xf2	/* rdrand %rdx */
	jnc	3f
	mov	%rdx, (%rdi)
	add	$8, %rdi
	sub	$8, %esi
	ja	1b
4:
	sub	%esi, %eax
	ret
3:
	dec	%ecx
	rep;nop
	jnz	2b
	jmp	4b
ENDPROC(x86_rdrand_bytes)

/*
 * x86_rdseed_or_rdrand_bytes(seed_buf, &seed_cnt, rand_buf, &rand_cnt):
 * fill seed_buf from RDSEED; whenever RDSEED underflows, divert one
 * word from RDRAND into rand_buf before retrying.  Both counts are
 * rewritten in place with the number of bytes actually generated.
 */
ENTRY(x86_rdseed_or_rdrand_bytes)
	mov	(%rsi), %r8d		/* RDSEED count */
	mov	(%rcx), %r9d		/* RDRAND count */
1:
	mov	$RDRAND_RETRY_LIMIT, %r10d
2:
	.byte	0x48,0x0f,0xc7,0xf8	/* rdseed %rax */
	jnc	3f
	mov	%rax, (%rdi)
	add	$8, %rdi
	sub	$8, %r8d
	ja	1b
4:
	sub	%r8d, (%rsi)
	sub	%r9d, (%rcx)
	ret
3:
	.byte	0x48,0x0f,0xc7,0xf0	/* rdrand %rax */
	jnc	5f
	mov	%rax, (%rdx)
	add	$8, %rdx
	sub	$8, %r9d
	ja	1b
	jmp	4b
5:
	dec	%r10d
	rep;nop
	jnz	2b
	jmp	4b
ENDPROC(x86_rdseed_or_rdrand_bytes)

#define SETPTR(var,ptr)	leaq var(%rip),ptr
#define PTR0	%rdi
#define PTR1	%rsi
#define PTR2	%rcx
#define CTR3	%eax

#define NPTR2	1	/* %rcx = %r1, only 0-7 valid here */

#elif defined(__i386__)

/* i386 versions of the same entry points; cdecl stack arguments. */
ENTRY(x86_rdrand_bytes)
	push	%ebp
	mov	%esp, %ebp
	push	%edi
	push	%esi
	movl	8(%ebp), %edi
	movl	12(%ebp), %esi

	mov	%esi, %eax
1:
	mov	$RDRAND_RETRY_LIMIT, %ecx
2:
	.byte	0x0f,0xc7,0xf2		/* rdrand %edx */
	jnc	3f
	mov	%edx, (%edi)
	add	$4, %edi
	sub	$4, %esi
	ja	1b
4:
	sub	%esi, %eax
	pop	%esi
	pop	%edi
	pop	%ebp
	ret
3:
	dec	%ecx
	rep;nop
	jnz	2b
	jmp	4b
ENDPROC(x86_rdrand_bytes)

ENTRY(x86_rdseed_or_rdrand_bytes)
	push	%ebp
	mov	%esp, %ebp
	push	%edi
	push	%esi
	push	%ebx

	mov	12(%ebp), %ebx		/* RDSEED count pointer */
	mov	20(%ebp), %esi		/* RDRAND count pointer */
	mov	8(%ebp), %edi		/* RDSEED pointer */
	mov	16(%ebp), %edx		/* RDRAND pointer */
	mov	(%ebx), %ebx		/* RDSEED count */
	mov	(%esi), %esi		/* RDRAND count */
1:
	mov	$RDRAND_RETRY_LIMIT, %ecx
2:
	.byte	0x0f,0xc7,0xf8		/* rdseed %eax */
	jnc	3f
	mov	%eax, (%edi)
	add	$4, %edi
	sub	$4, %ebx
	ja	1b
4:
	mov	12(%ebp), %edx
	mov	20(%ebp), %eax
	sub	%ebx, (%edx)		/* RDSEED count */
	sub	%esi, (%eax)		/* RDRAND count */
	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	ret
3:
	.byte	0x0f,0xc7,0xf0		/* rdrand %eax */
	jnc	5f
	mov	%eax, (%edx)
	add	$4, %edx
	sub	$4, %esi
	ja	1b
	jmp	4b
5:
	dec	%ecx
	rep;nop
	jnz	2b
	jmp	4b
ENDPROC(x86_rdseed_or_rdrand_bytes)

#define SETPTR(var,ptr)	movl $(var),ptr
#define PTR0	%eax
#define PTR1	%edx
#define PTR2	%ecx
#define CTR3	%esi

#define NPTR2	1	/* %rcx = %r1 */

#endif
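/*
 * x86_aes_mangle: mix a 128-byte state with a 64 KiB data block.
 *
 * The code below keeps eight 16-byte AES states in %xmm0-%xmm7.  On each
 * of 512 iterations it XORs 16 fresh bytes into each state (the eight
 * data streams are interleaved at a stride of 8192 = 512*16 bytes) and
 * then runs one full AES-128 encryption (ten aesenc rounds plus
 * aesenclast) using the schedule in aes_round_keys, before finally
 * writing the state back.
 *
 * An equivalent C prototype (an assumption inferred from the register
 * and stack usage; this file only defines the symbol) would be:
 *
 *	void x86_aes_mangle(void *data, void *state);
 *
 * where PTR0 = data (8 * 8192 bytes) and PTR1 = state (8 * 16 bytes),
 * both 16-byte aligned since movdqa/pxor memory operands are used.
 */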
ENTRY(x86_aes_mangle)
#ifdef __i386__
	push	%ebp
	mov	%esp, %ebp
	movl	8(%ebp), %eax
	movl	12(%ebp), %edx
	push	%esi
#endif
	movl	$512, CTR3		/* Number of rounds */

	movdqa	(0*16)(PTR1), %xmm0
	movdqa	(1*16)(PTR1), %xmm1
	movdqa	(2*16)(PTR1), %xmm2
	movdqa	(3*16)(PTR1), %xmm3
	movdqa	(4*16)(PTR1), %xmm4
	movdqa	(5*16)(PTR1), %xmm5
	movdqa	(6*16)(PTR1), %xmm6
	movdqa	(7*16)(PTR1), %xmm7

#ifdef __x86_64__
	/* PTR2 is indexed with fixed offsets: set it up once. */
	SETPTR(aes_round_keys, PTR2)
1:
#else
	/* PTR2 is advanced inside the round loop: reset it every pass. */
1:
	SETPTR(aes_round_keys, PTR2)
#endif

	/* 8192 = 512 (rounds) * 16 (bytes) */
	pxor	(0*8192)(PTR0), %xmm0
	pxor	(1*8192)(PTR0), %xmm1
	pxor	(2*8192)(PTR0), %xmm2
	pxor	(3*8192)(PTR0), %xmm3
	pxor	(4*8192)(PTR0), %xmm4
	pxor	(5*8192)(PTR0), %xmm5
	pxor	(6*8192)(PTR0), %xmm6
	pxor	(7*8192)(PTR0), %xmm7
	add	$16, PTR0

	offset = 0
	.rept 10
#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
	offset = offset + 16
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc0	/* aesenc %xmm8, %xmm0 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xd0	/* aesenc %xmm8, %xmm2 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xd8	/* aesenc %xmm8, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe0	/* aesenc %xmm8, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe8	/* aesenc %xmm8, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf0	/* aesenc %xmm8, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf8	/* aesenc %xmm8, %xmm7 */
#else
	.byte	0x66,0x0f,0x38,0xdc,0x00+NPTR2	/* aesenc (PTR2), %xmm0 */
	.byte	0x66,0x0f,0x38,0xdc,0x08+NPTR2	/* aesenc (PTR2), %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0x10+NPTR2	/* aesenc (PTR2), %xmm2 */
	.byte	0x66,0x0f,0x38,0xdc,0x18+NPTR2	/* aesenc (PTR2), %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0x20+NPTR2	/* aesenc (PTR2), %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0x28+NPTR2	/* aesenc (PTR2), %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0x30+NPTR2	/* aesenc (PTR2), %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0x38+NPTR2	/* aesenc (PTR2), %xmm7 */
	add	$16, PTR2
#endif
	.endr

#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xc0	/* aesenclast %xmm8, %xmm0 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xc8	/* aesenclast %xmm8, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xd0	/* aesenclast %xmm8, %xmm2 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xd8	/* aesenclast %xmm8, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xe0	/* aesenclast %xmm8, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xe8	/* aesenclast %xmm8, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xf0	/* aesenclast %xmm8, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xf8	/* aesenclast %xmm8, %xmm7 */
#else
	.byte	0x66,0x0f,0x38,0xdd,0x00+NPTR2	/* aesenclast (PTR2), %xmm0 */
	.byte	0x66,0x0f,0x38,0xdd,0x08+NPTR2	/* aesenclast (PTR2), %xmm1 */
	.byte	0x66,0x0f,0x38,0xdd,0x10+NPTR2	/* aesenclast (PTR2), %xmm2 */
	.byte	0x66,0x0f,0x38,0xdd,0x18+NPTR2	/* aesenclast (PTR2), %xmm3 */
	.byte	0x66,0x0f,0x38,0xdd,0x20+NPTR2	/* aesenclast (PTR2), %xmm4 */
	.byte	0x66,0x0f,0x38,0xdd,0x28+NPTR2	/* aesenclast (PTR2), %xmm5 */
	.byte	0x66,0x0f,0x38,0xdd,0x30+NPTR2	/* aesenclast (PTR2), %xmm6 */
	.byte	0x66,0x0f,0x38,0xdd,0x38+NPTR2	/* aesenclast (PTR2), %xmm7 */
#endif

	sub	$1, CTR3
	jnz	1b

	movdqa	%xmm0, (0*16)(PTR1)
	movdqa	%xmm1, (1*16)(PTR1)
	movdqa	%xmm2, (2*16)(PTR1)
	movdqa	%xmm3, (3*16)(PTR1)
	movdqa	%xmm4, (4*16)(PTR1)
	movdqa	%xmm5, (5*16)(PTR1)
	movdqa	%xmm6, (6*16)(PTR1)
	movdqa	%xmm7, (7*16)(PTR1)

#ifdef __i386__
	pop	%esi
	pop	%ebp
#endif
	ret
ENDPROC(x86_aes_mangle)

/* aeskeygenassist $imm,%xmm0,%xmm1 */
#define AESKEYGENASSIST(imm)	.byte 0x66,0x0f,0x3a,0xdf,0xc8,imm

ENTRY(x86_aes_expand_key)
#ifdef __i386__
	push	%ebp
	mov	%esp, %ebp
	movl	8(%ebp), %eax
#endif

	SETPTR(aes_round_keys, PTR1)

	movdqu	(PTR0), %xmm0
	movdqa	%xmm0, (PTR1)	/* First slot = the plain key */
	add	$16, PTR1

	AESKEYGENASSIST(0x01)
	call	1f
	AESKEYGENASSIST(0x02)
	call	1f
	AESKEYGENASSIST(0x04)
	call	1f
	AESKEYGENASSIST(0x08)
	call	1f
	AESKEYGENASSIST(0x10)
	call	1f
	AESKEYGENASSIST(0x20)
	call	1f
	AESKEYGENASSIST(0x40)
	call	1f
	AESKEYGENASSIST(0x80)
	call	1f
	AESKEYGENASSIST(0x1b)
	call	1f
	AESKEYGENASSIST(0x36)
	call	1f

#ifdef __i386__
	pop	%ebp
#endif
	ret
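/*
 * Key-expansion helper: one step of the standard AES-128 key schedule.
 * On entry %xmm0 holds the previous round key and %xmm1 the
 * aeskeygenassist result.  pshufd broadcasts the relevant word of the
 * assist result; the pslldq/pxor ladder computes the running prefix-XOR
 * of the four previous key words.  The new round key is stored at
 * (PTR1), which is advanced, and left in %xmm0 for the next call.
 */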
1:
	pshufd	$0xff, %xmm1, %xmm1
	movdqa	%xmm0, %xmm2
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	pxor	%xmm1, %xmm0
	movdqa	%xmm0, (PTR1)
	add	$16, PTR1
	ret
ENDPROC(x86_aes_expand_key)

	.bss
	.balign 64
aes_round_keys:
	.space	11*16		/* 11 round keys of 16 bytes each (AES-128) */
	.size	aes_round_keys, .-aes_round_keys

#endif /* i386 or x86_64 */

/*
 * This is necessary to keep the whole executable
 * from needing a writable stack.
 */
	.section	.note.GNU-stack,"",%progbits
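/*
 * Illustrative caller (a C sketch, not part of this file).  The
 * prototypes and the alignment attribute are assumptions inferred from
 * the register and stack usage above:
 *
 *	extern int  x86_rdrand_bytes(void *buf, int count);
 *	extern void x86_aes_expand_key(const void *key);
 *	extern void x86_aes_mangle(void *data, void *state);
 *
 *	static unsigned char key[16];
 *	static unsigned char state[8 * 16]  __attribute__((aligned(16)));
 *	static unsigned char data[8 * 8192] __attribute__((aligned(16)));
 *
 *	// data and state assumed already initialized by the caller
 *	if (x86_rdrand_bytes(key, sizeof key) == (int)sizeof key) {
 *		x86_aes_expand_key(key);     // fills aes_round_keys
 *		x86_aes_mangle(data, state); // 512 AES passes over 64 KiB
 *	}
 */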