]> git.ipfire.org Git - thirdparty/rng-tools.git/blame - rdrand_asm.S
rdrand: Fix the RDRAND data reduction
[thirdparty/rng-tools.git] / rdrand_asm.S
CommitLineData
/*
 * Copyright (c) 2011-2014, Intel Corporation
 * Authors: Fenghua Yu <fenghua.yu@intel.com>,
 *          H. Peter Anvin <hpa@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
20
795666e6
PA
21#if defined(__i386__) || defined(__x86_64__)
22
2a014536
BH
/*
 * ENTRY(x): open a global function named x, placed on a 64-byte boundary.
 */
#define ENTRY(x)		  \
	.p2align 6		; \
	.globl	x		; \
x:

/*
 * ENDPROC(x): close the function opened by ENTRY(x): record its size
 * (everything emitted since the label) and mark the symbol as a function.
 */
#define ENDPROC(x)		  \
	.size	x, .-x		; \
	.type	x, @function

/* How many times RDRAND is attempted for one word before giving up */
#define RDRAND_RETRY_LIMIT	10
33
795666e6 34#ifdef __x86_64__
2a014536 35
3164d3eb
PA
/*
 * x86_rdrand_bytes (x86-64)
 *
 * Fill a buffer with output from the RDRAND instruction.
 *
 * In:    %rdi = destination buffer
 *        %esi = number of bytes requested
 * Out:   %eax = number of bytes actually stored
 * Clobb: %rcx, %rdx, %rsi, %rdi, flags
 *
 * Data is stored 8 bytes at a time, so a request that is not a multiple
 * of 8 is rounded *up*: the last store still writes a full 8 bytes and
 * the return value includes them.  The caller's buffer must allow for
 * that.  A request of 0 bytes stores nothing and returns 0.
 *
 * Each word is attempted up to RDRAND_RETRY_LIMIT times; if RDRAND keeps
 * failing (carry flag clear), the function stops early and returns the
 * number of bytes stored so far.
 */
ENTRY(x86_rdrand_bytes)
	mov	%esi, %eax		/* eax = bytes requested */
	test	%esi, %esi
	jz	.Lrdrand64_done		/* nothing requested: store nothing, return 0 */
.Lrdrand64_next:
	mov	$RDRAND_RETRY_LIMIT, %ecx	/* fresh retry budget per word */
.Lrdrand64_try:
	.byte	0x48,0x0f,0xc7,0xf2	/* rdrand %rdx */
	jnc	.Lrdrand64_retry	/* CF clear: no data was returned */
	mov	%rdx, (%rdi)
	add	$8, %rdi
	sub	$8, %esi		/* esi = bytes still wanted */
	ja	.Lrdrand64_next		/* unsigned: loop only while esi > 0 */
.Lrdrand64_done:
	sub	%esi, %eax		/* requested - remaining = bytes stored */
	ret
.Lrdrand64_retry:
	dec	%ecx
	rep;nop				/* pause; leaves the flags from dec intact */
	jnz	.Lrdrand64_try
	jmp	.Lrdrand64_done		/* retries exhausted: report partial count */
ENDPROC(x86_rdrand_bytes)
2a014536
BH
56
/*
 * Register roles shared by the AES routines (x86-64 variant).
 *
 * SETPTR(var,ptr) loads the address of var into ptr, RIP-relative.
 * PTR0/PTR1 are %rdi/%rsi, i.e. the registers the AES routines receive
 * their first and second pointer arguments in.
 * PTR2 is a scratch pointer, CTR3 a scratch counter.
 */
#define SETPTR(var,ptr)	leaq var(%rip),ptr
#define PTR0	%rdi
#define PTR1	%rsi
#define PTR2	%rcx
#define CTR3	%eax
/*
 * Hardware register number of PTR2 (%rcx is register 1), for use in
 * hand-assembled ModRM bytes; only registers 0-7 are valid here.
 * The x86-64 paths of x86_aes_mangle do not reference it.
 */
#define NPTR2	1
63
64#elif defined(__i386__)
65
/*
 * x86_rdrand_bytes (i386)
 *
 * Fill a buffer with output from the RDRAND instruction.
 *
 * In:    8(%ebp)  = destination buffer
 *        12(%ebp) = number of bytes requested
 * Out:   %eax = number of bytes actually stored
 * Clobb: %ecx, %edx, flags (%edi, %esi and %ebp are saved and restored)
 *
 * Data is stored 4 bytes at a time, so a request that is not a multiple
 * of 4 is rounded *up*: the last store still writes a full 4 bytes and
 * the return value includes them.  The caller's buffer must allow for
 * that.  A request of 0 bytes stores nothing and returns 0.
 *
 * Each word is attempted up to RDRAND_RETRY_LIMIT times; if RDRAND keeps
 * failing (carry flag clear), the function stops early and returns the
 * number of bytes stored so far.
 */
ENTRY(x86_rdrand_bytes)
	push	%ebp
	mov	%esp, %ebp
	push	%edi
	push	%esi
	movl	8(%ebp), %edi		/* edi = destination pointer */
	movl	12(%ebp), %esi		/* esi = bytes still wanted */

	mov	%esi, %eax		/* eax = bytes requested */
	test	%esi, %esi
	jz	.Lrdrand32_done		/* nothing requested: store nothing, return 0 */
.Lrdrand32_next:
	mov	$RDRAND_RETRY_LIMIT, %ecx	/* fresh retry budget per word */
.Lrdrand32_try:
	.byte	0x0f,0xc7,0xf2		/* rdrand %edx */
	jnc	.Lrdrand32_retry	/* CF clear: no data was returned */
	mov	%edx, (%edi)
	add	$4, %edi
	sub	$4, %esi
	ja	.Lrdrand32_next		/* unsigned: loop only while esi > 0 */
.Lrdrand32_done:
	sub	%esi, %eax		/* requested - remaining = bytes stored */
	pop	%esi
	pop	%edi
	pop	%ebp
	ret
.Lrdrand32_retry:
	dec	%ecx
	rep;nop				/* pause; leaves the flags from dec intact */
	jnz	.Lrdrand32_try
	jmp	.Lrdrand32_done		/* retries exhausted: report partial count */
ENDPROC(x86_rdrand_bytes)
2a014536
BH
96
/*
 * Register roles shared by the AES routines (i386 variant).
 *
 * SETPTR(var,ptr) loads the absolute address of var into ptr.
 * NOTE(review): this is not position-independent; confirm the object is
 * never linked into PIE/shared code where text relocations are refused.
 * PTR0/PTR1 are %eax/%edx; the AES routines load them from their stack
 * arguments in the prologue.
 * PTR2 is a scratch pointer, CTR3 a scratch counter (%esi, which the
 * user of CTR3 saves and restores).
 */
#define SETPTR(var,ptr)	movl $(var),ptr
#define PTR0	%eax
#define PTR1	%edx
#define PTR2	%ecx
#define CTR3	%esi
/*
 * Hardware register number of PTR2 (%ecx is register 1); added to the
 * hand-assembled ModRM bytes to form the (PTR2) memory operand.
 */
#define NPTR2	1
103
104#endif
105
/*
 * x86_aes_mangle
 *
 * Folds a block of input data into a 128-byte state by repeatedly XORing
 * input into the state and running it through the AES round instructions,
 * keyed from aes_round_keys (which x86_aes_expand_key fills in).
 *
 * In:    PTR0 = input data, read as eight lanes that start
 *               AES_MANGLE_STRIDE bytes apart; each round consumes the
 *               next 16 bytes of every lane (8 * 8192 bytes in total)
 *        PTR1 = state, 8 x 16 bytes, read at entry and written at exit
 *        (on i386 these come from the stack: 8(%ebp) and 12(%ebp))
 * Out:   the state at PTR1 is updated in place
 * Clobb: %xmm0-%xmm7 (and %xmm8 on x86-64), PTR0, PTR2, CTR3, flags;
 *        on i386 %esi (CTR3) is saved and restored
 *
 * Both buffers are accessed with movdqa / pxor memory operands and so
 * must be 16-byte aligned.
 *
 * Per round every lane goes through ten aesenc steps (round keys 0-9)
 * followed by one aesenclast (round key 10).
 */
#define AES_MANGLE_ROUNDS	512	/* Number of rounds */
#define AES_MANGLE_STRIDE	8192	/* = 512 (rounds) * 16 (bytes) */

ENTRY(x86_aes_mangle)
#ifdef __i386__
	push	%ebp
	mov	%esp, %ebp
	movl	8(%ebp), %eax
	movl	12(%ebp), %edx
	push	%esi
#endif
	movl	$AES_MANGLE_ROUNDS, CTR3

	/* Bring the eight state lanes into %xmm0-%xmm7 */
	.irp	lane, 0,1,2,3,4,5,6,7
	movdqa	(\lane*16)(PTR1), %xmm\lane
	.endr

	/*
	 * x86-64 addresses the round keys at fixed offsets from PTR2, so the
	 * pointer is set up once.  i386 advances PTR2 while stepping through
	 * the keys and therefore has to reload it at the top of every round.
	 */
#ifdef __x86_64__
	SETPTR(aes_round_keys, PTR2)
.Laes_mangle_round:
#else
.Laes_mangle_round:
	SETPTR(aes_round_keys, PTR2)
#endif

	/* XOR the next 16 input bytes of each lane into the state */
	.irp	lane, 0,1,2,3,4,5,6,7
	pxor	(\lane*AES_MANGLE_STRIDE)(PTR0), %xmm\lane
	.endr
	add	$16, PTR0

offset = 0
	.rept	10
#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
offset = offset + 16
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc0	/* aesenc %xmm8, %xmm0 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xd0	/* aesenc %xmm8, %xmm2 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xd8	/* aesenc %xmm8, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe0	/* aesenc %xmm8, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe8	/* aesenc %xmm8, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf0	/* aesenc %xmm8, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf8	/* aesenc %xmm8, %xmm7 */
#else
	.byte	0x66,0x0f,0x38,0xdc,0x00+NPTR2	/* aesenc (PTR2), %xmm0 */
	.byte	0x66,0x0f,0x38,0xdc,0x08+NPTR2	/* aesenc (PTR2), %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0x10+NPTR2	/* aesenc (PTR2), %xmm2 */
	.byte	0x66,0x0f,0x38,0xdc,0x18+NPTR2	/* aesenc (PTR2), %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0x20+NPTR2	/* aesenc (PTR2), %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0x28+NPTR2	/* aesenc (PTR2), %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0x30+NPTR2	/* aesenc (PTR2), %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0x38+NPTR2	/* aesenc (PTR2), %xmm7 */
	add	$16, PTR2
#endif
	.endr

	/* Final step with the last round key (offset 160 / PTR2 after 10 steps) */
#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xc0	/* aesenclast %xmm8, %xmm0 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xc8	/* aesenclast %xmm8, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xd0	/* aesenclast %xmm8, %xmm2 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xd8	/* aesenclast %xmm8, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xe0	/* aesenclast %xmm8, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xe8	/* aesenclast %xmm8, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xf0	/* aesenclast %xmm8, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xf8	/* aesenclast %xmm8, %xmm7 */
#else
	.byte	0x66,0x0f,0x38,0xdd,0x00+NPTR2	/* aesenclast (PTR2), %xmm0 */
	.byte	0x66,0x0f,0x38,0xdd,0x08+NPTR2	/* aesenclast (PTR2), %xmm1 */
	.byte	0x66,0x0f,0x38,0xdd,0x10+NPTR2	/* aesenclast (PTR2), %xmm2 */
	.byte	0x66,0x0f,0x38,0xdd,0x18+NPTR2	/* aesenclast (PTR2), %xmm3 */
	.byte	0x66,0x0f,0x38,0xdd,0x20+NPTR2	/* aesenclast (PTR2), %xmm4 */
	.byte	0x66,0x0f,0x38,0xdd,0x28+NPTR2	/* aesenclast (PTR2), %xmm5 */
	.byte	0x66,0x0f,0x38,0xdd,0x30+NPTR2	/* aesenclast (PTR2), %xmm6 */
	.byte	0x66,0x0f,0x38,0xdd,0x38+NPTR2	/* aesenclast (PTR2), %xmm7 */
#endif
	sub	$1, CTR3
	jnz	.Laes_mangle_round

	/* Write the eight state lanes back */
	.irp	lane, 0,1,2,3,4,5,6,7
	movdqa	%xmm\lane, (\lane*16)(PTR1)
	.endr

#ifdef __i386__
	pop	%esi
	pop	%ebp
#endif
	ret
ENDPROC(x86_aes_mangle)
2a014536 208
94f03c9a
PA
/* aeskeygenassist $imm,%xmm0,%xmm1 */
#define AESKEYGENASSIST(imm) .byte 0x66,0x0f,0x3a,0xdf,0xc8,imm

/*
 * x86_aes_expand_key
 *
 * Expands a 16-byte key into the 11 round keys stored in aes_round_keys.
 *
 * In:    PTR0 = 16-byte key; read with movdqu, so no alignment is needed
 *        (on i386 it is fetched from the stack: 8(%ebp))
 * Out:   aes_round_keys[0] = the key itself, [1..10] = derived round keys
 * Clobb: %xmm0, %xmm1, %xmm2, PTR1, flags
 */
ENTRY(x86_aes_expand_key)
#ifdef __i386__
	push	%ebp
	mov	%esp, %ebp
	movl	8(%ebp), %eax
#endif

	SETPTR(aes_round_keys, PTR1)
	movdqu	(PTR0), %xmm0
	movdqa	%xmm0, (PTR1)		/* First slot = the plain key */
	add	$16, PTR1

	/* One derivation step per round constant, in this order */
	.irp	rcon, 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36
	AESKEYGENASSIST(\rcon)
	call	.Laes_expand_step
	.endr

#ifdef __i386__
	pop	%ebp
#endif
	ret

/*
 * Local helper: derive the next round key.
 * In:  %xmm0 = previous round key, %xmm1 = aeskeygenassist result,
 *      PTR1  = slot to store into
 * Out: %xmm0 = new round key (also stored at PTR1), PTR1 advanced by 16
 */
.Laes_expand_step:
	pshufd	$0xff, %xmm1, %xmm1	/* broadcast the top dword of %xmm1 */
	movdqa	%xmm0, %xmm2
	.rept	3			/* xmm0 ^= xmm0<<32 ^ xmm0<<64 ^ xmm0<<96 */
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	.endr
	pxor	%xmm1, %xmm0
	movdqa	%xmm0, (PTR1)
	add	$16, PTR1
	ret

ENDPROC(x86_aes_expand_key)
265
/*
 * Round-key storage: 11 slots of 16 bytes, zero-initialised (.bss) and
 * aligned to 64 bytes.  Written by x86_aes_expand_key, read by
 * x86_aes_mangle.
 */
	.bss
	.p2align 6
aes_round_keys:
	.skip	11*16
	.size	aes_round_keys, .-aes_round_keys
795666e6 271
2a014536 272#endif /* i386 or x86_64 */
b8579105
PA
273
/*
 * This is necessary to keep the whole executable
 * from needing an executable stack.
 */
	.section .note.GNU-stack,"",%progbits