]>
Commit | Line | Data |
---|---|---|
2a014536 | 1 | /* |
94f03c9a | 2 | * Copyright (c) 2011-2014, Intel Corporation |
2a014536 BH |
3 | * Authors: Fenghua Yu <fenghua.yu@intel.com>, |
4 | * H. Peter Anvin <hpa@linux.intel.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify it | |
7 | * under the terms and conditions of the GNU General Public License, | |
8 | * version 2, as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope it will be useful, but WITHOUT | |
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
13 | * more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License along with | |
16 | * this program; if not, write to the Free Software Foundation, Inc., | |
17 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | |
18 | * | |
19 | */ | |
20 | ||
795666e6 PA |
21 | #if defined(__i386__) || defined(__x86_64__) |
22 | ||
2a014536 BH |
/*
 * ENTRY(x): open a global symbol x, placed on a 64-byte boundary.
 */
#define ENTRY(x)			  \
	.balign	64			; \
	.globl	x			; \
x:

/*
 * ENDPROC(x): close symbol x by recording its size (distance from the
 * label to this point) and tagging it as a function.
 */
#define ENDPROC(x)			  \
	.size	x, .-x			; \
	.type	x, @function

/*
 * Retry budget for the random-number loops: the counter is reloaded
 * with this value after every successful word and decremented on each
 * failed attempt.
 */
#define RDRAND_RETRY_LIMIT	10
33 | ||
795666e6 | 34 | #ifdef __x86_64__ |
2a014536 | 35 | |
3164d3eb PA |
/*
 * x86_rdrand_bytes (x86-64)
 *
 * In:	%rdi = destination buffer
 *	%esi = number of bytes wanted
 * Out:	%eax = initial %esi minus the amount still outstanding at exit,
 *	       i.e. the number of bytes stored
 *
 * Data is produced with RDRAND and stored 8 bytes at a time, so a
 * request that is not a multiple of 8 is rounded up to a whole word.
 * Every word gets a fresh budget of RDRAND_RETRY_LIMIT attempts; if a
 * word uses them all up the function returns early with a short count.
 * The loop is bottom-tested: a word is stored before the count is
 * checked.
 *
 * Clobbers: %rcx, %rdx, %rsi, %rdi, flags.
 */
ENTRY(x86_rdrand_bytes)
	mov	%esi, %eax			/* keep the requested size */
.Lrdrand64_next_word:
	mov	$RDRAND_RETRY_LIMIT, %ecx	/* new retry budget */
.Lrdrand64_attempt:
	.byte	0x48,0x0f,0xc7,0xf2		/* rdrand %rdx */
	jnc	.Lrdrand64_failed		/* CF clear: nothing delivered */
	mov	%rdx, (%rdi)
	add	$8, %rdi
	sub	$8, %esi
	ja	.Lrdrand64_next_word		/* bytes still outstanding */
.Lrdrand64_done:
	sub	%esi, %eax			/* requested - outstanding */
	ret
.Lrdrand64_failed:
	dec	%ecx
	rep;nop					/* pause; flags from dec survive */
	jnz	.Lrdrand64_attempt
	jmp	.Lrdrand64_done			/* budget exhausted */
ENDPROC(x86_rdrand_bytes)
2a014536 | 56 | |
f655a53a PA |
/*
 * x86_rdseed_or_rdrand_bytes (x86-64)
 *
 * In:	%rdi = RDSEED destination buffer
 *	%rsi = pointer to the RDSEED byte count
 *	%rdx = RDRAND destination buffer
 *	%rcx = pointer to the RDRAND byte count
 *
 * Every attempt tries RDSEED first and falls back to RDRAND when RDSEED
 * delivers nothing; a successful 8-byte result is stored in the
 * matching buffer.  The loop stops as soon as either buffer has
 * received its requested amount, or after RDRAND_RETRY_LIMIT
 * consecutive attempts in which both instructions failed.  On return
 * each count has been reduced by the bytes still outstanding for that
 * buffer, i.e. it holds the number of bytes stored.
 *
 * Clobbers: %rax, %rdx, %rdi, %r8, %r9, %r10, flags.
 */
ENTRY(x86_rdseed_or_rdrand_bytes)
	mov	(%rcx), %r9d			/* RDRAND bytes outstanding */
	mov	(%rsi), %r8d			/* RDSEED bytes outstanding */
.Lseed64_next_word:
	mov	$RDRAND_RETRY_LIMIT, %r10d	/* new retry budget */
.Lseed64_attempt:
	.byte	0x48,0x0f,0xc7,0xf8		/* rdseed %rax */
	jnc	.Lseed64_try_rdrand		/* CF clear: no seed available */
	mov	%rax, (%rdi)
	add	$8, %rdi
	sub	$8, %r8d
	ja	.Lseed64_next_word		/* RDSEED buffer not full yet */
.Lseed64_done:
	sub	%r8d, (%rsi)			/* wanted - outstanding */
	sub	%r9d, (%rcx)
	ret
.Lseed64_try_rdrand:
	.byte	0x48,0x0f,0xc7,0xf0		/* rdrand %rax */
	jnc	.Lseed64_both_failed
	mov	%rax, (%rdx)
	add	$8, %rdx
	sub	$8, %r9d
	ja	.Lseed64_next_word		/* RDRAND buffer not full yet */
	jmp	.Lseed64_done
.Lseed64_both_failed:
	dec	%r10d
	rep;nop					/* pause; flags from dec survive */
	jnz	.Lseed64_attempt
	jmp	.Lseed64_done			/* budget exhausted */
ENDPROC(x86_rdseed_or_rdrand_bytes)
87 | ||
2a014536 BH |
/*
 * x86-64 register assignments shared by the AES routines below.
 */
#define SETPTR(var,ptr)	leaq var(%rip),ptr	/* ptr = &var, RIP-relative */
#define PTR0		%rdi
#define PTR1		%rsi
#define PTR2		%rcx
#define CTR3		%eax
/* Encoding number of PTR2: %rcx is register 1; only 0-7 are valid here */
#define NPTR2		1
94 | ||
95 | #elif defined(__i386__) | |
96 | ||
/*
 * x86_rdrand_bytes (i386, arguments on the stack)
 *
 *	 8(%ebp)  destination buffer
 *	12(%ebp)  number of bytes wanted
 * Out:	%eax = bytes wanted minus the amount still outstanding at exit,
 *	       i.e. the number of bytes stored
 *
 * Data is produced with RDRAND and stored 4 bytes at a time, so a
 * request that is not a multiple of 4 is rounded up to a whole word.
 * Every word gets a fresh budget of RDRAND_RETRY_LIMIT attempts; if a
 * word uses them all up the function returns early with a short count.
 * The loop is bottom-tested: a word is stored before the count is
 * checked.
 *
 * %edi and %esi are saved and restored; %ecx and %edx are clobbered.
 */
ENTRY(x86_rdrand_bytes)
	push	%ebp
	mov	%esp, %ebp
	push	%edi
	push	%esi
	movl	12(%ebp), %esi			/* bytes outstanding */
	movl	8(%ebp), %edi			/* write cursor */

	mov	%esi, %eax			/* keep the requested size */
.Lrdrand32_next_word:
	mov	$RDRAND_RETRY_LIMIT, %ecx	/* new retry budget */
.Lrdrand32_attempt:
	.byte	0x0f,0xc7,0xf2			/* rdrand %edx */
	jnc	.Lrdrand32_failed		/* CF clear: nothing delivered */
	mov	%edx, (%edi)
	add	$4, %edi
	sub	$4, %esi
	ja	.Lrdrand32_next_word		/* bytes still outstanding */
.Lrdrand32_done:
	sub	%esi, %eax			/* requested - outstanding */
	pop	%esi
	pop	%edi
	pop	%ebp
	ret
.Lrdrand32_failed:
	dec	%ecx
	rep;nop					/* pause; flags from dec survive */
	jnz	.Lrdrand32_attempt
	jmp	.Lrdrand32_done			/* budget exhausted */
ENDPROC(x86_rdrand_bytes)
2a014536 | 127 | |
f655a53a PA |
128 | |
/*
 * x86_rdseed_or_rdrand_bytes (i386, arguments on the stack)
 *
 *	 8(%ebp)  RDSEED destination buffer
 *	12(%ebp)  pointer to the RDSEED byte count
 *	16(%ebp)  RDRAND destination buffer
 *	20(%ebp)  pointer to the RDRAND byte count
 *
 * Every attempt tries RDSEED first and falls back to RDRAND when RDSEED
 * delivers nothing; a successful 4-byte result is stored in the
 * matching buffer.  The loop stops as soon as either buffer has
 * received its requested amount, or after RDRAND_RETRY_LIMIT
 * consecutive attempts in which both instructions failed.  On return
 * each count has been reduced by the bytes still outstanding for that
 * buffer, i.e. it holds the number of bytes stored.
 *
 * Both stores happen before the corresponding count is checked.
 * NOTE(review): presumably callers always pass non-zero counts - confirm.
 *
 * %ebx, %esi and %edi are saved and restored; %eax, %ecx and %edx are
 * clobbered.
 */
ENTRY(x86_rdseed_or_rdrand_bytes)
	push	%ebp
	mov	%esp, %ebp
	push	%edi
	push	%esi
	push	%ebx

	mov	12(%ebp), %ebx
	mov	20(%ebp), %esi
	mov	8(%ebp), %edi			/* RDSEED pointer */
	mov	16(%ebp), %edx			/* RDRAND pointer */
	mov	(%ebx), %ebx			/* RDSEED count */
	mov	(%esi), %esi			/* RDRAND count */
.Lseed32_next_word:
	mov	$RDRAND_RETRY_LIMIT, %ecx	/* new retry budget */
.Lseed32_attempt:
	.byte	0x0f,0xc7,0xf8			/* rdseed %eax */
	jnc	.Lseed32_try_rdrand		/* CF clear: no seed available */
	mov	%eax, (%edi)
	add	$4, %edi
	sub	$4, %ebx
	ja	.Lseed32_next_word		/* RDSEED buffer not full yet */
.Lseed32_done:
	/* %edx and %eax are free again: reload the two count pointers */
	mov	12(%ebp), %edx
	mov	20(%ebp), %eax
	sub	%ebx, (%edx)			/* RDSEED count */
	sub	%esi, (%eax)			/* RDRAND count */

	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	ret
.Lseed32_try_rdrand:
	.byte	0x0f,0xc7,0xf0			/* rdrand %eax */
	jnc	.Lseed32_both_failed
	mov	%eax, (%edx)
	add	$4, %edx
	sub	$4, %esi
	/*
	 * Continue only while RDRAND bytes are still outstanding.  Once
	 * the count reaches zero (or wraps) the buffer is full and the
	 * function must finish here; falling into the retry path would
	 * let later RDRAND results be written past the end of the buffer.
	 */
	ja	.Lseed32_next_word
	jmp	.Lseed32_done
.Lseed32_both_failed:
	dec	%ecx
	rep;nop					/* pause; flags from dec survive */
	jnz	.Lseed32_attempt
	jmp	.Lseed32_done			/* budget exhausted */
ENDPROC(x86_rdseed_or_rdrand_bytes)
176 | ||
2a014536 BH |
/*
 * i386 register assignments shared by the AES routines below.
 */
#define SETPTR(var,ptr)	movl $(var),ptr		/* ptr = absolute address of var */
#define PTR0		%eax
#define PTR1		%edx
#define PTR2		%ecx
#define CTR3		%esi
/* Encoding number of PTR2 (%ecx is register 1), added to ModRM bytes */
#define NPTR2		1
183 | ||
184 | #endif | |
185 | ||
/*
 * x86_aes_mangle
 *
 * In:	PTR0 = input data, read as eight streams spaced 8192 bytes apart
 *	       (8 * 8192 bytes in total)
 *	PTR1 = 8 * 16 bytes of state, loaded at entry and written back
 *	       at exit
 *
 * On x86-64 the two pointers arrive in %rdi and %rsi; on i386 they are
 * loaded from the stack into %eax and %edx.
 *
 * The state is held in %xmm0-%xmm7.  Each of the 512 passes XORs the
 * next 16 bytes of every stream into the matching state register and
 * then runs ten AESENC steps followed by one AESENCLAST, using the
 * eleven entries of aes_round_keys in order.
 *
 * Both buffers are accessed with movdqa / pxor memory operands and so
 * must be 16-byte aligned.  The AES instructions are emitted as raw
 * bytes; the intended mnemonic is given next to each encoding.
 */
ENTRY(x86_aes_mangle)
#ifdef __i386__
	push	%ebp
	mov	%esp, %ebp
	push	%esi			/* CTR3 is %esi here; restored on exit */
	movl	8(%ebp), %eax		/* PTR0 */
	movl	12(%ebp), %edx		/* PTR1 */
#endif
	movl	$512, CTR3		/* passes left */

	/* Load the eight state blocks */
	movdqa	(0*16)(PTR1), %xmm0
	movdqa	(1*16)(PTR1), %xmm1
	movdqa	(2*16)(PTR1), %xmm2
	movdqa	(3*16)(PTR1), %xmm3
	movdqa	(4*16)(PTR1), %xmm4
	movdqa	(5*16)(PTR1), %xmm5
	movdqa	(6*16)(PTR1), %xmm6
	movdqa	(7*16)(PTR1), %xmm7

	/*
	 * x86-64 addresses the keys as offset(PTR2), so PTR2 is set once
	 * outside the loop.  i386 advances PTR2 through the table and
	 * therefore has to reset it at the top of every pass.
	 */
#ifdef __x86_64__
	SETPTR(aes_round_keys, PTR2)
.Lmangle_pass:
#else
.Lmangle_pass:
	SETPTR(aes_round_keys, PTR2)
#endif

	/* Stream stride: 8192 = 512 (passes) * 16 (bytes per pass) */
	pxor	(0*8192)(PTR0), %xmm0
	pxor	(1*8192)(PTR0), %xmm1
	pxor	(2*8192)(PTR0), %xmm2
	pxor	(3*8192)(PTR0), %xmm3
	pxor	(4*8192)(PTR0), %xmm4
	pxor	(5*8192)(PTR0), %xmm5
	pxor	(6*8192)(PTR0), %xmm6
	pxor	(7*8192)(PTR0), %xmm7
	add	$16, PTR0

	/* Assembly-time cursor into aes_round_keys (x86-64 path only) */
offset = 0
	.rept 10
#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
offset = offset + 16
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc0	/* aesenc %xmm8, %xmm0 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xd0	/* aesenc %xmm8, %xmm2 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xd8	/* aesenc %xmm8, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe0	/* aesenc %xmm8, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe8	/* aesenc %xmm8, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf0	/* aesenc %xmm8, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf8	/* aesenc %xmm8, %xmm7 */
#else
	.byte	0x66,0x0f,0x38,0xdc,0x00+NPTR2	/* aesenc (PTR2), %xmm0 */
	.byte	0x66,0x0f,0x38,0xdc,0x08+NPTR2	/* aesenc (PTR2), %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0x10+NPTR2	/* aesenc (PTR2), %xmm2 */
	.byte	0x66,0x0f,0x38,0xdc,0x18+NPTR2	/* aesenc (PTR2), %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0x20+NPTR2	/* aesenc (PTR2), %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0x28+NPTR2	/* aesenc (PTR2), %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0x30+NPTR2	/* aesenc (PTR2), %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0x38+NPTR2	/* aesenc (PTR2), %xmm7 */
	add	$16, PTR2
#endif
	.endr

	/* Final step with the eleventh key */
#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xc0	/* aesenclast %xmm8, %xmm0 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xc8	/* aesenclast %xmm8, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xd0	/* aesenclast %xmm8, %xmm2 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xd8	/* aesenclast %xmm8, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xe0	/* aesenclast %xmm8, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xe8	/* aesenclast %xmm8, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xf0	/* aesenclast %xmm8, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xf8	/* aesenclast %xmm8, %xmm7 */
#else
	.byte	0x66,0x0f,0x38,0xdd,0x00+NPTR2	/* aesenclast (PTR2), %xmm0 */
	.byte	0x66,0x0f,0x38,0xdd,0x08+NPTR2	/* aesenclast (PTR2), %xmm1 */
	.byte	0x66,0x0f,0x38,0xdd,0x10+NPTR2	/* aesenclast (PTR2), %xmm2 */
	.byte	0x66,0x0f,0x38,0xdd,0x18+NPTR2	/* aesenclast (PTR2), %xmm3 */
	.byte	0x66,0x0f,0x38,0xdd,0x20+NPTR2	/* aesenclast (PTR2), %xmm4 */
	.byte	0x66,0x0f,0x38,0xdd,0x28+NPTR2	/* aesenclast (PTR2), %xmm5 */
	.byte	0x66,0x0f,0x38,0xdd,0x30+NPTR2	/* aesenclast (PTR2), %xmm6 */
	.byte	0x66,0x0f,0x38,0xdd,0x38+NPTR2	/* aesenclast (PTR2), %xmm7 */
#endif
	sub	$1, CTR3
	jnz	.Lmangle_pass

	/* Write the eight state blocks back */
	movdqa	%xmm0, (0*16)(PTR1)
	movdqa	%xmm1, (1*16)(PTR1)
	movdqa	%xmm2, (2*16)(PTR1)
	movdqa	%xmm3, (3*16)(PTR1)
	movdqa	%xmm4, (4*16)(PTR1)
	movdqa	%xmm5, (5*16)(PTR1)
	movdqa	%xmm6, (6*16)(PTR1)
	movdqa	%xmm7, (7*16)(PTR1)

#ifdef __i386__
	pop	%esi
	pop	%ebp
#endif
	ret
ENDPROC(x86_aes_mangle)
2a014536 | 288 | |
94f03c9a PA |
/*
 * Raw encoding of: aeskeygenassist $imm, %xmm0, %xmm1
 * (source %xmm0, result in %xmm1)
 */
#define AESKEYGENASSIST(imm) .byte 0x66,0x0f,0x3a,0xdf,0xc8,imm

/*
 * x86_aes_expand_key
 *
 * In:	PTR0 = 16-byte key (read with movdqu, so no alignment needed);
 *	       %rdi on x86-64, loaded from 8(%ebp) on i386
 *
 * Fills the eleven 16-byte slots of aes_round_keys: slot 0 receives the
 * key unchanged and each of the ten following slots is derived from the
 * previous one by AESKEYGENASSIST plus the local helper at the end of
 * this routine.
 *
 * Uses PTR1 as the write cursor and clobbers %xmm0-%xmm2.
 */
ENTRY(x86_aes_expand_key)
#ifdef __i386__
	push	%ebp
	mov	%esp, %ebp
	movl	8(%ebp), %eax
#endif

	SETPTR(aes_round_keys, PTR1)
	movdqu	(PTR0), %xmm0
	movdqa	%xmm0, (PTR1)		/* slot 0: the key as supplied */
	add	$16, PTR1

	/* One AESKEYGENASSIST + helper call per remaining slot */
	AESKEYGENASSIST(0x01)
	call	.Lexpand_emit_key
	AESKEYGENASSIST(0x02)
	call	.Lexpand_emit_key
	AESKEYGENASSIST(0x04)
	call	.Lexpand_emit_key
	AESKEYGENASSIST(0x08)
	call	.Lexpand_emit_key
	AESKEYGENASSIST(0x10)
	call	.Lexpand_emit_key
	AESKEYGENASSIST(0x20)
	call	.Lexpand_emit_key
	AESKEYGENASSIST(0x40)
	call	.Lexpand_emit_key
	AESKEYGENASSIST(0x80)
	call	.Lexpand_emit_key
	AESKEYGENASSIST(0x1b)
	call	.Lexpand_emit_key
	AESKEYGENASSIST(0x36)
	call	.Lexpand_emit_key

#ifdef __i386__
	pop	%ebp
#endif
	ret

/*
 * Local helper.
 * In:	%xmm0 = previous round key, %xmm1 = AESKEYGENASSIST result,
 *	PTR1 = slot to fill
 * Out:	%xmm0 = new round key (also stored at PTR1), PTR1 advanced by 16
 */
.Lexpand_emit_key:
	/* XOR the key with itself shifted left by 4, 8 and 12 bytes */
	movdqa	%xmm0, %xmm2
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	/* Broadcast the top dword of the assist result and mix it in */
	pshufd	$0xff, %xmm1, %xmm1
	pxor	%xmm1, %xmm0
	movdqa	%xmm0, (PTR1)
	add	$16, PTR1
	ret

ENDPROC(x86_aes_expand_key)
345 | ||
/*
 * Round-key table: eleven 16-byte slots, written by x86_aes_expand_key
 * and read by x86_aes_mangle.  Zero-initialised (.bss) and aligned to
 * 64 bytes, which also satisfies the 16-byte alignment needed by the
 * movdqa / aesenc memory accesses.
 */
	.bss
	.balign	64
aes_round_keys:
	.space	11*16
	.size	aes_round_keys, .-aes_round_keys
795666e6 | 351 | |
2a014536 | 352 | #endif /* i386 or x86_64 */ |
b8579105 PA |
353 | |
/*
 * Emit an empty .note.GNU-stack section so that this object does not
 * cause the whole executable to be linked with an executable stack.
 */
	.section .note.GNU-stack,"",%progbits