]> git.ipfire.org Git - thirdparty/rng-tools.git/blame - rdrand_asm.S
Release version 5.
[thirdparty/rng-tools.git] / rdrand_asm.S
CommitLineData
/*
 * Copyright (c) 2011-2014, Intel Corporation
 * Authors: Fenghua Yu <fenghua.yu@intel.com>,
 *          H. Peter Anvin <hpa@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
20
795666e6
PA
21#if defined(__i386__) || defined(__x86_64__)
22
2a014536
BH
/*
 * ENTRY(x): start the global symbol x on a 64-byte boundary.
 * The expansion ends with the label itself, so the first instruction
 * of the routine follows the macro directly.
 */
#define ENTRY(x)			\
	.globl	x		;	\
	.balign	64		;	\
x:

/*
 * ENDPROC(x): close the routine opened by ENTRY(x) by marking x as a
 * function symbol and recording its size (from x up to this point).
 */
#define ENDPROC(x)			\
	.type	x, @function	;	\
	.size	x, .-x

/*
 * Retry budget used by the RDRAND/RDSEED loops below: the counter is
 * loaded with this value and decremented on every failed attempt; the
 * routine gives up when it reaches zero.
 */
#define RDRAND_RETRY_LIMIT	10
33
795666e6 34#ifdef __x86_64__
2a014536 35
3164d3eb
PA
/*
 * x86_rdrand_bytes -- x86-64 variant
 *
 * Presumably called from C as
 *	unsigned int x86_rdrand_bytes(void *ptr, unsigned int count);
 * (confirm against the C-side declaration).
 *
 * In:    %rdi = destination buffer
 *        %esi = number of bytes wanted
 * Out:   %eax = entry value of %esi minus the value left in %esi
 * Uses:  %ecx (retry budget), %rdx (RDRAND result), flags
 *
 * Each pass stores one 8-byte RDRAND result and subtracts 8 from the
 * remaining count.  The loop test is an unsigned "above zero" check, so
 * the count is expected to be a multiple of 8; a final partial word is
 * still stored as a full 8 bytes.  A word that cannot be obtained within
 * RDRAND_RETRY_LIMIT attempts ends the routine early.
 */
ENTRY(x86_rdrand_bytes)
	movl	%esi, %eax			/* keep the requested count */
.Lrdrand64_next_word:
	movl	$RDRAND_RETRY_LIMIT, %ecx	/* fresh retry budget per word */
.Lrdrand64_attempt:
	.byte	0x48,0x0f,0xc7,0xf2		/* rdrand %rdx */
	jnc	.Lrdrand64_failed		/* CF clear: nothing delivered */
	movq	%rdx, (%rdi)
	addq	$8, %rdi
	subl	$8, %esi
	ja	.Lrdrand64_next_word		/* more bytes still wanted */
.Lrdrand64_done:
	subl	%esi, %eax			/* requested - remaining */
	ret
.Lrdrand64_failed:
	decl	%ecx
	pause					/* same encoding as rep;nop, flags untouched */
	jnz	.Lrdrand64_attempt		/* budget left: try again */
	jmp	.Lrdrand64_done
ENDPROC(x86_rdrand_bytes)
2a014536 56
f655a53a
PA
/*
 * x86_rdseed_or_rdrand_bytes -- x86-64 variant
 *
 * In:    %rdi = RDSEED output buffer
 *        %rsi = pointer to 32-bit RDSEED byte count (in/out)
 *        %rdx = RDRAND output buffer
 *        %rcx = pointer to 32-bit RDRAND byte count (in/out)
 * Out:   both counts are reduced by the remaining count of their kind
 * Uses:  %rax (instruction result), %r8d/%r9d (remaining RDSEED/RDRAND
 *        bytes), %r10d (retry budget), flags
 *
 * The loop prefers RDSEED.  Whenever RDSEED delivers nothing, one RDRAND
 * word is tried instead and stored into the RDRAND buffer.  Only when
 * both fail is the retry budget decremented.  The routine ends when
 * either count is used up or the budget reaches zero.
 */
ENTRY(x86_rdseed_or_rdrand_bytes)
	movl	(%rsi), %r8d			/* RDSEED bytes remaining */
	movl	(%rcx), %r9d			/* RDRAND bytes remaining */
.Lseed64_next_word:
	movl	$RDRAND_RETRY_LIMIT, %r10d	/* budget resets after any success */
.Lseed64_try_rdseed:
	.byte	0x48,0x0f,0xc7,0xf8		/* rdseed %rax */
	jnc	.Lseed64_try_rdrand		/* CF clear: fall back to RDRAND */
	movq	%rax, (%rdi)
	addq	$8, %rdi
	subl	$8, %r8d
	ja	.Lseed64_next_word
.Lseed64_done:
	subl	%r8d, (%rsi)			/* write back updated RDSEED count */
	subl	%r9d, (%rcx)			/* write back updated RDRAND count */
	ret
.Lseed64_try_rdrand:
	.byte	0x48,0x0f,0xc7,0xf0		/* rdrand %rax */
	jnc	.Lseed64_both_failed
	movq	%rax, (%rdx)
	addq	$8, %rdx
	subl	$8, %r9d
	ja	.Lseed64_next_word
	jmp	.Lseed64_done			/* RDRAND buffer is full */
.Lseed64_both_failed:
	decl	%r10d
	pause					/* same encoding as rep;nop, flags untouched */
	jnz	.Lseed64_try_rdseed
	jmp	.Lseed64_done
ENDPROC(x86_rdseed_or_rdrand_bytes)
87
2a014536
BH
/*
 * Register roles for the AES routines further down (x86-64 build).
 *
 * SETPTR(var,ptr) loads the address of var RIP-relatively.
 * PTR0 / PTR1 are the first and second integer argument registers.
 * PTR2 holds the round-key pointer and CTR3 the loop counter.
 */
#define SETPTR(var,ptr)		leaq var(%rip),ptr
#define PTR0			%rdi
#define PTR1			%rsi
#define PTR2			%rcx
#define CTR3			%eax
/* Register number of PTR2 (%rcx = 1); only 0-7 are valid here */
#define NPTR2			1
94
95#elif defined(__i386__)
96
/*
 * x86_rdrand_bytes -- i386 variant (arguments on the stack)
 *
 * In:    8(%ebp)  = destination buffer
 *        12(%ebp) = number of bytes wanted
 * Out:   %eax = requested count minus the count left on exit
 * Uses:  %ecx (retry budget), %edx (RDRAND result), flags;
 *        %edi and %esi are saved and restored
 *
 * Each pass stores one 4-byte RDRAND result and subtracts 4 from the
 * remaining count.  The loop test is an unsigned "above zero" check, so
 * the count is expected to be a multiple of 4.  A word that cannot be
 * obtained within RDRAND_RETRY_LIMIT attempts ends the routine early.
 */
ENTRY(x86_rdrand_bytes)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	movl	8(%ebp), %edi			/* destination cursor */
	movl	12(%ebp), %esi			/* bytes remaining */

	movl	%esi, %eax			/* keep the requested count */
.Lrdrand32_next_word:
	movl	$RDRAND_RETRY_LIMIT, %ecx	/* fresh retry budget per word */
.Lrdrand32_attempt:
	.byte	0x0f,0xc7,0xf2			/* rdrand %edx */
	jnc	.Lrdrand32_failed		/* CF clear: nothing delivered */
	movl	%edx, (%edi)
	addl	$4, %edi
	subl	$4, %esi
	ja	.Lrdrand32_next_word		/* more bytes still wanted */
.Lrdrand32_done:
	subl	%esi, %eax			/* requested - remaining */
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.Lrdrand32_failed:
	decl	%ecx
	pause					/* same encoding as rep;nop, flags untouched */
	jnz	.Lrdrand32_attempt		/* budget left: try again */
	jmp	.Lrdrand32_done
ENDPROC(x86_rdrand_bytes)
2a014536 127
f655a53a
PA
128
/*
 * x86_rdseed_or_rdrand_bytes -- i386 variant (arguments on the stack)
 *
 * In:    8(%ebp)  = RDSEED output buffer
 *        12(%ebp) = pointer to 32-bit RDSEED byte count (in/out)
 *        16(%ebp) = RDRAND output buffer
 *        20(%ebp) = pointer to 32-bit RDRAND byte count (in/out)
 * Out:   both counts are reduced by the remaining count of their kind
 * Uses:  %eax (instruction result), %ecx (retry budget), %edx, flags;
 *        %edi, %esi and %ebx are saved and restored
 *
 * The loop prefers RDSEED.  Whenever RDSEED delivers nothing, one RDRAND
 * word is tried instead and stored into the RDRAND buffer.  Only when
 * both fail is the retry budget decremented.  The routine ends when
 * either count is used up or the budget reaches zero.
 */
ENTRY(x86_rdseed_or_rdrand_bytes)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx

	movl	12(%ebp), %ebx
	movl	20(%ebp), %esi
	movl	8(%ebp), %edi			/* RDSEED pointer */
	movl	16(%ebp), %edx			/* RDRAND pointer */
	movl	(%ebx), %ebx			/* RDSEED count */
	movl	(%esi), %esi			/* RDRAND count */
.Lseed32_next_word:
	movl	$RDRAND_RETRY_LIMIT, %ecx	/* budget resets after any success */
.Lseed32_try_rdseed:
	.byte	0x0f,0xc7,0xf8			/* rdseed %eax */
	jnc	.Lseed32_try_rdrand		/* CF clear: fall back to RDRAND */
	movl	%eax, (%edi)
	addl	$4, %edi
	subl	$4, %ebx
	ja	.Lseed32_next_word
.Lseed32_done:
	/* %edx/%eax are free again here: reuse them for the count pointers */
	movl	12(%ebp), %edx
	movl	20(%ebp), %eax
	subl	%ebx, (%edx)			/* RDSEED count */
	subl	%esi, (%eax)			/* RDRAND count */

	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.Lseed32_try_rdrand:
	.byte	0x0f,0xc7,0xf0			/* rdrand %eax */
	jnc	.Lseed32_both_failed
	movl	%eax, (%edx)
	addl	$4, %edx
	subl	$4, %esi
	/*
	 * Continue only while the RDRAND count is still above zero
	 * (unsigned), otherwise finish -- the same test the x86-64 variant
	 * uses.  The previous "jnz 1b / ja 4b" pair never exited when the
	 * count reached exactly zero: it fell through into the retry path,
	 * so a later RDSEED failure stored RDRAND data past the end of the
	 * RDRAND buffer and wrapped the count.  A count that was not a
	 * multiple of 4 wrapped and kept looping as well.
	 */
	ja	.Lseed32_next_word
	jmp	.Lseed32_done			/* RDRAND buffer is full */
.Lseed32_both_failed:
	decl	%ecx
	pause					/* same encoding as rep;nop, flags untouched */
	jnz	.Lseed32_try_rdseed
	jmp	.Lseed32_done
ENDPROC(x86_rdseed_or_rdrand_bytes)
176
2a014536
BH
/*
 * Register roles for the AES routines further down (i386 build).
 *
 * SETPTR(var,ptr) loads the absolute address of var as an immediate.
 * PTR0 / PTR1 receive the first and second stack arguments (loaded by
 * each routine's prologue).  PTR2 holds the round-key pointer and CTR3
 * the loop counter.
 */
#define SETPTR(var,ptr)		movl $(var),ptr
#define PTR0			%eax
#define PTR1			%edx
#define PTR2			%ecx
#define CTR3			%esi
/* Register number of PTR2 (%ecx = 1), added into the ModRM bytes below */
#define NPTR2			1
183
184#endif
185
/*
 * x86_aes_mangle
 *
 * In:    PTR0 = first argument: input data, read only.  Each of the 512
 *               passes reads eight 16-byte words, one from each of eight
 *               regions spaced 8192 bytes apart.
 *        PTR1 = second argument: eight 16-byte state words, loaded on
 *               entry and written back on exit (movdqa, so 16-byte
 *               alignment is required)
 * Uses:  PTR2 (round-key pointer), CTR3 (pass counter), %xmm0-%xmm7,
 *        plus %xmm8 on x86-64; on i386 the arguments come from the stack
 *        and %esi is saved and restored.
 *
 * Per pass: XOR one input word into each state lane, then run the lane
 * through 10 AESENC rounds and one AESENCLAST using the 11 round keys in
 * aes_round_keys.  The AES instructions are emitted as raw bytes.
 */
ENTRY(x86_aes_mangle)
#ifdef __i386__
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%ebp), %eax			/* PTR0 */
	movl	12(%ebp), %edx			/* PTR1 */
	pushl	%esi				/* CTR3: must be preserved for the caller */
#endif
	movl	$512, CTR3			/* Number of passes */

	/* Load the eight state lanes */
	.irp	lane,0,1,2,3,4,5,6,7
	movdqa	(\lane*16)(PTR1), %xmm\lane
	.endr

#ifdef __x86_64__
	/* PTR2 is never advanced on x86-64, so set it once */
	SETPTR(aes_round_keys, PTR2)
.Laes_mangle_pass:
#else
	/* PTR2 walks through the keys on i386, so reset it every pass */
.Laes_mangle_pass:
	SETPTR(aes_round_keys, PTR2)
#endif

	/* Mix in the input; 8192 = 512 (passes) * 16 (bytes) */
	.irp	lane,0,1,2,3,4,5,6,7
	pxor	(\lane*8192)(PTR0), %xmm\lane
	.endr
	add	$16, PTR0

offset = 0
	.rept	10
#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
offset = offset + 16
	/* aesenc %xmm8, %xmm<lane> */
	.irp	lane,0,1,2,3,4,5,6,7
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc0+8*\lane
	.endr
#else
	/* aesenc (PTR2), %xmm<lane> */
	.irp	lane,0,1,2,3,4,5,6,7
	.byte	0x66,0x0f,0x38,0xdc,0x00+NPTR2+8*\lane
	.endr
	add	$16, PTR2
#endif
	.endr

#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
	/* aesenclast %xmm8, %xmm<lane> */
	.irp	lane,0,1,2,3,4,5,6,7
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xc0+8*\lane
	.endr
#else
	/* aesenclast (PTR2), %xmm<lane> */
	.irp	lane,0,1,2,3,4,5,6,7
	.byte	0x66,0x0f,0x38,0xdd,0x00+NPTR2+8*\lane
	.endr
#endif
	sub	$1, CTR3
	jnz	.Laes_mangle_pass

	/* Write the eight state lanes back */
	.irp	lane,0,1,2,3,4,5,6,7
	movdqa	%xmm\lane, (\lane*16)(PTR1)
	.endr

#ifdef __i386__
	popl	%esi
	popl	%ebp
#endif
	ret
ENDPROC(x86_aes_mangle)
2a014536 288
94f03c9a
PA
/* Raw encoding of: aeskeygenassist $imm,%xmm0,%xmm1 */
#define AESKEYGENASSIST(imm) .byte 0x66,0x0f,0x3a,0xdf,0xc8,imm

/*
 * x86_aes_expand_key
 *
 * In:    PTR0 = first argument: 16-byte key, may be unaligned (movdqu)
 * Out:   aes_round_keys[0..10] = the key followed by ten derived keys
 * Uses:  PTR1 (write cursor into aes_round_keys), %xmm0-%xmm2, flags
 *
 * Slot 0 receives the key unchanged.  Each following slot is produced by
 * one AESKEYGENASSIST with the next round constant plus the local helper
 * at the end of this routine.
 */
ENTRY(x86_aes_expand_key)
#ifdef __i386__
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%ebp), %eax			/* PTR0 */
#endif

	SETPTR(aes_round_keys, PTR1)
	movdqu	(PTR0), %xmm0
	movdqa	%xmm0, (PTR1)			/* First slot = the plain key */
	add	$16, PTR1

	/* One derived key per round constant */
	.irp	rcon,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36
	AESKEYGENASSIST(\rcon)
	call	.Laes_expand_step
	.endr

#ifdef __i386__
	popl	%ebp
#endif
	ret

/*
 * Local helper.
 * In:   %xmm0 = previous round key, %xmm1 = AESKEYGENASSIST result
 * Out:  %xmm0 = next round key, also stored at (PTR1); PTR1 += 16
 * Uses: %xmm2
 */
.Laes_expand_step:
	pshufd	$0xff, %xmm1, %xmm1		/* broadcast the top dword */
	movdqa	%xmm0, %xmm2
	.rept	3				/* xor in the key shifted by 4, 8 and 12 bytes */
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	.endr
	pxor	%xmm1, %xmm0
	movdqa	%xmm0, (PTR1)
	add	$16, PTR1
	ret

ENDPROC(x86_aes_expand_key)
345
/*
 * Expanded key schedule: 11 slots of 16 bytes, written by
 * x86_aes_expand_key and read by x86_aes_mangle.  Zero-initialised
 * (.bss), 64-byte aligned.
 */
	.bss
	.p2align 6
aes_round_keys:
	.skip	11*16
	.size	aes_round_keys, .-aes_round_keys
795666e6 351
2a014536 352#endif /* i386 or x86_64 */
b8579105
PA
353
/*
 * This is necessary to keep the whole executable
 * from needing an executable stack.
 */
358 .section .note.GNU-stack,"",%progbits