]>
Commit | Line | Data |
---|---|---|
2a014536 | 1 | /* |
94f03c9a | 2 | * Copyright (c) 2011-2014, Intel Corporation |
2a014536 BH |
3 | * Authors: Fenghua Yu <fenghua.yu@intel.com>, |
4 | * H. Peter Anvin <hpa@linux.intel.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify it | |
7 | * under the terms and conditions of the GNU General Public License, | |
8 | * version 2, as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope it will be useful, but WITHOUT | |
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
13 | * more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License along with | |
16 | * this program; if not, write to the Free Software Foundation, Inc., | |
17 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | |
18 | * | |
19 | */ | |
20 | ||
795666e6 PA |
21 | #if defined(__i386__) || defined(__x86_64__) |
22 | ||
2a014536 BH |
/*
 * ENTRY(name): open a global symbol `name`, placed on a 64-byte boundary.
 */
#define ENTRY(x)			\
	.globl	x ;			\
	.balign	64 ;			\
x:

/*
 * ENDPROC(name): mark `name` as a function symbol and record its size as
 * the distance from the label to the point where this macro is used.
 */
#define ENDPROC(x)			\
	.type	x, @function ;		\
	.size	x, .-x

/* Number of RDRAND attempts made for one word before giving up. */
#define RDRAND_RETRY_LIMIT	10
33 | ||
795666e6 | 34 | #ifdef __x86_64__ |
2a014536 | 35 | |
3164d3eb PA |
/*
 * x86_rdrand_bytes (x86-64 variant)
 *
 * In:   %rdi = destination buffer
 *       %esi = number of bytes requested
 * Out:  %eax = number of bytes actually stored
 * Uses: %ecx (retry counter), %rdx (RDRAND result), flags
 *
 * Fills the buffer one 64-bit RDRAND result at a time.  Each word is
 * attempted up to RDRAND_RETRY_LIMIT times; if RDRAND keeps reporting
 * failure (CF clear) the routine stops and returns the number of bytes
 * stored so far.
 *
 * Only whole 8-byte words are written.  The remaining count is checked
 * before every store, so a count of zero, or a trailing remainder of
 * fewer than 8 bytes, stores nothing further: the routine never writes
 * past the requested length and never returns more than was requested.
 * Callers are expected to pass a multiple of 8.
 */
ENTRY(x86_rdrand_bytes)
	mov	%esi, %eax		/* remember the requested count */
1:
	cmp	$8, %esi		/* room left for a whole word? */
	jb	4f
	mov	$RDRAND_RETRY_LIMIT, %ecx
2:
	.byte	0x48,0x0f,0xc7,0xf2	/* rdrand %rdx */
	jnc	3f			/* CF clear: no data this time */
	mov	%rdx, (%rdi)
	add	$8, %rdi
	sub	$8, %esi
	jmp	1b
4:
	sub	%esi, %eax		/* requested - remaining = stored */
	ret
3:
	dec	%ecx
	rep;nop				/* pause; leaves the flags from dec intact */
	jnz	2b
	jmp	4b			/* retries exhausted */
ENDPROC(x86_rdrand_bytes)
2a014536 BH |
56 | |
/* Register assignments for the AES routines, x86-64 build. */
#define SETPTR(var,ptr)	leaq var(%rip),ptr	/* ptr = &var, RIP-relative */
#define PTR0		%rdi
#define PTR1		%rsi
#define PTR2		%rcx
#define CTR3		%eax
#define NPTR2		1	/* ModRM register number of %rcx (only 0-7 can be encoded this way) */
63 | ||
64 | #elif defined(__i386__) | |
65 | ||
/*
 * x86_rdrand_bytes (i386 variant)
 *
 * In:   first stack argument  = destination buffer
 *       second stack argument = number of bytes requested
 * Out:  %eax = number of bytes actually stored
 * Uses: %ecx (retry counter), %edx (RDRAND result), flags;
 *       %ebp, %edi and %esi are saved and restored.
 *
 * Fills the buffer one 32-bit RDRAND result at a time.  Each word is
 * attempted up to RDRAND_RETRY_LIMIT times; if RDRAND keeps reporting
 * failure (CF clear) the routine stops and returns the number of bytes
 * stored so far.
 *
 * Only whole 4-byte words are written.  The remaining count is checked
 * before every store, so a count of zero, or a trailing remainder of
 * fewer than 4 bytes, stores nothing further: the routine never writes
 * past the requested length and never returns more than was requested.
 * Callers are expected to pass a multiple of 4.
 */
ENTRY(x86_rdrand_bytes)
	push	%ebp
	mov	%esp, %ebp
	push	%edi
	push	%esi
	movl	8(%ebp), %edi		/* destination buffer */
	movl	12(%ebp), %esi		/* bytes requested */

	mov	%esi, %eax		/* remember the requested count */
1:
	cmp	$4, %esi		/* room left for a whole word? */
	jb	4f
	mov	$RDRAND_RETRY_LIMIT, %ecx
2:
	.byte	0x0f,0xc7,0xf2		/* rdrand %edx */
	jnc	3f			/* CF clear: no data this time */
	mov	%edx, (%edi)
	add	$4, %edi
	sub	$4, %esi
	jmp	1b
4:
	sub	%esi, %eax		/* requested - remaining = stored */
	pop	%esi
	pop	%edi
	pop	%ebp
	ret
3:
	dec	%ecx
	rep;nop				/* pause; leaves the flags from dec intact */
	jnz	2b
	jmp	4b			/* retries exhausted */
ENDPROC(x86_rdrand_bytes)
2a014536 BH |
96 | |
/* Register assignments for the AES routines, i386 build. */
#define SETPTR(var,ptr)	movl $(var),ptr		/* ptr = absolute address of var */
#define PTR0		%eax
#define PTR1		%edx
#define PTR2		%ecx
#define CTR3		%esi
#define NPTR2		1	/* ModRM register number of %ecx */
103 | ||
104 | #endif | |
105 | ||
/*
 * x86_aes_mangle
 *
 * Register roles (PTR0/PTR1/PTR2/CTR3 are per-architecture macros):
 *   PTR0 - input data, read as eight streams spaced 8192 bytes apart
 *   PTR1 - 128 bytes of state (eight 16-byte blocks), updated in place
 *   PTR2 - pointer into the round keys stored at aes_round_keys
 *   CTR3 - number of rounds still to run
 * On i386 PTR0 and PTR1 are loaded from the first and second stack
 * arguments; on x86-64 they are used as they arrive.
 *
 * The eight state blocks are held in %xmm0-%xmm7.  Each of the 512
 * rounds XORs the next 16 bytes of every input stream into the matching
 * state block and then runs ten aesenc steps followed by one aesenclast
 * over all eight blocks, using the eleven keys at aes_round_keys.
 *
 * Memory at PTR0 and PTR1 is accessed with pxor/movdqa memory operands,
 * which require 16-byte alignment.
 */
ENTRY(x86_aes_mangle)
#ifdef __i386__
	push	%ebp
	mov	%esp, %ebp
	movl	8(%ebp), %eax		/* PTR0 */
	movl	12(%ebp), %edx		/* PTR1 */
	push	%esi			/* CTR3 is %esi in this build */
#endif
	movl	$512, CTR3		/* round counter */

	/* Pull the eight state blocks into registers. */
	movdqa	(0*16)(PTR1), %xmm0
	movdqa	(1*16)(PTR1), %xmm1
	movdqa	(2*16)(PTR1), %xmm2
	movdqa	(3*16)(PTR1), %xmm3
	movdqa	(4*16)(PTR1), %xmm4
	movdqa	(5*16)(PTR1), %xmm5
	movdqa	(6*16)(PTR1), %xmm6
	movdqa	(7*16)(PTR1), %xmm7

	/*
	 * The x86-64 path addresses the keys as offset(PTR2) and never
	 * moves PTR2, so it is set once.  The i386 path advances PTR2
	 * through the keys and must reset it at the top of every round.
	 */
#ifdef __x86_64__
	SETPTR(aes_round_keys, PTR2)
.Lmangle_round:
#else
.Lmangle_round:
	SETPTR(aes_round_keys, PTR2)
#endif

	/* Stream stride: 8192 = 512 (rounds) * 16 (bytes per round) */
	pxor	(0*8192)(PTR0), %xmm0
	pxor	(1*8192)(PTR0), %xmm1
	pxor	(2*8192)(PTR0), %xmm2
	pxor	(3*8192)(PTR0), %xmm3
	pxor	(4*8192)(PTR0), %xmm4
	pxor	(5*8192)(PTR0), %xmm5
	pxor	(6*8192)(PTR0), %xmm6
	pxor	(7*8192)(PTR0), %xmm7
	add	$16, PTR0

	/* Ten aesenc steps, one round key each. */
offset = 0
	.rept 10
#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
offset = offset + 16
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc0	/* aesenc %xmm8, %xmm0 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xd0	/* aesenc %xmm8, %xmm2 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xd8	/* aesenc %xmm8, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe0	/* aesenc %xmm8, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xe8	/* aesenc %xmm8, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf0	/* aesenc %xmm8, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdc,0xf8	/* aesenc %xmm8, %xmm7 */
#else
	.byte	0x66,0x0f,0x38,0xdc,0x00+NPTR2	/* aesenc (PTR2), %xmm0 */
	.byte	0x66,0x0f,0x38,0xdc,0x08+NPTR2	/* aesenc (PTR2), %xmm1 */
	.byte	0x66,0x0f,0x38,0xdc,0x10+NPTR2	/* aesenc (PTR2), %xmm2 */
	.byte	0x66,0x0f,0x38,0xdc,0x18+NPTR2	/* aesenc (PTR2), %xmm3 */
	.byte	0x66,0x0f,0x38,0xdc,0x20+NPTR2	/* aesenc (PTR2), %xmm4 */
	.byte	0x66,0x0f,0x38,0xdc,0x28+NPTR2	/* aesenc (PTR2), %xmm5 */
	.byte	0x66,0x0f,0x38,0xdc,0x30+NPTR2	/* aesenc (PTR2), %xmm6 */
	.byte	0x66,0x0f,0x38,0xdc,0x38+NPTR2	/* aesenc (PTR2), %xmm7 */
	add	$16, PTR2
#endif
	.endr

	/* Final step with the eleventh key. */
#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xc0	/* aesenclast %xmm8, %xmm0 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xc8	/* aesenclast %xmm8, %xmm1 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xd0	/* aesenclast %xmm8, %xmm2 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xd8	/* aesenclast %xmm8, %xmm3 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xe0	/* aesenclast %xmm8, %xmm4 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xe8	/* aesenclast %xmm8, %xmm5 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xf0	/* aesenclast %xmm8, %xmm6 */
	.byte	0x66,0x41,0x0f,0x38,0xdd,0xf8	/* aesenclast %xmm8, %xmm7 */
#else
	.byte	0x66,0x0f,0x38,0xdd,0x00+NPTR2	/* aesenclast (PTR2), %xmm0 */
	.byte	0x66,0x0f,0x38,0xdd,0x08+NPTR2	/* aesenclast (PTR2), %xmm1 */
	.byte	0x66,0x0f,0x38,0xdd,0x10+NPTR2	/* aesenclast (PTR2), %xmm2 */
	.byte	0x66,0x0f,0x38,0xdd,0x18+NPTR2	/* aesenclast (PTR2), %xmm3 */
	.byte	0x66,0x0f,0x38,0xdd,0x20+NPTR2	/* aesenclast (PTR2), %xmm4 */
	.byte	0x66,0x0f,0x38,0xdd,0x28+NPTR2	/* aesenclast (PTR2), %xmm5 */
	.byte	0x66,0x0f,0x38,0xdd,0x30+NPTR2	/* aesenclast (PTR2), %xmm6 */
	.byte	0x66,0x0f,0x38,0xdd,0x38+NPTR2	/* aesenclast (PTR2), %xmm7 */
#endif
	sub	$1, CTR3
	jnz	.Lmangle_round

	/* Write the eight state blocks back. */
	movdqa	%xmm0, (0*16)(PTR1)
	movdqa	%xmm1, (1*16)(PTR1)
	movdqa	%xmm2, (2*16)(PTR1)
	movdqa	%xmm3, (3*16)(PTR1)
	movdqa	%xmm4, (4*16)(PTR1)
	movdqa	%xmm5, (5*16)(PTR1)
	movdqa	%xmm6, (6*16)(PTR1)
	movdqa	%xmm7, (7*16)(PTR1)

#ifdef __i386__
	pop	%esi
	pop	%ebp
#endif
	ret
ENDPROC(x86_aes_mangle)
2a014536 | 208 | |
94f03c9a PA |
/*
 * AESKEYGENASSIST(imm): hand-encoded
 *	aeskeygenassist $imm, %xmm0, %xmm1
 * (source %xmm0, result in %xmm1).
 */
#define AESKEYGENASSIST(imm) .byte 0x66,0x0f,0x3a,0xdf,0xc8,imm

/*
 * x86_aes_expand_key
 *
 * In:   PTR0 = address of a 16-byte key (read with an unaligned load);
 *       on i386 it is loaded from the first stack argument.
 * Out:  aes_round_keys filled with eleven 16-byte round keys: the key
 *       itself followed by ten derived keys.
 * Uses: PTR1 (write cursor into aes_round_keys), %xmm0-%xmm2, flags
 *
 * %xmm0 always holds the most recent round key.  Each derived key is
 * produced by AESKEYGENASSIST with the next round constant followed by
 * a call to the local helper at the end of this routine.
 */
ENTRY(x86_aes_expand_key)
#ifdef __i386__
	push	%ebp
	mov	%esp, %ebp
	movl	8(%ebp), %eax		/* PTR0 */
#endif

	SETPTR(aes_round_keys, PTR1)
	movdqu	(PTR0), %xmm0
	movdqa	%xmm0, (PTR1)		/* slot 0 holds the key unchanged */
	add	$16, PTR1

	AESKEYGENASSIST(0x01)
	call	.Lexpand_step
	AESKEYGENASSIST(0x02)
	call	.Lexpand_step
	AESKEYGENASSIST(0x04)
	call	.Lexpand_step
	AESKEYGENASSIST(0x08)
	call	.Lexpand_step
	AESKEYGENASSIST(0x10)
	call	.Lexpand_step
	AESKEYGENASSIST(0x20)
	call	.Lexpand_step
	AESKEYGENASSIST(0x40)
	call	.Lexpand_step
	AESKEYGENASSIST(0x80)
	call	.Lexpand_step
	AESKEYGENASSIST(0x1b)
	call	.Lexpand_step
	AESKEYGENASSIST(0x36)
	call	.Lexpand_step

#ifdef __i386__
	pop	%ebp
#endif
	ret

	/*
	 * Local helper.
	 * In:  %xmm0 = previous round key, %xmm1 = AESKEYGENASSIST result
	 * Out: %xmm0 = next round key, also stored at (PTR1); PTR1 += 16
	 */
.Lexpand_step:
	pshufd	$0xff, %xmm1, %xmm1	/* replicate the top dword of the assist result */
	movdqa	%xmm0, %xmm2
	pslldq	$4, %xmm2		/* previous key shifted up 4 bytes */
	pxor	%xmm2, %xmm0
	pslldq	$4, %xmm2		/* ... 8 bytes */
	pxor	%xmm2, %xmm0
	pslldq	$4, %xmm2		/* ... 12 bytes */
	pxor	%xmm2, %xmm0
	pxor	%xmm1, %xmm0
	movdqa	%xmm0, (PTR1)
	add	$16, PTR1
	ret

ENDPROC(x86_aes_expand_key)
265 | ||
/*
 * Storage for eleven 16-byte AES round keys, zero-initialised in .bss
 * and aligned to 64 bytes.
 */
	.bss
	.balign	64
aes_round_keys:
	.skip	11*16
	.size	aes_round_keys, .-aes_round_keys
795666e6 | 271 | |
2a014536 | 272 | #endif /* i386 or x86_64 */ |
b8579105 PA |
273 | |
274 | /* | |
275 | * This is necessary to keep the whole executable | |
276 | * from needing an executable stack. | |
277 | */ | |
278 | .section .note.GNU-stack,"",%progbits |