]>
Commit | Line | Data |
---|---|---|
ed28aef8 AP |
1 | #!/usr/bin/env perl |
2 | ||
3 | # ==================================================================== | |
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | |
5 | # project. The module is, however, dual licensed under OpenSSL and | |
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | |
7 | # details see http://www.openssl.org/~appro/cryptogams/. | |
8 | # ==================================================================== | |
9 | ||
10 | # September 2011 | |
11 | # | |
12 | # Assembler helpers for Padlock engine. | |
13 | ||
# Command line is "[flavour] output".  A lone argument that contains a
# dot is taken to be the output file name, leaving the flavour undefined.
$flavour = shift;
$output  = shift;
if (defined($flavour) && $flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument order is selected either by the assembler flavour or by
# an output file name ending in ".asm".
$win64 = 0;
$win64 = 1 if ((defined($flavour) && $flavour =~ /[nm]asm|mingw64/) ||
	       (defined($output)  && $output  =~ /\.asm$/));

# Directory part of the script path (with trailing separator), or the
# empty string when the script was started without one.  Testing the
# match result avoids picking up a stale capture from an earlier regex.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# The translator is looked for next to this script first and then in
# crypto/perlasm two directories up.
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator.  Each command word is quoted so that paths containing
# blanks survive, absent arguments are left out altogether, and a failure
# to start the child is fatal instead of silently producing no output.
my $xlate_cmd = join(" ", map  { "\"$_\"" }
			  grep { defined($_) && length($_) }
			  ($^X, $xlate, $flavour, $output));
open STDOUT, "| $xlate_cmd" or die "can't call $xlate: $!";
26 | ||
# Assembly text is accumulated in this string.
$code = ".text\n";

$PADLOCK_CHUNK = 512;	# Must be a power of 2 larger than 16

# Register names referenced symbolically by the assembly templates.
($ctx, $out, $inp, $len, $chunk) = ("%rdx", "%rdi", "%rsi", "%rcx", "%rbx");

# Registers carrying the first four integer arguments on entry.
if ($win64) {
	($arg1, $arg2, $arg3, $arg4) = ("%rcx", "%rdx", "%r8", "%r9");	# Win64 order
}
else {
	($arg1, $arg2, $arg3, $arg4) = ("%rdi", "%rsi", "%rdx", "%rcx");	# Unix order
}
39 | ||
# Fixed helper routines: capability probe, key byte-swap, context
# verification/reload, single-block AES, RNG store and the SHA wrappers.
# Text between back-ticks is evaluated as Perl just before the code is
# printed, so the comments below must not contain back-ticks.
$code.=<<___;
# Returns 0 unless cpuid leaf 0 reports the CentaurHauls vendor string
# and extended leaf 0xC0000001 is available; otherwise returns the EDX
# flags of leaf 0xC0000001 with bit 4 forced on.
.globl	padlock_capability
.type	padlock_capability,\@abi-omnipotent
.align	16
padlock_capability:
	mov	%rbx,%r8		# cpuid overwrites rbx, park it in r8
	xor	%eax,%eax
	cpuid
	xor	%eax,%eax		# result if the vendor check fails
	cmp	\$`"0x".unpack("H*",'tneC')`,%ebx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'Hrua')`,%edx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'slua')`,%ecx
	jne	.Lnoluck
	mov	\$0xC0000000,%eax
	cpuid
	mov	%eax,%edx		# highest supported extended leaf
	xor	%eax,%eax
	cmp	\$0xC0000001,%edx
	jb	.Lnoluck
	mov	\$0xC0000001,%eax
	cpuid
	mov	%edx,%eax
	and	\$0xffffffef,%eax
	or	\$0x10,%eax		# set Nano bit#4
.Lnoluck:
	mov	%r8,%rbx
	ret
.size	padlock_capability,.-padlock_capability

# Byte-swaps, in place, every 32-bit word of the expanded key passed as
# the first argument.  The dword at offset 240 is taken to be the round
# count (AES_KEY layout); the schedule holds 4*(rounds+1) words, all of
# which have to be converted, not just the first "rounds" of them.
.globl	padlock_key_bswap
.type	padlock_key_bswap,\@abi-omnipotent,0
.align	16
padlock_key_bswap:
	mov	240($arg1),%edx
	inc	%edx
	shl	\$2,%edx		# 4*(rounds+1) words to swap
.Lbswap_loop:
	mov	($arg1),%eax
	bswap	%eax
	mov	%eax,($arg1)
	lea	4($arg1),$arg1
	sub	\$1,%edx
	jnz	.Lbswap_loop
	ret
.size	padlock_key_bswap,.-padlock_key_bswap

# Public wrapper: pushes the flags, calls _padlock_verify_ctx for the
# context given as first argument, then drops the saved flags again.
.globl	padlock_verify_context
.type	padlock_verify_context,\@abi-omnipotent
.align	16
padlock_verify_context:
	mov	$arg1,$ctx
	pushf
	lea	.Lpadlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
	lea	8(%rsp),%rsp
	ret
.size	padlock_verify_context,.-padlock_verify_context

# Expects the caller's pushed flags just above the return address and the
# address of the saved-context slot in rax.  If bit 30 of those flags is
# set and the slot holds a different context, the flags register is
# rewritten with pushf/popf.  The slot is always updated.
.type	_padlock_verify_ctx,\@abi-omnipotent
.align	16
_padlock_verify_ctx:
	mov	8(%rsp),%r8
	bt	\$30,%r8
	jnc	.Lverified
	cmp	(%rax),$ctx
	je	.Lverified
	pushf
	popf
.Lverified:
	mov	$ctx,(%rax)
	ret
.size	_padlock_verify_ctx,.-_padlock_verify_ctx

# Unconditionally rewrites the flags register with pushf/popf.
.globl	padlock_reload_key
.type	padlock_reload_key,\@abi-omnipotent
.align	16
padlock_reload_key:
	pushf
	popf
	ret
.size	padlock_reload_key,.-padlock_reload_key

# Processes exactly one block with rep xcryptecb; arguments are
# (out, inp, ctx).
.globl	padlock_aes_block
.type	padlock_aes_block,\@function,3
.align	16
padlock_aes_block:
	mov	%rbx,%r8		# rbx is used for the key pointer
	mov	\$1,$len
	lea	32($ctx),%rbx		# key
	lea	16($ctx),$ctx		# control word
	.byte	0xf3,0x0f,0xa7,0xc8	# rep xcryptecb
	mov	%r8,%rbx
	ret
.size	padlock_aes_block,.-padlock_aes_block

# Issues xstore with the second argument copied to edx.
.globl	padlock_xstore
.type	padlock_xstore,\@function,2
.align	16
padlock_xstore:
	mov	%esi,%edx
	.byte	0x0f,0xa7,0xc0		# xstore
	ret
.size	padlock_xstore,.-padlock_xstore

# The four hash wrappers move the third argument into rcx and differ only
# in the instruction used and in the value loaded into rax beforehand
# (0 for the oneshot variants, -1 otherwise).
.globl	padlock_sha1_oneshot
.type	padlock_sha1_oneshot,\@function,3
.align	16
padlock_sha1_oneshot:
	xor	%rax,%rax
	mov	%rdx,%rcx
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	ret
.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot

.globl	padlock_sha1
.type	padlock_sha1,\@function,3
.align	16
padlock_sha1:
	mov	\$-1,%rax
	mov	%rdx,%rcx
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	ret
.size	padlock_sha1,.-padlock_sha1

.globl	padlock_sha256_oneshot
.type	padlock_sha256_oneshot,\@function,3
.align	16
padlock_sha256_oneshot:
	xor	%rax,%rax
	mov	%rdx,%rcx
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	ret
.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot

.globl	padlock_sha256
.type	padlock_sha256,\@function,3
.align	16
padlock_sha256:
	mov	\$-1,%rax
	mov	%rdx,%rcx
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	ret
.size	padlock_sha256,.-padlock_sha256
___
184 | ||
# generate_mode($mode, $opcode)
#
# Appends the routine padlock_${mode}_encrypt to $code.
#
#   $mode    name fragment used for the symbol and its local labels
#   $opcode  last byte of the "rep xcrypt*" encoding for that mode
#
# Generated prototype:
#
# int padlock_$mode_encrypt(void *out, const void *inp,
#		struct padlock_cipher_data *ctx, size_t len);
#
# The routine returns 0 without doing anything when ctx or len is not a
# multiple of 16, and 1 otherwise.  Data is handed to the instruction
# directly when the control word has its align bit set or when both
# buffers are 16-byte aligned; otherwise it is processed in pieces of at
# most PADLOCK_CHUNK bytes, bouncing through the output buffer or through
# a scratch area on the stack that is wiped before returning.  For modes
# other than ecb and ctr the IV slot in front of the control word is
# refreshed after each xcrypt.
sub generate_mode {
my ($mode,$opcode) = @_;
$code.=<<___;
.globl	padlock_${mode}_encrypt
.type	padlock_${mode}_encrypt,\@function,4
.align	16
padlock_${mode}_encrypt:
	push	%rbp
	push	%rbx

	xor	%eax,%eax		# return value for the abort path
	test	\$15,$ctx
	jnz	.L${mode}_abort
	test	\$15,$len
	jnz	.L${mode}_abort
	lea	.Lpadlock_saved_context(%rip),%rax
	pushf				# flags are examined by the callee
	cld
	call	_padlock_verify_ctx
	lea	16($ctx),$ctx		# control word
	xor	%eax,%eax
	xor	%ebx,%ebx
	testl	\$`1<<5`,($ctx)		# align bit in control word
	jnz	.L${mode}_aligned	# set: no software realignment
	test	\$0x0f,$out
	setz	%al			# !out_misaligned
	test	\$0x0f,$inp
	setz	%bl			# !inp_misaligned
	test	%ebx,%eax
	jnz	.L${mode}_aligned
	neg	%rax
	mov	\$$PADLOCK_CHUNK,$chunk
	not	%rax			# out_misaligned?-1:0
	lea	(%rsp),%rbp
	cmp	$chunk,$len
	cmovc	$len,$chunk		# chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
	and	$chunk,%rax		# out_misaligned?chunk:0
	mov	$len,$chunk
	neg	%rax
	and	\$$PADLOCK_CHUNK-1,$chunk	# chunk%=PADLOCK_CHUNK
	lea	(%rax,%rbp),%rsp	# scratch area, flags are left intact
	mov	\$$PADLOCK_CHUNK,%rax
	cmovz	%rax,$chunk		# chunk=chunk?:PADLOCK_CHUNK
	jmp	.L${mode}_loop
.align	16
.L${mode}_loop:
	mov	$out,%r8		# save parameters
	mov	$inp,%r9
	mov	$len,%r10
	mov	$chunk,$len
	mov	$chunk,%r11
	test	\$0x0f,$out		# out_misaligned
	cmovnz	%rsp,$out
	test	\$0x0f,$inp		# inp_misaligned
	jz	.L${mode}_inp_aligned
	shr	\$3,$len
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
	mov	$chunk,$len
	mov	$out,$inp
.L${mode}_inp_aligned:
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___				if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
$code.=<<___;
	mov	%r8,$out		# restore parameters
	mov	%r11,$chunk
	test	\$0x0f,$out
	jz	.L${mode}_out_aligned
	mov	$chunk,$len
	shr	\$3,$len
	lea	(%rsp),$inp
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
.L${mode}_out_aligned:
	mov	%r9,$inp
	mov	%r10,$len
	add	$chunk,$out
	add	$chunk,$inp
	sub	$chunk,$len
	mov	\$$PADLOCK_CHUNK,$chunk	# mov keeps the flags of the sub
	jnz	.L${mode}_loop

	test	\$0x0f,$out
	jz	.L${mode}_done

	mov	%rbp,$len		# wipe the scratch area
	mov	%rsp,$out
	sub	%rsp,$len
	xor	%rax,%rax
	shr	\$3,$len
	.byte	0xf3,0x48,0xab		# rep stosq
.L${mode}_done:
	lea	(%rbp),%rsp
	jmp	.L${mode}_exit

.align	16
.L${mode}_aligned:
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___				if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
$code.=<<___;
.L${mode}_exit:
	mov	\$1,%eax
	lea	8(%rsp),%rsp		# drop the flags pushed above
.L${mode}_abort:
	pop	%rbx
	pop	%rbp
	ret
.size	padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
___
}
308 | ||
# One padlock_<mode>_encrypt routine per entry: mode name followed by the
# opcode byte handed to generate_mode.
foreach my $variant (["ecb",   0xc8],
		     ["cbc",   0xd0],
		     ["cfb",   0xe0],
		     ["ofb",   0xe8],
		     ["ctr16", 0xd8]) {
	generate_mode(@$variant);
}
314 | ||
# Identification string, followed by the data-section quadword that
# _padlock_verify_ctx uses to remember the last context it saw.
$code.=<<___;
.asciz	"VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
.align	16
.data
.align	8
.Lpadlock_saved_context:
	.quad	0
___
# Replace every back-tick quoted fragment with the result of evaluating
# it as a Perl expression.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

# STDOUT is a pipe into the translator.  Write errors and a non-zero exit
# status of the child only become visible through the return values of
# print and close, so both are checked.
print $code or die "error writing to STDOUT: $!";

close STDOUT or die "error closing STDOUT: $! (child status $?)";