]> git.ipfire.org Git - thirdparty/openssl.git/blame - engines/asm/e_padlock-x86_64.pl
Add fips/ecdh directory.
[thirdparty/openssl.git] / engines / asm / e_padlock-x86_64.pl
CommitLineData
ed28aef8
AP
#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# September 2011
#
# Assembler helpers for Padlock engine. See also e_padlock-x86.pl for
# details.

# Command line: [flavour] output.  A lone first argument containing a dot
# is taken to be the output file name, with no flavour given.
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Use the Win64 argument registers for nasm/masm/mingw64 flavours, or when
# the output file name ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate x86_64-xlate.pl: first in the directory of this script, then in
# ../../crypto/perlasm relative to it.  $dir is empty when $0 carries no
# directory component, so the capture variable is never read after a
# failed match.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator.  Fail loudly if the pipe cannot be set up instead of
# silently generating nothing.
open STDOUT,"| $^X $xlate $flavour $output"
    or die "can't call $xlate: $!";

# All generated assembly is accumulated in $code and printed in one go at
# the end of the script.
$code=".text\n";

$PADLOCK_CHUNK=512;	# Must be a power of 2 between 32 and 2^20

# Fixed register roles used inside the generated routines.
$ctx="%rdx";
$out="%rdi";
$inp="%rsi";
$len="%rcx";
$chunk="%rbx";

# Registers used for the first four arguments by the \@abi-omnipotent
# routines, chosen according to $win64.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Fixed helper routines, appended verbatim to $code.  Inside the heredoc
# "\$" and "\@" produce literal "$" and "@", $arg1/$ctx/$len are
# interpolated, and `...` spans are left in place to be evaluated by the
# substitution that runs just before $code is printed.
#
#   padlock_capability      runs cpuid leaf 0 and compares ebx/edx/ecx with
#                           the packed strings 'tneC','Hrua','slua' (the
#                           vendor id "CentaurHauls"); on a match it queries
#                           leaves 0xC0000000/0xC0000001 and returns edx of
#                           the latter with bit 4 set, otherwise returns 0.
#   padlock_key_bswap       byte-swaps consecutive 32-bit words starting at
#                           the first argument; the word count is read from
#                           offset 240 of that argument.
#   padlock_verify_context  pushes the flags and calls _padlock_verify_ctx
#                           with the address of .Lpadlock_saved_context.
#   _padlock_verify_ctx     if bit 30 of the saved flags is set and the
#                           context pointer differs from the stored one,
#                           executes pushf/popf; always stores the pointer.
#   padlock_reload_key      executes pushf/popf (presumably what forces the
#                           key to be reloaded -- confirm against the
#                           Padlock documentation).
#   padlock_aes_block       one-block "rep xcryptecb".
#   padlock_xstore          "xstore" with edx taken from the second argument.
#   padlock_sha1_oneshot, padlock_sha256_oneshot
#                           "rep xsha1"/"rep xsha256" with rax=0.
#   padlock_sha1_blocks, padlock_sha256_blocks
#                           same instructions with rax=-1.
#   padlock_sha512_blocks   "rep xsha512".
$code.=<<___;
.globl padlock_capability
.type padlock_capability,\@abi-omnipotent
.align 16
padlock_capability:
 mov %rbx,%r8
 xor %eax,%eax
 cpuid
 xor %eax,%eax
 cmp \$`"0x".unpack("H*",'tneC')`,%ebx
 jne .Lnoluck
 cmp \$`"0x".unpack("H*",'Hrua')`,%edx
 jne .Lnoluck
 cmp \$`"0x".unpack("H*",'slua')`,%ecx
 jne .Lnoluck
 mov \$0xC0000000,%eax
 cpuid
 mov %eax,%edx
 xor %eax,%eax
 cmp \$0xC0000001,%edx
 jb .Lnoluck
 mov \$0xC0000001,%eax
 cpuid
 mov %edx,%eax
 and \$0xffffffef,%eax
 or \$0x10,%eax # set Nano bit#4
.Lnoluck:
 mov %r8,%rbx
 ret
.size padlock_capability,.-padlock_capability

.globl padlock_key_bswap
.type padlock_key_bswap,\@abi-omnipotent,0
.align 16
padlock_key_bswap:
 mov 240($arg1),%edx
.Lbswap_loop:
 mov ($arg1),%eax
 bswap %eax
 mov %eax,($arg1)
 lea 4($arg1),$arg1
 sub \$1,%edx
 jnz .Lbswap_loop
 ret
.size padlock_key_bswap,.-padlock_key_bswap

.globl padlock_verify_context
.type padlock_verify_context,\@abi-omnipotent
.align 16
padlock_verify_context:
 mov $arg1,$ctx
 pushf
 lea .Lpadlock_saved_context(%rip),%rax
 call _padlock_verify_ctx
 lea 8(%rsp),%rsp
 ret
.size padlock_verify_context,.-padlock_verify_context

.type _padlock_verify_ctx,\@abi-omnipotent
.align 16
_padlock_verify_ctx:
 mov 8(%rsp),%r8
 bt \$30,%r8
 jnc .Lverified
 cmp (%rax),$ctx
 je .Lverified
 pushf
 popf
.Lverified:
 mov $ctx,(%rax)
 ret
.size _padlock_verify_ctx,.-_padlock_verify_ctx

.globl padlock_reload_key
.type padlock_reload_key,\@abi-omnipotent
.align 16
padlock_reload_key:
 pushf
 popf
 ret
.size padlock_reload_key,.-padlock_reload_key

.globl padlock_aes_block
.type padlock_aes_block,\@function,3
.align 16
padlock_aes_block:
 mov %rbx,%r8
 mov \$1,$len
 lea 32($ctx),%rbx # key
 lea 16($ctx),$ctx # control word
 .byte 0xf3,0x0f,0xa7,0xc8 # rep xcryptecb
 mov %r8,%rbx
 ret
.size padlock_aes_block,.-padlock_aes_block

.globl padlock_xstore
.type padlock_xstore,\@function,2
.align 16
padlock_xstore:
 mov %esi,%edx
 .byte 0x0f,0xa7,0xc0 # xstore
 ret
.size padlock_xstore,.-padlock_xstore

.globl padlock_sha1_oneshot
.type padlock_sha1_oneshot,\@function,3
.align 16
padlock_sha1_oneshot:
 xor %rax,%rax
 mov %rdx,%rcx
 .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1
 ret
.size padlock_sha1_oneshot,.-padlock_sha1_oneshot

.globl padlock_sha1_blocks
.type padlock_sha1_blocks,\@function,3
.align 16
padlock_sha1_blocks:
 mov \$-1,%rax
 mov %rdx,%rcx
 .byte 0xf3,0x0f,0xa6,0xc8 # rep xsha1
 ret
.size padlock_sha1_blocks,.-padlock_sha1_blocks

.globl padlock_sha256_oneshot
.type padlock_sha256_oneshot,\@function,3
.align 16
padlock_sha256_oneshot:
 xor %rax,%rax
 mov %rdx,%rcx
 .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256
 ret
.size padlock_sha256_oneshot,.-padlock_sha256_oneshot

.globl padlock_sha256_blocks
.type padlock_sha256_blocks,\@function,3
.align 16
padlock_sha256_blocks:
 mov \$-1,%rax
 mov %rdx,%rcx
 .byte 0xf3,0x0f,0xa6,0xd0 # rep xsha256
 ret
.size padlock_sha256_blocks,.-padlock_sha256_blocks

.globl padlock_sha512_blocks
.type padlock_sha512_blocks,\@function,3
.align 16
padlock_sha512_blocks:
 mov %rdx,%rcx
 .byte 0xf3,0x0f,0xa6,0xe0 # rep xsha512
 ret
.size padlock_sha512_blocks,.-padlock_sha512_blocks
___

# generate_mode($mode, $opcode)
#
# Appends to $code the assembly for
#
#   int padlock_${mode}_encrypt(void *out, const void *inp,
#                               struct padlock_cipher_data *ctx, size_t len);
#
#   $mode   - suffix used in the symbol and local label names.  "ctr32"
#             additionally pulls in the counter-handling fragments, and any
#             mode not matching /ecb|ctr/ gets the IV write-back fragment.
#   $opcode - last byte of the "rep xcrypt*" instruction encoding.
#
# The generated routine returns 0 without processing anything when ctx or
# len is not a multiple of 16, and 1 otherwise.  After
# _padlock_verify_ctx it takes one of two paths:
#
#   * .L${mode}_aligned: taken when bit 5 of the control word is set or
#     when both out and inp are 16-byte aligned; xcrypt works directly on
#     the caller's buffers.
#   * .L${mode}_loop: otherwise data is processed in pieces of at most
#     PADLOCK_CHUNK bytes.  Misaligned input is first copied with
#     "rep movsq"; when out is misaligned the result is produced in a
#     scratch area carved from the stack, copied to out afterwards, and
#     the scratch area is zeroed before the stack pointer is restored.
#
# Reads the globals $ctx, $out, $inp, $len, $chunk and $PADLOCK_CHUNK.
sub generate_mode {
my ($mode,$opcode) = @_;
# Prologue, argument checks, path selection and scratch-area setup.
$code.=<<___;
.globl padlock_${mode}_encrypt
.type padlock_${mode}_encrypt,\@function,4
.align 16
padlock_${mode}_encrypt:
 push %rbp
 push %rbx

 xor %eax,%eax
 test \$15,$ctx
 jnz .L${mode}_abort
 test \$15,$len
 jnz .L${mode}_abort
 lea .Lpadlock_saved_context(%rip),%rax
 pushf
 cld
 call _padlock_verify_ctx
 lea 16($ctx),$ctx # control word
 xor %eax,%eax
 xor %ebx,%ebx
 testl \$`1<<5`,($ctx) # align bit in control word
 jnz .L${mode}_aligned
 test \$0x0f,$out
 setz %al # !out_misaligned
 test \$0x0f,$inp
 setz %bl # !inp_misaligned
 test %ebx,%eax
 jnz .L${mode}_aligned
 neg %rax
 mov \$$PADLOCK_CHUNK,$chunk
 not %rax # out_misaligned?-1:0
 lea (%rsp),%rbp
 cmp $chunk,$len
 cmovc $len,$chunk # chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
 and $chunk,%rax # out_misaligned?chunk:0
 mov $len,$chunk
 neg %rax
 and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK
 lea (%rax,%rbp),%rsp
___
# ctr32 only: size the first piece from the 32-bit counter stored at
# -4($ctx) so that the counter does not cross a PADLOCK_CHUNK boundary.
$code.=<<___ if ($mode eq "ctr32");
 mov -4($ctx),%eax # pull 32-bit counter
 bswap %eax
 neg %eax
 and \$`$PADLOCK_CHUNK/16-1`,%eax
 jz .L${mode}_loop
 shl \$4,%eax
 cmp %rax,$len
 cmova %rax,$chunk # don't let counter cross PADLOCK_CHUNK
___
# Misaligned path, first half: save the arguments, redirect a misaligned
# out to the stack scratch area, copy misaligned input, run xcrypt.
$code.=<<___;
 jmp .L${mode}_loop
.align 16
.L${mode}_loop:
 cmp $len,$chunk # ctr32 artefact
 cmova $len,$chunk # ctr32 artefact
 mov $out,%r8 # save parameters
 mov $inp,%r9
 mov $len,%r10
 mov $chunk,$len
 mov $chunk,%r11
 test \$0x0f,$out # out_misaligned
 cmovnz %rsp,$out
 test \$0x0f,$inp # inp_misaligned
 jz .L${mode}_inp_aligned
 shr \$3,$len
 .byte 0xf3,0x48,0xa5 # rep movsq
 sub $chunk,$out
 mov $chunk,$len
 mov $out,$inp
.L${mode}_inp_aligned:
 lea -16($ctx),%rax # ivp
 lea 16($ctx),%rbx # key
 shr \$4,$len
 .byte 0xf3,0x0f,0xa7,$opcode # rep xcrypt*
___
# Modes other than ecb/ctr: store the 16 bytes at (%rax) into the IV slot
# at -16($ctx).
$code.=<<___ if ($mode !~ /ecb|ctr/);
 movdqa (%rax),%xmm0
 movdqa %xmm0,-16($ctx) # copy [or refresh] iv
___
# ctr32 only: when the bits selected by 0xffff0000 in the counter word, as
# loaded from memory, are all zero, add 0x10000 to the byte-swapped counter
# and store it back.
$code.=<<___ if ($mode eq "ctr32");
 mov -4($ctx),%eax # pull 32-bit counter
 test \$0xffff0000,%eax
 jnz .L${mode}_no_corr
 bswap %eax
 add \$0x10000,%eax
 bswap %eax
 mov %eax,-4($ctx)
.L${mode}_no_corr:
___
# Misaligned path, second half: copy the result out of the scratch area if
# out was misaligned, advance the pointers and loop; once done, zero the
# scratch area and restore the stack pointer.  The aligned path's entry
# label follows.
$code.=<<___;
 mov %r8,$out # restore parameters
 mov %r11,$chunk
 test \$0x0f,$out
 jz .L${mode}_out_aligned
 mov $chunk,$len
 shr \$3,$len
 lea (%rsp),$inp
 .byte 0xf3,0x48,0xa5 # rep movsq
 sub $chunk,$out
.L${mode}_out_aligned:
 mov %r9,$inp
 mov %r10,$len
 add $chunk,$out
 add $chunk,$inp
 sub $chunk,$len
 mov \$$PADLOCK_CHUNK,$chunk
 jnz .L${mode}_loop

 test \$0x0f,$out
 jz .L${mode}_done

 mov %rbp,$len
 mov %rsp,$out
 sub %rsp,$len
 xor %rax,%rax
 shr \$3,$len
 .byte 0xf3,0x48,0xab # rep stosq
.L${mode}_done:
 lea (%rbp),%rsp
 jmp .L${mode}_exit

.align 16
.L${mode}_aligned:
___
# ctr32 only, aligned path: process at most 16*0x10000 bytes per iteration
# and shorten the first piece so the counter does not cross 2^16.
$code.=<<___ if ($mode eq "ctr32");
 mov -4($ctx),%eax # pull 32-bit counter
 mov \$`16*0x10000`,$chunk
 bswap %eax
 cmp $len,$chunk
 cmova $len,$chunk
 neg %eax
 and \$0xffff,%eax
 jz .L${mode}_aligned_loop
 shl \$4,%eax
 cmp %rax,$len
 cmova %rax,$chunk # don't let counter cross 2^16
 jmp .L${mode}_aligned_loop
.align 16
.L${mode}_aligned_loop:
 cmp $len,$chunk
 cmova $len,$chunk
 mov $len,%r10 # save parameters
 mov $chunk,$len
 mov $chunk,%r11
___
# Aligned path: xcrypt straight from inp to out.
$code.=<<___;
 lea -16($ctx),%rax # ivp
 lea 16($ctx),%rbx # key
 shr \$4,$len # len/=AES_BLOCK_SIZE
 .byte 0xf3,0x0f,0xa7,$opcode # rep xcrypt*
___
# Modes other than ecb/ctr: same IV write-back as on the misaligned path.
$code.=<<___ if ($mode !~ /ecb|ctr/);
 movdqa (%rax),%xmm0
 movdqa %xmm0,-16($ctx) # copy [or refresh] iv
___
# ctr32 only, aligned path: add 0x10000 to the byte-swapped counter, store
# it back, and loop while bytes remain.
$code.=<<___ if ($mode eq "ctr32");
 mov -4($ctx),%eax # pull 32-bit counter
 bswap %eax
 add \$0x10000,%eax
 bswap %eax
 mov %eax,-4($ctx)

 mov %r11,$chunk # restore parameters
 mov %r10,$len
 sub $chunk,$len
 mov \$`16*0x10000`,$chunk
 jnz .L${mode}_aligned_loop
___
# Common exit: return 1 and drop the flags word pushed before the call to
# _padlock_verify_ctx; the abort label skips both and returns with eax
# still 0.
$code.=<<___;
.L${mode}_exit:
 mov \$1,%eax
 lea 8(%rsp),%rsp
.L${mode}_abort:
 pop %rbx
 pop %rbp
 ret
.size padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
___
}

# Instantiate padlock_<mode>_encrypt for every supported mode.  The second
# argument is the last byte of the matching "rep xcrypt*" encoding.
generate_mode("ecb",0xc8);
generate_mode("cbc",0xd0);
generate_mode("cfb",0xe0);
generate_mode("ofb",0xe8);
generate_mode("ctr32",0xd8);	# all 64-bit CPUs have working CTR...

# Trailer: identification string and the data word in which
# _padlock_verify_ctx keeps the last context pointer it saw.
$code.=<<___;
.asciz "VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
.align 16
.data
.align 8
.Lpadlock_saved_context:
 .quad 0
___
# Replace every `expr` span in the accumulated text with the value of expr.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

# STDOUT is a pipe to x86_64-xlate.pl.  close() returns false when the
# write fails or the translator exits with a non-zero status (in the
# latter case $! is 0 and the status is in $?), so check it rather than
# leaving a truncated output file behind unnoticed.
close STDOUT
    or die "error closing STDOUT: " . ($! ? "$!" : "exit status $?");