]> git.ipfire.org Git - thirdparty/openssl.git/blame - engines/asm/e_padlock-x86_86.pl
Padlock engine: make it independent of inline assembler.
[thirdparty/openssl.git] / engines / asm / e_padlock-x86_86.pl
CommitLineData
ed28aef8
AP
#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# September 2011
#
# Assembler helpers for Padlock engine.

# Command line: [flavour] output.  A first argument containing a dot is
# taken to be the output file name, leaving the flavour unset.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 conventions are selected by a nasm/masm/mingw64 flavour or by an
# output file name ending in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate x86_64-xlate.pl: first next to this script, then under
# ../../crypto/perlasm relative to it.  $dir is set to the empty string when
# $0 carries no directory part, so a stale or undefined $1 is never used.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed from here on is piped through the translator.  Fail
# loudly if the pipe cannot be started; the interpreter and script paths are
# quoted so that paths containing spaces survive the shell.
open STDOUT,"| \"$^X\" \"$xlate\" $flavour $output"
	or die "can't call $xlate: $!";
26
# The generated assembly starts in the text section.
$code = ".text\n";

# Upper bound, in bytes, on what the generated cipher routines handle per
# loop iteration when data has to be staged.
$PADLOCK_CHUNK = 512;	# Must be a power of 2 larger than 16

# Register roles used throughout the assembly templates.
($ctx, $out, $inp, $len, $chunk) = ("%rdx", "%rdi", "%rsi", "%rcx", "%rbx");

# Registers carrying the first four integer arguments.
if ($win64) {
	($arg1, $arg2, $arg3, $arg4) = ("%rcx", "%rdx", "%r8", "%r9");	# Win64 order
}
else {
	($arg1, $arg2, $arg3, $arg4) = ("%rdi", "%rsi", "%rdx", "%rcx");	# Unix order
}
39
# padlock_capability: runs CPUID leaf 0 and compares EBX/EDX/ECX with the
# vendor string "CentaurHauls"; returns 0 on mismatch.  Otherwise it queries
# leaf 0xC0000000 and, if that reports at least 0xC0000001, returns EDX of
# leaf 0xC0000001 with bit 4 forced on.  %rbx is preserved through %r8.
$code.=<<___;
.globl	padlock_capability
.type	padlock_capability,\@abi-omnipotent
.align	16
padlock_capability:
	mov	%rbx,%r8
	xor	%eax,%eax
	cpuid
	xor	%eax,%eax
	cmp	\$`"0x".unpack("H*",'tneC')`,%ebx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'Hrua')`,%edx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'slua')`,%ecx
	jne	.Lnoluck
	mov	\$0xC0000000,%eax
	cpuid
	mov	%eax,%edx
	xor	%eax,%eax
	cmp	\$0xC0000001,%edx
	jb	.Lnoluck
	mov	\$0xC0000001,%eax
	cpuid
	mov	%edx,%eax
	and	\$0xffffffef,%eax
	or	\$0x10,%eax		# set Nano bit#4
.Lnoluck:
	mov	%r8,%rbx
	ret
.size	padlock_capability,.-padlock_capability

___

# padlock_key_bswap: byte-swaps 32-bit words in place, starting at the
# address passed as first argument.  The number of words is read from
# offset 240 of that same argument.
$code.=<<___;
.globl	padlock_key_bswap
.type	padlock_key_bswap,\@abi-omnipotent,0
.align	16
padlock_key_bswap:
	mov	240($arg1),%edx
.Lbswap_loop:
	mov	($arg1),%eax
	bswap	%eax
	mov	%eax,($arg1)
	lea	4($arg1),$arg1
	sub	\$1,%edx
	jnz	.Lbswap_loop
	ret
.size	padlock_key_bswap,.-padlock_key_bswap

___

# padlock_verify_context: moves the first argument into $ctx, pushes the
# flags and calls _padlock_verify_ctx with %rax pointing at
# .Lpadlock_saved_context, then drops the pushed flags again.
$code.=<<___;
.globl	padlock_verify_context
.type	padlock_verify_context,\@abi-omnipotent
.align	16
padlock_verify_context:
	mov	$arg1,$ctx
	pushf
	lea	.Lpadlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
	lea	8(%rsp),%rsp
	ret
.size	padlock_verify_context,.-padlock_verify_context

___

# _padlock_verify_ctx: internal helper.  Expects the caller's pushed flags
# just above the return address, the context pointer in $ctx and the address
# of the saved-context slot in %rax.  When bit 30 of the pushed flags is set
# and the slot holds a different pointer it executes pushf/popf (the same
# sequence as padlock_reload_key).  It always stores $ctx into the slot.
$code.=<<___;
.type	_padlock_verify_ctx,\@abi-omnipotent
.align	16
_padlock_verify_ctx:
	mov	8(%rsp),%r8
	bt	\$30,%r8
	jnc	.Lverified
	cmp	(%rax),$ctx
	je	.Lverified
	pushf
	popf
.Lverified:
	mov	$ctx,(%rax)
	ret
.size	_padlock_verify_ctx,.-_padlock_verify_ctx

___

# padlock_reload_key: executes pushf/popf and returns.
$code.=<<___;
.globl	padlock_reload_key
.type	padlock_reload_key,\@abi-omnipotent
.align	16
padlock_reload_key:
	pushf
	popf
	ret
.size	padlock_reload_key,.-padlock_reload_key

___

# padlock_aes_block: three-argument routine that issues "rep xcryptecb" with
# a count of 1, the key pointer at $ctx+32 and the control word pointer at
# $ctx+16.  %rbx is preserved through %r8.
$code.=<<___;
.globl	padlock_aes_block
.type	padlock_aes_block,\@function,3
.align	16
padlock_aes_block:
	mov	%rbx,%r8
	mov	\$1,$len
	lea	32($ctx),%rbx		# key
	lea	16($ctx),$ctx		# control word
	.byte	0xf3,0x0f,0xa7,0xc8	# rep xcryptecb
	mov	%r8,%rbx
	ret
.size	padlock_aes_block,.-padlock_aes_block

___

# padlock_xstore: two-argument routine that copies the 32-bit second
# argument into %edx and executes xstore.
$code.=<<___;
.globl	padlock_xstore
.type	padlock_xstore,\@function,2
.align	16
padlock_xstore:
	mov	%esi,%edx
	.byte	0x0f,0xa7,0xc0		# xstore
	ret
.size	padlock_xstore,.-padlock_xstore

___

# padlock_sha1_oneshot: three-argument routine; clears %rax, moves the third
# argument into %rcx and executes "rep xsha1".
$code.=<<___;
.globl	padlock_sha1_oneshot
.type	padlock_sha1_oneshot,\@function,3
.align	16
padlock_sha1_oneshot:
	xor	%rax,%rax
	mov	%rdx,%rcx
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	ret
.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot

___

# padlock_sha1: as padlock_sha1_oneshot, but with %rax set to -1.
$code.=<<___;
.globl	padlock_sha1
.type	padlock_sha1,\@function,3
.align	16
padlock_sha1:
	mov	\$-1,%rax
	mov	%rdx,%rcx
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	ret
.size	padlock_sha1,.-padlock_sha1

___

# padlock_sha256_oneshot: three-argument routine; clears %rax, moves the
# third argument into %rcx and executes "rep xsha256".
$code.=<<___;
.globl	padlock_sha256_oneshot
.type	padlock_sha256_oneshot,\@function,3
.align	16
padlock_sha256_oneshot:
	xor	%rax,%rax
	mov	%rdx,%rcx
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	ret
.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot

___

# padlock_sha256: as padlock_sha256_oneshot, but with %rax set to -1.
$code.=<<___;
.globl	padlock_sha256
.type	padlock_sha256,\@function,3
.align	16
padlock_sha256:
	mov	\$-1,%rax
	mov	%rdx,%rcx
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	ret
.size	padlock_sha256,.-padlock_sha256
___
184
# generate_mode(MODE, OPCODE)
#
# Appends to $code the assembly for
#
#	int padlock_MODE_encrypt(void *out, const void *inp,
#			struct padlock_cipher_data *ctx, size_t len);
#
# MODE   names the routine and its local labels ("ecb", "cbc", ...).
# OPCODE is the final byte of the "rep xcrypt*" encoding for that mode.
#
# The generated routine returns 0 without touching any data when ctx or len
# is not a multiple of 16, and 1 otherwise.  Data is processed in a single
# pass when the align bit (bit 5) of the control word is set, or when both
# out and inp are 16-byte aligned.  Otherwise it is processed in pieces of
# at most $PADLOCK_CHUNK bytes: a misaligned inp is first copied to the
# destination of the pass, and a misaligned out is staged in a stack buffer
# that is zeroed before returning.  For every mode except ecb and ctr*, the
# 16 bytes at (%rax) are copied to ctx after each xcrypt to refresh the iv.
sub generate_mode {
my ($mode,$opcode) = @_;
$code.=<<___;
.globl	padlock_${mode}_encrypt
.type	padlock_${mode}_encrypt,\@function,4
.align	16
padlock_${mode}_encrypt:
	push	%rbp
	push	%rbx

	xor	%eax,%eax
	test	\$15,$ctx
	jnz	.L${mode}_abort
	test	\$15,$len
	jnz	.L${mode}_abort
	lea	.Lpadlock_saved_context(%rip),%rax
	pushf
	cld
	call	_padlock_verify_ctx
	lea	16($ctx),$ctx		# control word
	xor	%eax,%eax
	xor	%ebx,%ebx
	testl	\$`1<<5`,($ctx)		# align bit in control word
	jnz	.L${mode}_aligned	# align bit set: take the direct path
	test	\$0x0f,$out
	setz	%al			# !out_misaligned
	test	\$0x0f,$inp
	setz	%bl			# !inp_misaligned
	test	%ebx,%eax
	jnz	.L${mode}_aligned
	neg	%rax
	mov	\$$PADLOCK_CHUNK,$chunk
	not	%rax			# out_misaligned?-1:0
	lea	(%rsp),%rbp
	cmp	$chunk,$len
	cmovc	$len,$chunk		# chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
	and	$chunk,%rax		# out_misaligned?chunk:0
	mov	$len,$chunk
	neg	%rax
	and	\$$PADLOCK_CHUNK-1,$chunk	# chunk%=PADLOCK_CHUNK
	lea	(%rax,%rbp),%rsp
	jmp	.L${mode}_loop
.align	16
.L${mode}_loop:
	mov	$out,%r8		# save parameters
	mov	$inp,%r9
	mov	$len,%r10
	mov	$chunk,$len
	mov	$chunk,%r11
	test	\$0x0f,$out		# out_misaligned
	cmovnz	%rsp,$out
	test	\$0x0f,$inp		# inp_misaligned
	jz	.L${mode}_inp_aligned
	shr	\$3,$len
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
	mov	$chunk,$len
	mov	$out,$inp
.L${mode}_inp_aligned:
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___				if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
$code.=<<___;
	mov	%r8,$out		# restore parameters
	mov	%r11,$chunk
	test	\$0x0f,$out
	jz	.L${mode}_out_aligned
	mov	$chunk,$len
	shr	\$3,$len
	lea	(%rsp),$inp
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
.L${mode}_out_aligned:
	mov	%r9,$inp
	mov	%r10,$len
	add	$chunk,$out
	add	$chunk,$inp
	sub	$chunk,$len
	mov	\$$PADLOCK_CHUNK,$chunk
	jnz	.L${mode}_loop

	test	\$0x0f,$out
	jz	.L${mode}_done

	mov	%rbp,$len		# zero the stack staging buffer
	mov	%rsp,$out
	sub	%rsp,$len
	xor	%rax,%rax
	shr	\$3,$len
	.byte	0xf3,0x48,0xab		# rep stosq
.L${mode}_done:
	lea	(%rbp),%rsp
	jmp	.L${mode}_exit

.align	16
.L${mode}_aligned:
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___				if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
$code.=<<___;
.L${mode}_exit:
	mov	\$1,%eax
	lea	8(%rsp),%rsp		# drop the flags pushed before verify
.L${mode}_abort:
	pop	%rbx
	pop	%rbp
	ret
.size	padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
___
}
308
# Emit one padlock_<mode>_encrypt routine per cipher mode.  Each entry pairs
# the mode name with the last byte of its "rep xcrypt*" encoding.
foreach my $mode_spec (
	[ "ecb",   0xc8 ],
	[ "cbc",   0xd0 ],
	[ "cfb",   0xe0 ],
	[ "ofb",   0xe8 ],
	[ "ctr16", 0xd8 ],
) {
	generate_mode(@$mode_spec);
}
314
# Trailer: identification string, followed by the data-section slot that
# _padlock_verify_ctx and the cipher routines use to remember the most
# recently seen context pointer.
$code.=<<___;
.asciz	"VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
.align	16
.data
.align	8
.Lpadlock_saved_context:
	.quad	0
___

# Replace every `...` span in the accumulated template with the result of
# evaluating its contents as Perl.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

# STDOUT is the pipe to the translator.  Closing a pipe waits for the child
# and reports a non-zero exit status or a failed write, so an unchecked
# close would let a broken or truncated output file pass unnoticed.
close STDOUT or die "error closing STDOUT: $! (exit status $?)";