]>
Commit | Line | Data |
---|---|---|
6aa36e8e RS |
1 | #! /usr/bin/env perl |
2 | # Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. | |
3 | # | |
4 | # Licensed under the OpenSSL license (the "License"). You may not use | |
5 | # this file except in compliance with the License. You can obtain a copy | |
6 | # in the file LICENSE in the source distribution or at | |
7 | # https://www.openssl.org/source/license.html | |
8 | ||
ae381fef AP |
9 | |
10 | # ==================================================================== | |
11 | # Copyright (c) 2008 Andy Polyakov <appro@openssl.org> | |
12 | # | |
13 | # This module may be used under the terms of either the GNU General | |
14 | # Public License version 2 or later, the GNU Lesser General Public | |
15 | # License version 2.1 or later, the Mozilla Public License version | |
16 | # 1.1 or the BSD License. The exact terms of either license are | |
17 | # distributed along with this module. For further details see | |
18 | # http://www.openssl.org/~appro/camellia/. | |
19 | # ==================================================================== | |
20 | ||
21 | # Performance in cycles per processed byte (less is better) in | |
22 | # 'openssl speed ...' benchmark: | |
23 | # | |
24 | # AMD64 Core2 EM64T | |
25 | # -evp camellia-128-ecb 16.7 21.0 22.7 | |
26 | # + over gcc 3.4.6 +25% +5% 0% | |
27 | # | |
28 | # camellia-128-cbc 15.7 20.4 21.1 | |
29 | # | |
30 | # 128-bit key setup 128 216 205 cycles/key | |
31 | # + over gcc 3.4.6 +54% +39% +15% | |
32 | # | |
33 | # Numbers in "+" rows represent performance improvement over compiler | |
34 | # generated code. Key setup timings are impressive on AMD and Core2 | |
35 | # thanks to 64-bit operations being covertly deployed. Improvement on | |
36 | # EM64T, pre-Core2 Intel x86_64 CPU, is not as impressive, because it | |
37 | # apparently emulates some of 64-bit operations in [32-bit] microcode. | |
38 | ||
# Command line: [flavour] output.  A lone argument containing a dot is
# taken to be the output file name, which leaves the flavour undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 conventions are selected by an nasm/masm/mingw64 flavour or by an
# output file name ending in ".asm".
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the x86_64-xlate.pl translator next to this script first, then
# in ../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# Abort if the pipe cannot be started rather than silently emitting nothing.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
ae381fef AP |
52 | |
# hi(REG): rewrite a 32/64-bit a/b/c/d register name ("%eax", "%rbx", ...)
# to its high-byte alias ("%ah", "%bh", ...).  Operands that do not match
# are returned unchanged.  Referenced as `&hi("...")` from back-quoted
# expressions inside the assembly templates.
sub hi {
    my $reg = shift;
    $reg =~ s/%[er]([a-d])x/%${1}h/;
    return $reg;
}
# lo(REG): rewrite a register name to its low-byte alias:
#   %eax/%rax .. %edx/%rdx  -> %al .. %dl
#   %esi/%rsi, %edi/%rdi    -> %sil, %dil
#   %rN / %rNd              -> %rNb
# Operands that match none of the patterns are returned unchanged.
# Referenced as `&lo("...")` from back-quoted expressions inside the
# assembly templates.
sub lo {
    my $reg = shift;
    $reg =~ s/%[er]([a-d])x/%${1}l/;
    $reg =~ s/%[er]([sd]i)/%${1}l/;
    $reg =~ s/%(r[0-9]+)[d]?/%${1}b/;
    return $reg;
}
57 | ||
58 | $t0="%eax";$t1="%ebx";$t2="%ecx";$t3="%edx"; | |
59 | @S=("%r8d","%r9d","%r10d","%r11d"); | |
60 | $i0="%esi"; | |
61 | $i1="%edi"; | |
62 | $Tbl="%rbp"; # size optimization | |
63 | $inp="%r12"; | |
64 | $out="%r13"; | |
65 | $key="%r14"; | |
66 | $keyend="%r15"; | |
67 | $arg0d=$win64?"%ecx":"%edi"; | |
68 | ||
69 | # const unsigned int Camellia_SBOX[4][256]; | |
70 | # Well, sort of... Camellia_SBOX[0][] is interleaved with [1][], | |
71 | # and [2][] - with [3][]. This is done to minimize code size. | |
72 | $SBOX1_1110=0; # Camellia_SBOX[0] | |
73 | $SBOX4_4404=4; # Camellia_SBOX[1] | |
74 | $SBOX2_0222=2048; # Camellia_SBOX[2] | |
75 | $SBOX3_3033=2052; # Camellia_SBOX[3] | |
76 | ||
# Camellia_Feistel(I [, SEED]): append the assembly for one Feistel round
# to $code.
#   I    - round index; its parity picks which pair of @S registers is the
#          round input ($s0,$s1) and which pair is updated ($s2,$s3).
#   SEED - byte displacement used when prefetching the next subkey pair
#          from ($key); defaults to 0.  A negative SEED makes the prefetch
#          offsets step downwards (scale -8 instead of 8).
# On entry $t0/$t1 are expected to hold the current subkey words; the
# template leaves the next pair loaded in them.
sub Camellia_Feistel {
    my ($i,$seed)=@_;
    $seed=0 unless defined($seed);
    my $scale=$seed<0?-8:8;
    my $j=($i&1)*2;
    my ($s0,$s1,$s2,$s3)=@S[map { ($j+$_)%4 } 0..3];

$code.=<<___;
	xor	$s0,$t0				# t0^=key[0]
	xor	$s1,$t1				# t1^=key[1]
	movz	`&hi("$t0")`,$i0		# (t0>>8)&0xff
	movz	`&lo("$t1")`,$i1		# (t1>>0)&0xff
	mov	$SBOX3_3033($Tbl,$i0,8),$t3	# t3=SBOX3_3033[0]
	mov	$SBOX1_1110($Tbl,$i1,8),$t2	# t2=SBOX1_1110[1]
	movz	`&lo("$t0")`,$i0		# (t0>>0)&0xff
	shr	\$16,$t0
	movz	`&hi("$t1")`,$i1		# (t1>>8)&0xff
	xor	$SBOX4_4404($Tbl,$i0,8),$t3	# t3^=SBOX4_4404[0]
	shr	\$16,$t1
	xor	$SBOX4_4404($Tbl,$i1,8),$t2	# t2^=SBOX4_4404[1]
	movz	`&hi("$t0")`,$i0		# (t0>>24)&0xff
	movz	`&lo("$t1")`,$i1		# (t1>>16)&0xff
	xor	$SBOX1_1110($Tbl,$i0,8),$t3	# t3^=SBOX1_1110[0]
	xor	$SBOX3_3033($Tbl,$i1,8),$t2	# t2^=SBOX3_3033[1]
	movz	`&lo("$t0")`,$i0		# (t0>>16)&0xff
	movz	`&hi("$t1")`,$i1		# (t1>>24)&0xff
	xor	$SBOX2_0222($Tbl,$i0,8),$t3	# t3^=SBOX2_0222[0]
	xor	$SBOX2_0222($Tbl,$i1,8),$t2	# t2^=SBOX2_0222[1]
	mov	`$seed+($i+1)*$scale`($key),$t1	# prefetch key[i+1]
	mov	`$seed+($i+1)*$scale+4`($key),$t0
	xor	$t3,$t2				# t2^=t3
	ror	\$8,$t3				# t3=RightRotate(t3,8)
	xor	$t2,$s2
	xor	$t2,$s3
	xor	$t3,$s3
___
}
114 | ||
115 | # void Camellia_EncryptBlock_Rounds( | |
116 | # int grandRounds, | |
117 | # const Byte plaintext[], | |
118 | # const KEY_TABLE_TYPE keyTable, | |
119 | # Byte ciphertext[]) | |
120 | $code=<<___; | |
121 | .text | |
122 | ||
123 | # V1.x API | |
124 | .globl Camellia_EncryptBlock | |
125 | .type Camellia_EncryptBlock,\@abi-omnipotent | |
126 | .align 16 | |
127 | Camellia_EncryptBlock: | |
062acbc3 | 128 | .cfi_startproc |
ae381fef AP |
129 | movl \$128,%eax |
130 | subl $arg0d,%eax | |
131 | movl \$3,$arg0d | |
132 | adcl \$0,$arg0d # keyBitLength==128?3:4 | |
133 | jmp .Lenc_rounds | |
062acbc3 | 134 | .cfi_endproc |
ae381fef AP |
135 | .size Camellia_EncryptBlock,.-Camellia_EncryptBlock |
136 | # V2 | |
137 | .globl Camellia_EncryptBlock_Rounds | |
138 | .type Camellia_EncryptBlock_Rounds,\@function,4 | |
139 | .align 16 | |
140 | .Lenc_rounds: | |
141 | Camellia_EncryptBlock_Rounds: | |
49508b23 | 142 | .cfi_startproc |
ae381fef | 143 | push %rbx |
49508b23 | 144 | .cfi_push %rbx |
ae381fef | 145 | push %rbp |
49508b23 | 146 | .cfi_push %rbp |
ae381fef | 147 | push %r13 |
49508b23 | 148 | .cfi_push %r13 |
ae381fef | 149 | push %r14 |
49508b23 | 150 | .cfi_push %r14 |
ae381fef | 151 | push %r15 |
49508b23 | 152 | .cfi_push %r15 |
ae381fef AP |
153 | .Lenc_prologue: |
154 | ||
155 | #mov %rsi,$inp # put away arguments | |
156 | mov %rcx,$out | |
157 | mov %rdx,$key | |
158 | ||
159 | shl \$6,%edi # process grandRounds | |
160 | lea .LCamellia_SBOX(%rip),$Tbl | |
161 | lea ($key,%rdi),$keyend | |
162 | ||
163 | mov 0(%rsi),@S[0] # load plaintext | |
164 | mov 4(%rsi),@S[1] | |
165 | mov 8(%rsi),@S[2] | |
166 | bswap @S[0] | |
167 | mov 12(%rsi),@S[3] | |
168 | bswap @S[1] | |
169 | bswap @S[2] | |
170 | bswap @S[3] | |
171 | ||
172 | call _x86_64_Camellia_encrypt | |
173 | ||
174 | bswap @S[0] | |
175 | bswap @S[1] | |
176 | bswap @S[2] | |
177 | mov @S[0],0($out) | |
178 | bswap @S[3] | |
179 | mov @S[1],4($out) | |
180 | mov @S[2],8($out) | |
181 | mov @S[3],12($out) | |
182 | ||
183 | mov 0(%rsp),%r15 | |
49508b23 | 184 | .cfi_restore %r15 |
ae381fef | 185 | mov 8(%rsp),%r14 |
49508b23 | 186 | .cfi_restore %r14 |
ae381fef | 187 | mov 16(%rsp),%r13 |
49508b23 | 188 | .cfi_restore %r13 |
ae381fef | 189 | mov 24(%rsp),%rbp |
49508b23 | 190 | .cfi_restore %rbp |
ae381fef | 191 | mov 32(%rsp),%rbx |
49508b23 | 192 | .cfi_restore %rbx |
ae381fef | 193 | lea 40(%rsp),%rsp |
49508b23 | 194 | .cfi_adjust_cfa_offset -40 |
ae381fef AP |
195 | .Lenc_epilogue: |
196 | ret | |
49508b23 | 197 | .cfi_endproc |
ae381fef AP |
198 | .size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds |
199 | ||
200 | .type _x86_64_Camellia_encrypt,\@abi-omnipotent | |
201 | .align 16 | |
202 | _x86_64_Camellia_encrypt: | |
062acbc3 | 203 | .cfi_startproc |
ae381fef AP |
204 | xor 0($key),@S[1] |
205 | xor 4($key),@S[0] # ^=key[0-3] | |
206 | xor 8($key),@S[3] | |
207 | xor 12($key),@S[2] | |
208 | .align 16 | |
209 | .Leloop: | |
210 | mov 16($key),$t1 # prefetch key[4-5] | |
211 | mov 20($key),$t0 | |
212 | ||
213 | ___ | |
214 | for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16); } | |
215 | $code.=<<___; | |
216 | lea 16*4($key),$key | |
217 | cmp $keyend,$key | |
218 | mov 8($key),$t3 # prefetch key[2-3] | |
219 | mov 12($key),$t2 | |
220 | je .Ledone | |
221 | ||
222 | and @S[0],$t0 | |
223 | or @S[3],$t3 | |
224 | rol \$1,$t0 | |
225 | xor $t3,@S[2] # s2^=s3|key[3]; | |
226 | xor $t0,@S[1] # s1^=LeftRotate(s0&key[0],1); | |
227 | and @S[2],$t2 | |
228 | or @S[1],$t1 | |
229 | rol \$1,$t2 | |
230 | xor $t1,@S[0] # s0^=s1|key[1]; | |
231 | xor $t2,@S[3] # s3^=LeftRotate(s2&key[2],1); | |
232 | jmp .Leloop | |
233 | ||
234 | .align 16 | |
235 | .Ledone: | |
236 | xor @S[2],$t0 # SwapHalf | |
237 | xor @S[3],$t1 | |
238 | xor @S[0],$t2 | |
239 | xor @S[1],$t3 | |
240 | ||
241 | mov $t0,@S[0] | |
242 | mov $t1,@S[1] | |
243 | mov $t2,@S[2] | |
244 | mov $t3,@S[3] | |
245 | ||
246 | .byte 0xf3,0xc3 # rep ret | |
062acbc3 | 247 | .cfi_endproc |
ae381fef AP |
248 | .size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt |
249 | ||
250 | # V1.x API | |
251 | .globl Camellia_DecryptBlock | |
252 | .type Camellia_DecryptBlock,\@abi-omnipotent | |
253 | .align 16 | |
254 | Camellia_DecryptBlock: | |
062acbc3 | 255 | .cfi_startproc |
ae381fef AP |
256 | movl \$128,%eax |
257 | subl $arg0d,%eax | |
258 | movl \$3,$arg0d | |
259 | adcl \$0,$arg0d # keyBitLength==128?3:4 | |
260 | jmp .Ldec_rounds | |
062acbc3 | 261 | .cfi_endproc |
ae381fef AP |
262 | .size Camellia_DecryptBlock,.-Camellia_DecryptBlock |
263 | # V2 | |
264 | .globl Camellia_DecryptBlock_Rounds | |
265 | .type Camellia_DecryptBlock_Rounds,\@function,4 | |
266 | .align 16 | |
267 | .Ldec_rounds: | |
268 | Camellia_DecryptBlock_Rounds: | |
49508b23 | 269 | .cfi_startproc |
ae381fef | 270 | push %rbx |
49508b23 | 271 | .cfi_push %rbx |
ae381fef | 272 | push %rbp |
49508b23 | 273 | .cfi_push %rbp |
ae381fef | 274 | push %r13 |
49508b23 | 275 | .cfi_push %r13 |
ae381fef | 276 | push %r14 |
49508b23 | 277 | .cfi_push %r14 |
ae381fef | 278 | push %r15 |
49508b23 | 279 | .cfi_push %r15 |
ae381fef AP |
280 | .Ldec_prologue: |
281 | ||
282 | #mov %rsi,$inp # put away arguments | |
283 | mov %rcx,$out | |
284 | mov %rdx,$keyend | |
285 | ||
286 | shl \$6,%edi # process grandRounds | |
287 | lea .LCamellia_SBOX(%rip),$Tbl | |
288 | lea ($keyend,%rdi),$key | |
289 | ||
290 | mov 0(%rsi),@S[0] # load plaintext | |
291 | mov 4(%rsi),@S[1] | |
292 | mov 8(%rsi),@S[2] | |
293 | bswap @S[0] | |
294 | mov 12(%rsi),@S[3] | |
295 | bswap @S[1] | |
296 | bswap @S[2] | |
297 | bswap @S[3] | |
298 | ||
299 | call _x86_64_Camellia_decrypt | |
300 | ||
301 | bswap @S[0] | |
302 | bswap @S[1] | |
303 | bswap @S[2] | |
304 | mov @S[0],0($out) | |
305 | bswap @S[3] | |
306 | mov @S[1],4($out) | |
307 | mov @S[2],8($out) | |
308 | mov @S[3],12($out) | |
309 | ||
310 | mov 0(%rsp),%r15 | |
49508b23 | 311 | .cfi_restore %r15 |
ae381fef | 312 | mov 8(%rsp),%r14 |
49508b23 | 313 | .cfi_restore %r14 |
ae381fef | 314 | mov 16(%rsp),%r13 |
49508b23 | 315 | .cfi_restore %r13 |
ae381fef | 316 | mov 24(%rsp),%rbp |
49508b23 | 317 | .cfi_restore %rbp |
ae381fef | 318 | mov 32(%rsp),%rbx |
49508b23 | 319 | .cfi_restore %rbx |
ae381fef | 320 | lea 40(%rsp),%rsp |
49508b23 | 321 | .cfi_adjust_cfa_offset -40 |
ae381fef AP |
322 | .Ldec_epilogue: |
323 | ret | |
49508b23 | 324 | .cfi_endproc |
ae381fef AP |
325 | .size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds |
326 | ||
327 | .type _x86_64_Camellia_decrypt,\@abi-omnipotent | |
328 | .align 16 | |
329 | _x86_64_Camellia_decrypt: | |
062acbc3 | 330 | .cfi_startproc |
ae381fef AP |
331 | xor 0($key),@S[1] |
332 | xor 4($key),@S[0] # ^=key[0-3] | |
333 | xor 8($key),@S[3] | |
334 | xor 12($key),@S[2] | |
335 | .align 16 | |
336 | .Ldloop: | |
337 | mov -8($key),$t1 # prefetch key[4-5] | |
338 | mov -4($key),$t0 | |
339 | ||
340 | ___ | |
341 | for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8); } | |
342 | $code.=<<___; | |
343 | lea -16*4($key),$key | |
344 | cmp $keyend,$key | |
345 | mov 0($key),$t3 # prefetch key[2-3] | |
346 | mov 4($key),$t2 | |
347 | je .Lddone | |
348 | ||
349 | and @S[0],$t0 | |
350 | or @S[3],$t3 | |
351 | rol \$1,$t0 | |
352 | xor $t3,@S[2] # s2^=s3|key[3]; | |
353 | xor $t0,@S[1] # s1^=LeftRotate(s0&key[0],1); | |
354 | and @S[2],$t2 | |
355 | or @S[1],$t1 | |
356 | rol \$1,$t2 | |
357 | xor $t1,@S[0] # s0^=s1|key[1]; | |
358 | xor $t2,@S[3] # s3^=LeftRotate(s2&key[2],1); | |
359 | ||
360 | jmp .Ldloop | |
361 | ||
362 | .align 16 | |
363 | .Lddone: | |
364 | xor @S[2],$t2 | |
365 | xor @S[3],$t3 | |
366 | xor @S[0],$t0 | |
367 | xor @S[1],$t1 | |
368 | ||
369 | mov $t2,@S[0] # SwapHalf | |
370 | mov $t3,@S[1] | |
371 | mov $t0,@S[2] | |
372 | mov $t1,@S[3] | |
373 | ||
374 | .byte 0xf3,0xc3 # rep ret | |
062acbc3 | 375 | .cfi_endproc |
ae381fef AP |
376 | .size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt |
377 | ___ | |
378 | ||
# _saveround(RND, KEY, [BIAS,] REG...): append store instructions that
# write key material to slot RND of the table addressed by KEY.
#   RND  - slot index; each slot is 8 bytes, so the base offset is RND*8.
#   KEY  - base register of the table.
#   BIAS - optional non-zero numeric displacement added to every offset;
#          recognised by the first list element being numerically non-zero.
#   REG  - four registers: stored as four 4-byte words, swapped within each
#          pair (REG[1] at +0, REG[0] at +4, REG[3] at +8, REG[2] at +12);
#          one or two registers: stored at +0 and +8 respectively.
sub _saveround {
    my ($rnd,$key,@T)=@_;
    my $bias=int($T[0])?shift(@T):0;

    if (@T==4) {
	$code.=<<___;
	mov	$T[1],`$bias+$rnd*8+0`($key)
	mov	$T[0],`$bias+$rnd*8+4`($key)
	mov	$T[3],`$bias+$rnd*8+8`($key)
	mov	$T[2],`$bias+$rnd*8+12`($key)
___
    } else {
	$code.="\tmov\t$T[0],`$bias+$rnd*8+0`($key)\n";
	$code.="\tmov\t$T[1],`$bias+$rnd*8+8`($key)\n"	if (@T>=2);
    }
}
395 | ||
# _loadround(RND, KEY, [BIAS,] REG [, REG2]): append load instructions
# that read slot RND (8 bytes per slot) of the table addressed by KEY.
# REG is loaded from offset BIAS+RND*8+0 and, when given, REG2 from
# BIAS+RND*8+8.  BIAS is optional and is recognised by the first list
# element being numerically non-zero; it defaults to 0.
sub _loadround {
    my ($rnd,$key,@T)=@_;
    my $bias=int($T[0])?shift(@T):0;

    $code.="\tmov\t`$bias+$rnd*8+0`($key),$T[0]\n";
    $code.="\tmov\t`$bias+$rnd*8+8`($key),$T[1]\n"	if (@T>=2);
}
403 | ||
404 | # shld is very slow on Intel EM64T family. Even on AMD it limits | |
405 | # instruction decode rate [because it's VectorPath] and consequently | |
406 | # performance... | |
# __rotl128(HI, LO, BITS): shld-based 128-bit left rotate of the register
# pair HI:LO by BITS.  %r11 is used as scratch to keep the original HI.
# Nothing is emitted when BITS is zero.
sub __rotl128 {
    my ($hi64,$lo64,$bits)=@_;

    $code.=<<___ if ($bits);
	mov	$hi64,%r11
	shld	\$$bits,$lo64,$hi64
	shld	\$$bits,%r11,$lo64
___
}
418 | ||
419 | # ... Implementing 128-bit rotate without shld gives 80% better | |
420 | # performance on EM64T, +15% on AMD64 and only ~7% degradation on | |
421 | # Core2. This is therefore preferred. | |
# _rotl128(HI, LO, BITS): 128-bit left rotate of the register pair HI:LO
# by BITS, built from plain shifts and ORs instead of shld.  %r9 and %r11
# are clobbered as scratch.  Nothing is emitted when BITS is zero.
sub _rotl128 {
    my ($hi64,$lo64,$bits)=@_;

    $code.=<<___ if ($bits);
	mov	$hi64,%r11
	shl	\$$bits,$hi64
	mov	$lo64,%r9
	shr	\$`64-$bits`,%r9
	shr	\$`64-$bits`,%r11
	or	%r9,$hi64
	shl	\$$bits,$lo64
	or	%r11,$lo64
___
}
438 | ||
439 | { my $step=0; | |
440 | ||
441 | $code.=<<___; | |
442 | .globl Camellia_Ekeygen | |
443 | .type Camellia_Ekeygen,\@function,3 | |
444 | .align 16 | |
445 | Camellia_Ekeygen: | |
49508b23 | 446 | .cfi_startproc |
ae381fef | 447 | push %rbx |
49508b23 | 448 | .cfi_push %rbx |
ae381fef | 449 | push %rbp |
49508b23 | 450 | .cfi_push %rbp |
ae381fef | 451 | push %r13 |
49508b23 | 452 | .cfi_push %r13 |
ae381fef | 453 | push %r14 |
49508b23 | 454 | .cfi_push %r14 |
ae381fef | 455 | push %r15 |
49508b23 | 456 | .cfi_push %r15 |
ae381fef AP |
457 | .Lkey_prologue: |
458 | ||
f9c5e5d9 | 459 | mov %edi,${keyend}d # put away arguments, keyBitLength |
ae381fef AP |
460 | mov %rdx,$out # keyTable |
461 | ||
462 | mov 0(%rsi),@S[0] # load 0-127 bits | |
463 | mov 4(%rsi),@S[1] | |
464 | mov 8(%rsi),@S[2] | |
465 | mov 12(%rsi),@S[3] | |
466 | ||
467 | bswap @S[0] | |
468 | bswap @S[1] | |
469 | bswap @S[2] | |
470 | bswap @S[3] | |
471 | ___ | |
472 | &_saveround (0,$out,@S); # KL<<<0 | |
473 | $code.=<<___; | |
474 | cmp \$128,$keyend # check keyBitLength | |
475 | je .L1st128 | |
476 | ||
477 | mov 16(%rsi),@S[0] # load 128-191 bits | |
478 | mov 20(%rsi),@S[1] | |
479 | cmp \$192,$keyend | |
480 | je .L1st192 | |
481 | mov 24(%rsi),@S[2] # load 192-255 bits | |
482 | mov 28(%rsi),@S[3] | |
483 | jmp .L1st256 | |
484 | .L1st192: | |
485 | mov @S[0],@S[2] | |
486 | mov @S[1],@S[3] | |
487 | not @S[2] | |
488 | not @S[3] | |
489 | .L1st256: | |
490 | bswap @S[0] | |
491 | bswap @S[1] | |
492 | bswap @S[2] | |
493 | bswap @S[3] | |
494 | ___ | |
495 | &_saveround (4,$out,@S); # temp storage for KR! | |
496 | $code.=<<___; | |
497 | xor 0($out),@S[1] # KR^KL | |
498 | xor 4($out),@S[0] | |
499 | xor 8($out),@S[3] | |
500 | xor 12($out),@S[2] | |
501 | ||
502 | .L1st128: | |
503 | lea .LCamellia_SIGMA(%rip),$key | |
504 | lea .LCamellia_SBOX(%rip),$Tbl | |
505 | ||
506 | mov 0($key),$t1 | |
507 | mov 4($key),$t0 | |
508 | ___ | |
509 | &Camellia_Feistel($step++); | |
510 | &Camellia_Feistel($step++); | |
511 | $code.=<<___; | |
512 | xor 0($out),@S[1] # ^KL | |
513 | xor 4($out),@S[0] | |
514 | xor 8($out),@S[3] | |
515 | xor 12($out),@S[2] | |
516 | ___ | |
517 | &Camellia_Feistel($step++); | |
518 | &Camellia_Feistel($step++); | |
519 | $code.=<<___; | |
520 | cmp \$128,$keyend | |
521 | jne .L2nd256 | |
522 | ||
523 | lea 128($out),$out # size optimization | |
524 | shl \$32,%r8 # @S[0]|| | |
525 | shl \$32,%r10 # @S[2]|| | |
526 | or %r9,%r8 # ||@S[1] | |
527 | or %r11,%r10 # ||@S[3] | |
528 | ___ | |
529 | &_loadround (0,$out,-128,"%rax","%rbx"); # KL | |
530 | &_saveround (2,$out,-128,"%r8","%r10"); # KA<<<0 | |
531 | &_rotl128 ("%rax","%rbx",15); | |
532 | &_saveround (4,$out,-128,"%rax","%rbx"); # KL<<<15 | |
533 | &_rotl128 ("%r8","%r10",15); | |
534 | &_saveround (6,$out,-128,"%r8","%r10"); # KA<<<15 | |
535 | &_rotl128 ("%r8","%r10",15); # 15+15=30 | |
536 | &_saveround (8,$out,-128,"%r8","%r10"); # KA<<<30 | |
537 | &_rotl128 ("%rax","%rbx",30); # 15+30=45 | |
538 | &_saveround (10,$out,-128,"%rax","%rbx"); # KL<<<45 | |
539 | &_rotl128 ("%r8","%r10",15); # 30+15=45 | |
540 | &_saveround (12,$out,-128,"%r8"); # KA<<<45 | |
541 | &_rotl128 ("%rax","%rbx",15); # 45+15=60 | |
542 | &_saveround (13,$out,-128,"%rbx"); # KL<<<60 | |
543 | &_rotl128 ("%r8","%r10",15); # 45+15=60 | |
544 | &_saveround (14,$out,-128,"%r8","%r10"); # KA<<<60 | |
545 | &_rotl128 ("%rax","%rbx",17); # 60+17=77 | |
546 | &_saveround (16,$out,-128,"%rax","%rbx"); # KL<<<77 | |
547 | &_rotl128 ("%rax","%rbx",17); # 77+17=94 | |
548 | &_saveround (18,$out,-128,"%rax","%rbx"); # KL<<<94 | |
549 | &_rotl128 ("%r8","%r10",34); # 60+34=94 | |
550 | &_saveround (20,$out,-128,"%r8","%r10"); # KA<<<94 | |
551 | &_rotl128 ("%rax","%rbx",17); # 94+17=111 | |
552 | &_saveround (22,$out,-128,"%rax","%rbx"); # KL<<<111 | |
553 | &_rotl128 ("%r8","%r10",17); # 94+17=111 | |
554 | &_saveround (24,$out,-128,"%r8","%r10"); # KA<<<111 | |
555 | $code.=<<___; | |
556 | mov \$3,%eax | |
557 | jmp .Ldone | |
558 | .align 16 | |
559 | .L2nd256: | |
560 | ___ | |
561 | &_saveround (6,$out,@S); # temp storage for KA! | |
562 | $code.=<<___; | |
563 | xor `4*8+0`($out),@S[1] # KA^KR | |
564 | xor `4*8+4`($out),@S[0] | |
565 | xor `5*8+0`($out),@S[3] | |
566 | xor `5*8+4`($out),@S[2] | |
567 | ___ | |
568 | &Camellia_Feistel($step++); | |
569 | &Camellia_Feistel($step++); | |
570 | ||
571 | &_loadround (0,$out,"%rax","%rbx"); # KL | |
572 | &_loadround (4,$out,"%rcx","%rdx"); # KR | |
573 | &_loadround (6,$out,"%r14","%r15"); # KA | |
574 | $code.=<<___; | |
575 | lea 128($out),$out # size optimization | |
576 | shl \$32,%r8 # @S[0]|| | |
577 | shl \$32,%r10 # @S[2]|| | |
578 | or %r9,%r8 # ||@S[1] | |
579 | or %r11,%r10 # ||@S[3] | |
580 | ___ | |
581 | &_saveround (2,$out,-128,"%r8","%r10"); # KB<<<0 | |
582 | &_rotl128 ("%rcx","%rdx",15); | |
583 | &_saveround (4,$out,-128,"%rcx","%rdx"); # KR<<<15 | |
584 | &_rotl128 ("%r14","%r15",15); | |
585 | &_saveround (6,$out,-128,"%r14","%r15"); # KA<<<15 | |
586 | &_rotl128 ("%rcx","%rdx",15); # 15+15=30 | |
587 | &_saveround (8,$out,-128,"%rcx","%rdx"); # KR<<<30 | |
588 | &_rotl128 ("%r8","%r10",30); | |
589 | &_saveround (10,$out,-128,"%r8","%r10"); # KB<<<30 | |
590 | &_rotl128 ("%rax","%rbx",45); | |
591 | &_saveround (12,$out,-128,"%rax","%rbx"); # KL<<<45 | |
592 | &_rotl128 ("%r14","%r15",30); # 15+30=45 | |
593 | &_saveround (14,$out,-128,"%r14","%r15"); # KA<<<45 | |
594 | &_rotl128 ("%rax","%rbx",15); # 45+15=60 | |
595 | &_saveround (16,$out,-128,"%rax","%rbx"); # KL<<<60 | |
596 | &_rotl128 ("%rcx","%rdx",30); # 30+30=60 | |
597 | &_saveround (18,$out,-128,"%rcx","%rdx"); # KR<<<60 | |
598 | &_rotl128 ("%r8","%r10",30); # 30+30=60 | |
599 | &_saveround (20,$out,-128,"%r8","%r10"); # KB<<<60 | |
600 | &_rotl128 ("%rax","%rbx",17); # 60+17=77 | |
601 | &_saveround (22,$out,-128,"%rax","%rbx"); # KL<<<77 | |
602 | &_rotl128 ("%r14","%r15",32); # 45+32=77 | |
603 | &_saveround (24,$out,-128,"%r14","%r15"); # KA<<<77 | |
604 | &_rotl128 ("%rcx","%rdx",34); # 60+34=94 | |
605 | &_saveround (26,$out,-128,"%rcx","%rdx"); # KR<<<94 | |
606 | &_rotl128 ("%r14","%r15",17); # 77+17=94 | |
607 | &_saveround (28,$out,-128,"%r14","%r15"); # KA<<<94 | |
608 | &_rotl128 ("%rax","%rbx",34); # 77+34=111 | |
609 | &_saveround (30,$out,-128,"%rax","%rbx"); # KL<<<111 | |
610 | &_rotl128 ("%r8","%r10",51); # 60+51=111 | |
611 | &_saveround (32,$out,-128,"%r8","%r10"); # KB<<<111 | |
612 | $code.=<<___; | |
613 | mov \$4,%eax | |
614 | .Ldone: | |
615 | mov 0(%rsp),%r15 | |
49508b23 | 616 | .cfi_restore %r15 |
ae381fef | 617 | mov 8(%rsp),%r14 |
49508b23 | 618 | .cfi_restore %r14 |
ae381fef | 619 | mov 16(%rsp),%r13 |
49508b23 | 620 | .cfi_restore %r13 |
ae381fef | 621 | mov 24(%rsp),%rbp |
49508b23 | 622 | .cfi_restore %rbp |
ae381fef | 623 | mov 32(%rsp),%rbx |
49508b23 | 624 | .cfi_restore %rbx |
ae381fef | 625 | lea 40(%rsp),%rsp |
49508b23 | 626 | .cfi_adjust_cfa_offset -40 |
ae381fef AP |
627 | .Lkey_epilogue: |
628 | ret | |
49508b23 | 629 | .cfi_endproc |
ae381fef AP |
630 | .size Camellia_Ekeygen,.-Camellia_Ekeygen |
631 | ___ | |
632 | } | |
633 | ||
634 | @SBOX=( | |
635 | 112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65, | |
636 | 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189, | |
637 | 134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26, | |
638 | 166,225, 57,202,213, 71, 93, 61,217, 1, 90,214, 81, 86,108, 77, | |
639 | 139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153, | |
640 | 223, 76,203,194, 52,126,118, 5,109,183,169, 49,209, 23, 4,215, | |
641 | 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34, | |
642 | 254, 68,207,178,195,181,122,145, 36, 8,232,168, 96,252,105, 80, | |
643 | 170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210, | |
644 | 16,196, 0, 72,163,247,117,219,138, 3,230,218, 9, 63,221,148, | |
645 | 135, 92,131, 2,205, 74,144, 51,115,103,246,243,157,127,191,226, | |
646 | 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46, | |
647 | 233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89, | |
648 | 120,152, 6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250, | |
649 | 114, 7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164, | |
650 | 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158); | |
651 | ||
# Generators for the 32-bit words of the four lookup tables.  Each takes a
# byte index and returns the word formatted as "0x%08x".  The table name
# spells the byte layout, most significant byte first: a digit marks a byte
# that holds the substituted value, 0 marks a zero byte.

# S1110: s=SBOX[i]; bytes s,s,s,0.
sub S1110 {
    my $s=$SBOX[shift];
    return sprintf("0x%08x",$s<<24|$s<<16|$s<<8);
}

# S4404: index rotated left by one bit before the lookup; bytes s,s,0,s.
sub S4404 {
    my $idx=shift;
    my $s=$SBOX[($idx<<1|$idx>>7)&0xff];
    return sprintf("0x%08x",$s<<24|$s<<16|$s);
}

# S0222: looked-up value rotated left by one bit; bytes 0,s,s,s.
sub S0222 {
    my $s=$SBOX[shift];
    $s=($s<<1|$s>>7)&0xff;
    return sprintf("0x%08x",$s<<16|$s<<8|$s);
}

# S3033: looked-up value rotated right by one bit; bytes s,0,s,s.
sub S3033 {
    my $s=$SBOX[shift];
    $s=($s>>1|$s<<7)&0xff;
    return sprintf("0x%08x",$s<<24|$s<<8|$s);
}
656 | ||
657 | $code.=<<___; | |
658 | .align 64 | |
659 | .LCamellia_SIGMA: | |
660 | .long 0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858 | |
661 | .long 0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5 | |
662 | .long 0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2 | |
663 | .long 0, 0, 0, 0 | |
664 | .LCamellia_SBOX: | |
665 | ___ | |
666 | # tables are interleaved, remember? | |
# data_word(VALUES...): append a single ".long" directive holding the
# comma-separated VALUES to $code.
sub data_word {
    my $directive=".long\t".join(',',@_)."\n";
    $code.=$directive;
}
668 | for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } | |
669 | for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } | |
670 | ||
671 | # void Camellia_cbc_encrypt (const unsigned char *inp, unsigned char *out, | |
672 | # size_t length, const CAMELLIA_KEY *key, | |
673 | # unsigned char *ivp,const int enc); | |
674 | { | |
675 | $_key="0(%rsp)"; | |
676 | $_end="8(%rsp)"; # inp+len&~15 | |
677 | $_res="16(%rsp)"; # len&15 | |
678 | $ivec="24(%rsp)"; | |
679 | $_ivp="40(%rsp)"; | |
680 | $_rsp="48(%rsp)"; | |
681 | ||
682 | $code.=<<___; | |
683 | .globl Camellia_cbc_encrypt | |
684 | .type Camellia_cbc_encrypt,\@function,6 | |
685 | .align 16 | |
686 | Camellia_cbc_encrypt: | |
49508b23 | 687 | .cfi_startproc |
ae381fef AP |
688 | cmp \$0,%rdx |
689 | je .Lcbc_abort | |
690 | push %rbx | |
49508b23 | 691 | .cfi_push %rbx |
ae381fef | 692 | push %rbp |
49508b23 | 693 | .cfi_push %rbp |
ae381fef | 694 | push %r12 |
49508b23 | 695 | .cfi_push %r12 |
ae381fef | 696 | push %r13 |
49508b23 | 697 | .cfi_push %r13 |
ae381fef | 698 | push %r14 |
49508b23 | 699 | .cfi_push %r14 |
ae381fef | 700 | push %r15 |
49508b23 | 701 | .cfi_push %r15 |
ae381fef AP |
702 | .Lcbc_prologue: |
703 | ||
704 | mov %rsp,%rbp | |
49508b23 | 705 | .cfi_def_cfa_register %rbp |
ae381fef AP |
706 | sub \$64,%rsp |
707 | and \$-64,%rsp | |
708 | ||
709 | # place stack frame just "above mod 1024" the key schedule, | |
710 | # this ensures that cache associativity suffices | |
711 | lea -64-63(%rcx),%r10 | |
712 | sub %rsp,%r10 | |
713 | neg %r10 | |
714 | and \$0x3C0,%r10 | |
715 | sub %r10,%rsp | |
bec45a35 | 716 | #add \$8,%rsp # 8 is reserved for callee's ra |
ae381fef AP |
717 | |
718 | mov %rdi,$inp # inp argument | |
719 | mov %rsi,$out # out argument | |
720 | mov %r8,%rbx # ivp argument | |
721 | mov %rcx,$key # key argument | |
282feeba | 722 | mov 272(%rcx),${keyend}d # grandRounds |
ae381fef AP |
723 | |
724 | mov %r8,$_ivp | |
725 | mov %rbp,$_rsp | |
49508b23 | 726 | .cfi_cfa_expression $_rsp,deref,+56 |
ae381fef AP |
727 | |
728 | .Lcbc_body: | |
729 | lea .LCamellia_SBOX(%rip),$Tbl | |
730 | ||
731 | mov \$32,%ecx | |
732 | .align 4 | |
733 | .Lcbc_prefetch_sbox: | |
734 | mov 0($Tbl),%rax | |
735 | mov 32($Tbl),%rsi | |
736 | mov 64($Tbl),%rdi | |
737 | mov 96($Tbl),%r11 | |
738 | lea 128($Tbl),$Tbl | |
739 | loop .Lcbc_prefetch_sbox | |
740 | sub \$4096,$Tbl | |
741 | shl \$6,$keyend | |
742 | mov %rdx,%rcx # len argument | |
743 | lea ($key,$keyend),$keyend | |
744 | ||
745 | cmp \$0,%r9d # enc argument | |
746 | je .LCBC_DECRYPT | |
747 | ||
748 | and \$-16,%rdx | |
749 | and \$15,%rcx # length residue | |
750 | lea ($inp,%rdx),%rdx | |
751 | mov $key,$_key | |
752 | mov %rdx,$_end | |
753 | mov %rcx,$_res | |
754 | ||
755 | cmp $inp,%rdx | |
756 | mov 0(%rbx),@S[0] # load IV | |
757 | mov 4(%rbx),@S[1] | |
758 | mov 8(%rbx),@S[2] | |
759 | mov 12(%rbx),@S[3] | |
760 | je .Lcbc_enc_tail | |
761 | jmp .Lcbc_eloop | |
762 | ||
763 | .align 16 | |
764 | .Lcbc_eloop: | |
765 | xor 0($inp),@S[0] | |
766 | xor 4($inp),@S[1] | |
767 | xor 8($inp),@S[2] | |
768 | bswap @S[0] | |
769 | xor 12($inp),@S[3] | |
770 | bswap @S[1] | |
771 | bswap @S[2] | |
772 | bswap @S[3] | |
773 | ||
774 | call _x86_64_Camellia_encrypt | |
775 | ||
776 | mov $_key,$key # "rewind" the key | |
777 | bswap @S[0] | |
778 | mov $_end,%rdx | |
779 | bswap @S[1] | |
780 | mov $_res,%rcx | |
781 | bswap @S[2] | |
782 | mov @S[0],0($out) | |
783 | bswap @S[3] | |
784 | mov @S[1],4($out) | |
785 | mov @S[2],8($out) | |
786 | lea 16($inp),$inp | |
787 | mov @S[3],12($out) | |
788 | cmp %rdx,$inp | |
789 | lea 16($out),$out | |
790 | jne .Lcbc_eloop | |
791 | ||
792 | cmp \$0,%rcx | |
793 | jne .Lcbc_enc_tail | |
794 | ||
795 | mov $_ivp,$out | |
796 | mov @S[0],0($out) # write out IV residue | |
797 | mov @S[1],4($out) | |
798 | mov @S[2],8($out) | |
799 | mov @S[3],12($out) | |
800 | jmp .Lcbc_done | |
801 | ||
802 | .align 16 | |
803 | .Lcbc_enc_tail: | |
804 | xor %rax,%rax | |
805 | mov %rax,0+$ivec | |
806 | mov %rax,8+$ivec | |
807 | mov %rax,$_res | |
808 | ||
ea4d5005 | 809 | .Lcbc_enc_pushf: |
ae381fef AP |
810 | pushfq |
811 | cld | |
812 | mov $inp,%rsi | |
bec45a35 | 813 | lea 8+$ivec,%rdi |
ae381fef AP |
814 | .long 0x9066A4F3 # rep movsb |
815 | popfq | |
ea4d5005 | 816 | .Lcbc_enc_popf: |
ae381fef AP |
817 | |
818 | lea $ivec,$inp | |
819 | lea 16+$ivec,%rax | |
820 | mov %rax,$_end | |
821 | jmp .Lcbc_eloop # one more time | |
822 | ||
823 | .align 16 | |
824 | .LCBC_DECRYPT: | |
825 | xchg $key,$keyend | |
826 | add \$15,%rdx | |
827 | and \$15,%rcx # length residue | |
828 | and \$-16,%rdx | |
829 | mov $key,$_key | |
830 | lea ($inp,%rdx),%rdx | |
831 | mov %rdx,$_end | |
832 | mov %rcx,$_res | |
833 | ||
834 | mov (%rbx),%rax # load IV | |
835 | mov 8(%rbx),%rbx | |
836 | jmp .Lcbc_dloop | |
837 | .align 16 | |
838 | .Lcbc_dloop: | |
839 | mov 0($inp),@S[0] | |
840 | mov 4($inp),@S[1] | |
841 | mov 8($inp),@S[2] | |
842 | bswap @S[0] | |
843 | mov 12($inp),@S[3] | |
844 | bswap @S[1] | |
845 | mov %rax,0+$ivec # save IV to temporary storage | |
846 | bswap @S[2] | |
847 | mov %rbx,8+$ivec | |
848 | bswap @S[3] | |
849 | ||
850 | call _x86_64_Camellia_decrypt | |
851 | ||
852 | mov $_key,$key # "rewind" the key | |
853 | mov $_end,%rdx | |
854 | mov $_res,%rcx | |
855 | ||
856 | bswap @S[0] | |
857 | mov ($inp),%rax # load IV for next iteration | |
858 | bswap @S[1] | |
859 | mov 8($inp),%rbx | |
860 | bswap @S[2] | |
861 | xor 0+$ivec,@S[0] | |
862 | bswap @S[3] | |
863 | xor 4+$ivec,@S[1] | |
864 | xor 8+$ivec,@S[2] | |
865 | lea 16($inp),$inp | |
866 | xor 12+$ivec,@S[3] | |
867 | cmp %rdx,$inp | |
868 | je .Lcbc_ddone | |
869 | ||
870 | mov @S[0],0($out) | |
871 | mov @S[1],4($out) | |
872 | mov @S[2],8($out) | |
873 | mov @S[3],12($out) | |
874 | ||
875 | lea 16($out),$out | |
876 | jmp .Lcbc_dloop | |
877 | ||
878 | .align 16 | |
879 | .Lcbc_ddone: | |
880 | mov $_ivp,%rdx | |
881 | cmp \$0,%rcx | |
882 | jne .Lcbc_dec_tail | |
883 | ||
884 | mov @S[0],0($out) | |
885 | mov @S[1],4($out) | |
886 | mov @S[2],8($out) | |
887 | mov @S[3],12($out) | |
888 | ||
889 | mov %rax,(%rdx) # write out IV residue | |
890 | mov %rbx,8(%rdx) | |
891 | jmp .Lcbc_done | |
892 | .align 16 | |
893 | .Lcbc_dec_tail: | |
894 | mov @S[0],0+$ivec | |
895 | mov @S[1],4+$ivec | |
896 | mov @S[2],8+$ivec | |
897 | mov @S[3],12+$ivec | |
898 | ||
ea4d5005 | 899 | .Lcbc_dec_pushf: |
ae381fef AP |
900 | pushfq |
901 | cld | |
bec45a35 | 902 | lea 8+$ivec,%rsi |
ae381fef AP |
903 | lea ($out),%rdi |
904 | .long 0x9066A4F3 # rep movsb | |
905 | popfq | |
ea4d5005 | 906 | .Lcbc_dec_popf: |
ae381fef AP |
907 | |
908 | mov %rax,(%rdx) # write out IV residue | |
909 | mov %rbx,8(%rdx) | |
910 | jmp .Lcbc_done | |
911 | ||
912 | .align 16 | |
913 | .Lcbc_done: | |
914 | mov $_rsp,%rcx | |
49508b23 | 915 | .cfi_def_cfa %rcx,56 |
ae381fef | 916 | mov 0(%rcx),%r15 |
49508b23 | 917 | .cfi_restore %r15 |
ae381fef | 918 | mov 8(%rcx),%r14 |
49508b23 | 919 | .cfi_restore %r14 |
ae381fef | 920 | mov 16(%rcx),%r13 |
49508b23 | 921 | .cfi_restore %r13 |
ae381fef | 922 | mov 24(%rcx),%r12 |
49508b23 | 923 | .cfi_restore %r12 |
ae381fef | 924 | mov 32(%rcx),%rbp |
49508b23 | 925 | .cfi_restore %rbp |
ae381fef | 926 | mov 40(%rcx),%rbx |
49508b23 | 927 | .cfi_restore %rbx |
ae381fef | 928 | lea 48(%rcx),%rsp |
49508b23 | 929 | .cfi_def_cfa %rsp,8 |
ae381fef AP |
930 | .Lcbc_abort: |
931 | ret | |
49508b23 | 932 | .cfi_endproc |
ae381fef AP |
933 | .size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt |
934 | ||
dec95a12 | 935 | .asciz "Camellia for x86_64 by <appro\@openssl.org>" |
ae381fef AP |
936 | ___ |
937 | } | |
938 | ||
939 | # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, | |
940 | # CONTEXT *context,DISPATCHER_CONTEXT *disp) | |
941 | if ($win64) { | |
942 | $rec="%rcx"; | |
943 | $frame="%rdx"; | |
944 | $context="%r8"; | |
945 | $disp="%r9"; | |
946 | ||
947 | $code.=<<___; | |
948 | .extern __imp_RtlVirtualUnwind | |
# common_se_handler: Win64 exception handler referenced by the .xdata records
# whose HandlerData[] carries a prologue/epilogue label pair. It patches the
# CONTEXT record (r8) so that it describes the interrupted function's caller,
# then continues in .Lcommon_seh_exit, which is located inside cbc_se_handler.
949 | .type common_se_handler,\@abi-omnipotent | |
950 | .align 16 | |
951 | common_se_handler: | |
# Preserve the registers used below plus the flags, then reserve 64 bytes of
# stack; the shared exit path stores arg5..arg8 of RtlVirtualUnwind at
# 32..56(%rsp).
952 | push %rsi | |
953 | push %rdi | |
954 | push %rbx | |
955 | push %rbp | |
702e7425 | 956 | push %r12 |
ae381fef AP |
957 | push %r13 | |
958 | push %r14 | |
959 | push %r15 | |
960 | pushfq | |
702e7425 | 961 | lea -64(%rsp),%rsp |
ae381fef AP |
962 | |
963 | mov 120($context),%rax # pull context->Rax | |
964 | mov 248($context),%rbx # pull context->Rip | |
965 | ||
966 | mov 8($disp),%rsi # disp->ImageBase | |
967 | mov 56($disp),%r11 # disp->HandlerData | |
968 | ||
# HandlerData[0] and HandlerData[1] are 32-bit image-relative offsets; adding
# ImageBase turns them into addresses that can be compared with Rip.
# Rip below the prologue label: the value pulled from context->Rax is used as
# the frame base as is and no saved registers are reloaded.
# NOTE(review): this relies on the function entry code leaving the entry stack
# pointer in rax; that code is outside this section - confirm.
969 | mov 0(%r11),%r10d # HandlerData[0] | |
970 | lea (%rsi,%r10),%r10 # prologue label | |
971 | cmp %r10,%rbx # context->Rip<prologue label | |
972 | jb .Lin_prologue | |
973 | ||
# From here on the frame base is context->Rsp. At or past the epilogue label
# the saved registers are not reloaded either; only Rsi/Rdi/Rsp get fixed up.
974 | mov 152($context),%rax # pull context->Rsp | |
975 | ||
976 | mov 4(%r11),%r10d # HandlerData[1] | |
977 | lea (%rsi,%r10),%r10 # epilogue label | |
978 | cmp %r10,%rbx # context->Rip>=epilogue label | |
979 | jae .Lin_prologue | |
980 | ||
# Inside the function body: step over the five 8-byte slots above Rsp and copy
# the values found there back into context Rbx, Rbp, R13, R14 and R15.
981 | lea 40(%rax),%rax | |
982 | mov -8(%rax),%rbx | |
983 | mov -16(%rax),%rbp | |
984 | mov -24(%rax),%r13 | |
985 | mov -32(%rax),%r14 | |
986 | mov -40(%rax),%r15 | |
987 | mov %rbx,144($context) # restore context->Rbx | |
988 | mov %rbp,160($context) # restore context->Rbp | |
989 | mov %r13,224($context) # restore context->R13 | |
990 | mov %r14,232($context) # restore context->R14 | |
991 | mov %r15,240($context) # restore context->R15 | |
992 | ||
# Common tail: reload rdi/rsi from 8 and 16 bytes above the frame base and
# publish the frame base as context->Rsp.
# NOTE(review): presumably these are the slots where the entry code parked
# rdi/rsi - not visible in this section, confirm.
993 | .Lin_prologue: | |
994 | mov 8(%rax),%rdi | |
995 | mov 16(%rax),%rsi | |
996 | mov %rax,152($context) # restore context->Rsp | |
997 | mov %rsi,168($context) # restore context->Rsi | |
998 | mov %rdi,176($context) # restore context->Rdi | |
999 | ||
# Finish in the exit code shared with cbc_se_handler.
702e7425 | 1000 | jmp .Lcommon_seh_exit |
ae381fef AP |
1001 | .size common_se_handler,.-common_se_handler | |
1002 | ||
# cbc_se_handler: Win64 exception handler for Camellia_cbc_encrypt. Instead of
# reading HandlerData[] it compares Rip against the .Lcbc_* labels of that
# function to decide how much of the stack frame exists, patches the CONTEXT
# record (r8) accordingly and falls into .Lcommon_seh_exit, the exit path that
# common_se_handler jumps to as well.
1003 | .type cbc_se_handler,\@abi-omnipotent | |
1004 | .align 16 | |
1005 | cbc_se_handler: | |
# Preserve the registers used below plus the flags, then reserve 64 bytes of
# stack; arg5..arg8 of RtlVirtualUnwind are stored at 32..56(%rsp) later on.
1006 | push %rsi | |
1007 | push %rdi | |
1008 | push %rbx | |
1009 | push %rbp | |
1010 | push %r12 | |
1011 | push %r13 | |
1012 | push %r14 | |
1013 | push %r15 | |
1014 | pushfq | |
1015 | lea -64(%rsp),%rsp | |
1016 | ||
1017 | mov 120($context),%rax # pull context->Rax | |
1018 | mov 248($context),%rbx # pull context->Rip | |
1019 | ||
# Rip below .Lcbc_prologue: no saved registers are reloaded; the value pulled
# from context->Rax serves as the frame base.
# NOTE(review): this relies on the entry code of Camellia_cbc_encrypt leaving
# the entry stack pointer in rax; that code is outside this section - confirm.
1020 | lea .Lcbc_prologue(%rip),%r10 | |
1021 | cmp %r10,%rbx # context->Rip<.Lcbc_prologue | |
1022 | jb .Lin_cbc_prologue | |
1023 | ||
# Rip between .Lcbc_prologue and .Lcbc_body: the saved registers are read
# relative to context->Rax rather than through the saved stack pointer slot.
1024 | lea .Lcbc_body(%rip),%r10 | |
1025 | cmp %r10,%rbx # context->Rip<.Lcbc_body | |
1026 | jb .Lin_cbc_frame_setup | |
1027 | ||
# From here on start from context->Rsp. At or past .Lcbc_abort (the final ret)
# only Rsi/Rdi/Rsp are fixed up.
1028 | mov 152($context),%rax # pull context->Rsp | |
1029 | ||
1030 | lea .Lcbc_abort(%rip),%r10 | |
1031 | cmp %r10,%rbx # context->Rip>=.Lcbc_abort | |
1032 | jae .Lin_cbc_prologue | |
1033 | ||
# Between a pushf label and its popf label the pushfq has put 8 extra bytes
# on the stack. Each test pair below first bails out if Rip has not passed the
# pushf label, then adds 8 tentatively; the adjustment is kept when Rip is
# still below the popf label and undone otherwise before the next window is
# examined.
ea4d5005 AP |
1034 | # handle pushf/popf in Camellia_cbc_encrypt | |
1035 | lea .Lcbc_enc_pushf(%rip),%r10 | |
1036 | cmp %r10,%rbx # context->Rip<=.Lcbc_enc_pushf | |
1037 | jbe .Lin_cbc_no_flag | |
1038 | lea 8(%rax),%rax | |
1039 | lea .Lcbc_enc_popf(%rip),%r10 | |
1040 | cmp %r10,%rbx # context->Rip<.Lcbc_enc_popf | |
1041 | jb .Lin_cbc_no_flag | |
1042 | lea -8(%rax),%rax | |
1043 | lea .Lcbc_dec_pushf(%rip),%r10 | |
1044 | cmp %r10,%rbx # context->Rip<=.Lcbc_dec_pushf | |
1045 | jbe .Lin_cbc_no_flag | |
1046 | lea 8(%rax),%rax | |
1047 | lea .Lcbc_dec_popf(%rip),%r10 | |
1048 | cmp %r10,%rbx # context->Rip<.Lcbc_dec_popf | |
1049 | jb .Lin_cbc_no_flag | |
1050 | lea -8(%rax),%rax | |
1051 | ||
# Fetch the stack pointer value the function stored 48 bytes into its frame,
# then step over the six 8-byte register slots that start at that address,
# mirroring the 0..40 loads and the 48-byte adjustment done at .Lcbc_done.
1052 | .Lin_cbc_no_flag: | |
ae381fef AP |
1053 | mov 48(%rax),%rax # $_rsp | |
1054 | lea 48(%rax),%rax | |
1055 | ||
# Copy the six saved registers found just below the frame base back into the
# context record.
1056 | .Lin_cbc_frame_setup: | |
1057 | mov -8(%rax),%rbx | |
1058 | mov -16(%rax),%rbp | |
1059 | mov -24(%rax),%r12 | |
1060 | mov -32(%rax),%r13 | |
1061 | mov -40(%rax),%r14 | |
1062 | mov -48(%rax),%r15 | |
1063 | mov %rbx,144($context) # restore context->Rbx | |
1064 | mov %rbp,160($context) # restore context->Rbp | |
1065 | mov %r12,216($context) # restore context->R12 | |
1066 | mov %r13,224($context) # restore context->R13 | |
1067 | mov %r14,232($context) # restore context->R14 | |
1068 | mov %r15,240($context) # restore context->R15 | |
1069 | ||
# Reload rdi/rsi from 8 and 16 bytes above the frame base and publish the
# frame base as context->Rsp.
1070 | .Lin_cbc_prologue: | |
1071 | mov 8(%rax),%rdi | |
1072 | mov 16(%rax),%rsi | |
1073 | mov %rax,152($context) # restore context->Rsp | |
1074 | mov %rsi,168($context) # restore context->Rsi | |
1075 | mov %rdi,176($context) # restore context->Rdi | |
1076 | ||
# Shared exit path, also entered by a jump from common_se_handler: copy the
# patched CONTEXT (1232 bytes, moved as 8-byte words) over
# disp->ContextRecord, call RtlVirtualUnwind with the dispatcher's fields as
# arguments, and return ExceptionContinueSearch.
702e7425 AP |
1077 | .align 4 | |
1078 | .Lcommon_seh_exit: | |
1079 | ||
ae381fef AP |
1080 | mov 40($disp),%rdi # disp->ContextRecord | |
1081 | mov $context,%rsi # context | |
1082 | mov \$`1232/8`,%ecx # sizeof(CONTEXT) | |
1083 | .long 0xa548f3fc # cld; rep movsq | |
1084 | ||
# The first four arguments travel in rcx, rdx, r8 and r9; the remaining four
# go into the stack area reserved at handler entry.
1085 | mov $disp,%rsi | |
1086 | xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER | |
1087 | mov 8(%rsi),%rdx # arg2, disp->ImageBase | |
1088 | mov 0(%rsi),%r8 # arg3, disp->ControlPc | |
1089 | mov 16(%rsi),%r9 # arg4, disp->FunctionEntry | |
1090 | mov 40(%rsi),%r10 # disp->ContextRecord | |
1091 | lea 56(%rsi),%r11 # &disp->HandlerData | |
1092 | lea 24(%rsi),%r12 # &disp->EstablisherFrame | |
1093 | mov %r10,32(%rsp) # arg5 | |
1094 | mov %r11,40(%rsp) # arg6 | |
1095 | mov %r12,48(%rsp) # arg7 | |
1096 | mov %rcx,56(%rsp) # arg8, (NULL) | |
1097 | call *__imp_RtlVirtualUnwind(%rip) | |
1098 | ||
# Release the 64-byte argument area and restore flags and registers in the
# reverse order of the pushes at handler entry.
1099 | mov \$1,%eax # ExceptionContinueSearch | |
1100 | lea 64(%rsp),%rsp | |
1101 | popfq | |
1102 | pop %r15 | |
1103 | pop %r14 | |
1104 | pop %r13 | |
1105 | pop %r12 | |
1106 | pop %rbp | |
1107 | pop %rbx | |
1108 | pop %rdi | |
1109 | pop %rsi | |
1110 | ret | |
1111 | .size cbc_se_handler,.-cbc_se_handler | |
1112 | ||
# .pdata: one record of three image-relative addresses per function - start
# of its code, end of its code, and the matching unwind info record in .xdata.
1113 | .section .pdata | |
1114 | .align 4 | |
1115 | .rva .LSEH_begin_Camellia_EncryptBlock_Rounds | |
1116 | .rva .LSEH_end_Camellia_EncryptBlock_Rounds | |
1117 | .rva .LSEH_info_Camellia_EncryptBlock_Rounds | |
1118 | ||
1119 | .rva .LSEH_begin_Camellia_DecryptBlock_Rounds | |
1120 | .rva .LSEH_end_Camellia_DecryptBlock_Rounds | |
1121 | .rva .LSEH_info_Camellia_DecryptBlock_Rounds | |
1122 | ||
1123 | .rva .LSEH_begin_Camellia_Ekeygen | |
1124 | .rva .LSEH_end_Camellia_Ekeygen | |
1125 | .rva .LSEH_info_Camellia_Ekeygen | |
1126 | ||
1127 | .rva .LSEH_begin_Camellia_cbc_encrypt | |
1128 | .rva .LSEH_end_Camellia_cbc_encrypt | |
1129 | .rva .LSEH_info_Camellia_cbc_encrypt | |
1130 | ||
# .xdata: every record starts with the four header bytes 9,0,0,0 followed by
# the image-relative address of its handler. Records served by
# common_se_handler append HandlerData[] with the prologue and epilogue labels
# that handler compares Rip against.
# NOTE(review): per the Win64 UNWIND_INFO layout the leading 9 should decode
# as version 1 with the UNW_FLAG_EHANDLER flag, and the zero bytes as no
# prologue size, no unwind codes and no frame register - confirm against the
# Microsoft x64 exception handling documentation.
1131 | .section .xdata | |
1132 | .align 8 | |
1133 | .LSEH_info_Camellia_EncryptBlock_Rounds: | |
1134 | .byte 9,0,0,0 | |
1135 | .rva common_se_handler | |
1136 | .rva .Lenc_prologue,.Lenc_epilogue # HandlerData[] | |
1137 | .LSEH_info_Camellia_DecryptBlock_Rounds: | |
1138 | .byte 9,0,0,0 | |
1139 | .rva common_se_handler | |
1140 | .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[] | |
1141 | .LSEH_info_Camellia_Ekeygen: | |
1142 | .byte 9,0,0,0 | |
1143 | .rva common_se_handler | |
1144 | .rva .Lkey_prologue,.Lkey_epilogue # HandlerData[] | |
# No HandlerData[] for this record: cbc_se_handler compares Rip against the
# .Lcbc_* labels directly.
1145 | .LSEH_info_Camellia_cbc_encrypt: | |
1146 | .byte 9,0,0,0 | |
1147 | .rva cbc_se_handler | |
1148 | ___ | |
1149 | } | |
1150 | ||
# Final emit step. Every backtick-quoted span left in the generated assembly
# text (for example the CONTEXT size arithmetic in the SEH exit path) is
# replaced by the result of evaluating its contents as a Perl expression, and
# the finished text is written to STDOUT.
1151 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | |
1152 | print $code; | |
# print only fills a buffer, so a failed write (full disk, broken pipe to a
# downstream consumer) may first be reported by close. Check its result and
# die, so the build stops instead of silently continuing with truncated
# assembly output and a zero exit status.
1153 | close STDOUT or die "error closing STDOUT: $!"; |